]> jfr.im git - yt-dlp.git/blob - youtube_dlc/utils.py
Documentation fixes
[yt-dlp.git] / youtube_dlc / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import xml.etree.ElementTree
37 import zlib
38
39 from .compat import (
40 compat_HTMLParseError,
41 compat_HTMLParser,
42 compat_basestring,
43 compat_chr,
44 compat_cookiejar,
45 compat_ctypes_WINFUNCTYPE,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_http_client,
51 compat_integer_types,
52 compat_kwargs,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_parse_urlunparse,
64 compat_urllib_parse_quote,
65 compat_urllib_parse_quote_plus,
66 compat_urllib_parse_unquote_plus,
67 compat_urllib_request,
68 compat_urlparse,
69 compat_xpath,
70 )
71
72 from .socks import (
73 ProxyType,
74 sockssocket,
75 )
76
77
def register_socks_protocols():
    """Make urlparse treat SOCKS URL schemes as having a network location.

    In Python < 2.6.5, urlsplit() suffers from https://bugs.python.org/issue7904:
    URLs whose scheme is not listed in urlparse.uses_netloc are not handled
    correctly, so each SOCKS scheme is appended to that list (once).
    """
    registered = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme in registered:
            continue
        registered.append(scheme)
85
86
# Type of a compiled regular expression object. The re module exposes no
# portable public name for it across the Python versions this file supports,
# so it is derived from an actual compiled pattern.
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
89
90
def random_user_agent():
    """Return a randomized desktop Chrome User-Agent string.

    Chooses one entry at random from a fixed table of real Chrome release
    versions and interpolates it into a Windows 10 x64 UA template, so the
    User-Agent varies between runs instead of being a single static string.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Historical Chrome release version strings, roughly newest-first.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672
1673
# Default HTTP headers sent with every request; the User-Agent is randomized
# once per process from the Chrome version list above
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings extractors can opt into when a site
# misbehaves with the default one
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel to distinguish "no default supplied" from "default is None"
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language, used for parsing localized dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized when guessing formats from URLs/filenames
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strftime() patterns tried in order when parsing dates with no known locale
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Additional patterns for locales that write day before month (e.g. UK/EU)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional patterns for locales that write month before day (e.g. US)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-style obfuscated JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the JSON-LD payload from a <script type="application/ld+json"> tag
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1781
1782
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        # Verify the reported codec actually works before trusting it
        'TEST'.encode(pref)
    except Exception:
        # Broken locale or unknown codec: fall back to a sane default
        return 'UTF-8'
    return pref
1796
1797
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible.

    Writes to a NamedTemporaryFile next to fn and renames it into place so
    readers never observe a partially-written file.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # (fixed: these lambdas previously ignored their argument `f` and
        # closed over `fn` instead, which only worked by coincidence)
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        # Place the temp file in the destination directory so the final
        # os.rename stays on one filesystem (and therefore atomic)
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files as 0o600; re-apply the
            # umask-derived default permissions before publishing the file
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file; re-raise the original error
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1856
1857
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (manual scan for Python < 2.7,
        whose ElementTree lacks attribute predicates) """
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1872
1873 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1874 # the namespace parameter
1875
1876
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' components of *path* into '{uri}tag' form.

    Components without a prefix are kept as-is; prefixes are resolved
    through *ns_map* (prefix -> namespace URI).
    """
    expanded = []
    for component in path.split('/'):
        pieces = component.split(':')
        if len(pieces) == 1:
            expanded.append(pieces[0])
        else:
            prefix, tag = pieces
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1887
1888
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching *xpath* (a string or list of strings).

    When nothing matches: return *default* if one was given, raise
    ExtractorError if *fatal*, otherwise return None.
    """
    # A single string is treated as a one-element list of candidates
    search_paths = [xpath] if isinstance(xpath, (str, compat_str)) else xpath
    for xp in search_paths:
        n = node.find(compat_xpath(xp))
        if n is not None:
            break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            raise ExtractorError(
                'Could not find XML element %s' % (xpath if name is None else name))
        else:
            return None
    return n
1910
1911
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element matching *xpath*.

    Mirrors xpath_element()'s default/fatal handling both when the element
    is missing and when it exists but has no text.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1925
1926
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute *key* of the element matching *xpath*.

    Falls back to *default*, raises ExtractorError when *fatal*, or
    returns None otherwise.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML attribute %s'
            % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1938
1939
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Returns None when no element with that id is found
    return get_element_by_attribute('id', id, html)
1943
1944
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    # next(iter(...), None) yields the first match or None for an empty list
    return next(iter(get_elements_by_class(class_name, html)), None)
1949
1950
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying the given attribute value, or None"""
    return next(iter(get_elements_by_attribute(attribute, value, html, escape_value)), None)
1954
1955
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class token anywhere inside a (possibly multi-valued) class
    # attribute; the pattern is already escaped, so disable further escaping
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1961
1962
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    if escape_value:
        value = re.escape(value)

    tag_re = re.compile(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value))

    results = []
    for match in tag_re.finditer(html):
        content = match.group('content')
        # Strip one surrounding quote pair, mirroring historical behaviour
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]
        results.append(unescapeHTML(content))
    return results
1986
1987
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Filled in by handle_starttag(); stays empty if no tag was seen
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Keep the attributes of the (last) start tag encountered
        self.attrs = dict(attrs)
1997
1998
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML;
    # return whatever attributes were gathered before the failure
    except compat_HTMLParseError:
        pass
    return parser.attrs
2023
2024
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # Newline vs <br />: real newlines are insignificant, <br>/<p> are
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    text = unescapeHTML(text)
    return text.strip()
2040
2041
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so byte output is not mangled
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # '-' means stdout; prefer the raw byte buffer on Python 3
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error will not be fixed by renaming; re-raise as-is
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2072
2073
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2081
2082
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Transliterate accented characters in restricted mode
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        # '?' and control characters are never allowed
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps: keep 12:34:56 readable as 12_34_56 before the
    # generic ':' replacement kicks in
    timestamped = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in timestamped)
    if not is_id:
        # Collapse runs of underscores and trim leading/trailing ones
        result = re.sub(r'_{2,}', '_', result).strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2122
2123
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    # Other platforms accept any character except NUL and the separator
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_parts = []
    for part in norm_path:
        if part in ('.', '..'):
            sanitized_parts.append(part)
        else:
            # Replace characters forbidden on Windows plus trailing dot/space
            sanitized_parts.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))
    if drive_or_unc:
        sanitized_parts.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_parts)
2140
2141
def sanitize_url(url):
    """Normalize scheme-less URLs and fix common scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://'),
    ):
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
2158
2159
def sanitized_Request(url, *args, **kwargs):
    """Like compat_urllib_request.Request, but runs the URL through sanitize_url() first."""
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2162
2163
def expand_path(s):
    """Expand shell variables and ~"""
    # expandvars handles $VAR / %VAR%; compat_expanduser handles '~'
    return os.path.expandvars(compat_expanduser(s))
2167
2168
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable, keeping first-seen order """
    # NB: linear membership test is deliberate -- elements need not be
    # hashable, so a set cannot be used here
    unique = []
    for item in iterable:
        if item not in unique:
            unique.append(item)
    return unique
2176
2177
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    # Drop the trailing ';'
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: &#123; (decimal) or &#x7B; (hex)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2207
2208
def unescapeHTML(s):
    """Replace HTML entities (&amp;, &#65;, ...) in s with their characters."""
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2216
2217
def process_communicate_or_kill(p, *args, **kwargs):
    """communicate() with subprocess p, killing and reaping it if interrupted.

    Ensures the child does not outlive e.g. a KeyboardInterrupt raised
    while waiting for it; the original exception is re-raised.
    """
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2225
2226
def get_subprocess_encoding():
    """Return the encoding used to exchange data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        chosen = preferredencoding()
    else:
        chosen = sys.getfilesystemencoding()
    return 'utf-8' if chosen is None else chosen
2237
2238
def encodeFilename(s, for_subprocess=False):
    """
    Encode a text filename to the platform's expected representation.

    @param s The name of the file
    @param for_subprocess True when the result is passed to a subprocess
                          rather than to a filesystem API
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2261
2262
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; pass anything else through."""
    # Python 3 filenames are already text; non-bytes values need no decoding
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2272
2273
def encodeArgument(s):
    """Encode a subprocess argument for the current platform."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2281
2282
def decodeArgument(b):
    """Decode a subprocess argument (inverse of encodeArgument)."""
    return decodeFilename(b, True)
2285
2286
def decodeOption(optval):
    """Decode a command-line option value to text if it arrived as bytes."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2295
2296
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    Fixed boundary comparisons: with strict '>' an exact hour rendered as
    '60:00' and an exact minute as '60'; '>=' rolls them over correctly.
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2304
2305
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honouring the 'nocheckcertificate' option.

    Tries the best SSL context the running Python supports, falling back
    to older APIs as needed.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2329
2330
def bug_reports_message():
    """Return the boilerplate appended to unexpected-error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dlc -U to update'
    else:
        update_cmd = 'see https://github.com/pukkandan/yt-dlp on how to update'
    return ''.join((
        '; please report this issue on https://github.com/pukkandan/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2340
2341
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    All custom exceptions in this module derive from this class so callers
    can catch them uniformly.
    """
    pass
2345
2346
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        """

        # Network-level failures are always treated as expected (not a bug)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report boilerplate appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2374
2375
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL (always 'expected')."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url
2381
2382
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2386
2387
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # NOTE(review): presumably a list of country codes where the video
        # IS available -- confirm against the extractors that raise this
        self.countries = countries
2399
2400
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2413
2414
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2422
2423
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the raw message accessible for callers that format it themselves
        self.msg = msg
2434
2435
class ExistingVideoReached(YoutubeDLError):
    """ --break-on-existing triggered: an already-downloaded video was reached """
    pass
2439
2440
class RejectedVideoReached(YoutubeDLError):
    """ --break-on-reject triggered: a filtered/rejected video was reached """
    pass
2444
2445
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2449
2450
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2458
2459
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2475
2476
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails.

    self.reason classifies the failure ('NO_SPACE', 'VALUE_TOO_LONG' or
    'NOT_SUPPORTED') so callers can react without parsing the message.
    """
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2491
2492
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no mechanism for writing extended attributes is available."""
    pass
2495
2496
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection, applying source_address and Py2 workarounds."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the bind address (IPv4 vs IPv6)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2560
2561
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, any Accept-Encoding header (matched
    case-insensitively) is removed as well so the request is sent
    without compression.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = dict(
        (key, value) for key, value in headers.items()
        if key.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2570
2571
2572 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2573 """Handler for HTTP requests and responses.
2574
2575 This class, when installed with an OpenerDirector, automatically adds
2576 the standard headers to every HTTP request and handles gzipped and
2577 deflated responses from web servers. If compression is to be avoided in
2578 a particular request, the original request in the program code only has
2579 to include the HTTP header "Youtubedl-no-compression", which will be
2580 removed before making the real request.
2581
2582 Part of this code was copied from:
2583
2584 http://techknack.net/python-urllib2-handlers/
2585
2586 Andrew Rowls, the author of that code, agreed to release it to the
2587 public domain.
2588 """
2589
    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict, kept for the connection
        # factory (e.g. to read 'source_address')
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2593
    def http_open(self, req):
        """Open a plain HTTP connection, honouring the Ytdl-socks-proxy header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Route through a SOCKS proxy; the marker header is internal
            # and must not be sent on the wire
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)
2605
    @staticmethod
    def deflate(data):
        # Some servers send raw deflate streams despite the header; try raw
        # first (negative wbits), then fall back to zlib-wrapped
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)
2612
    def http_request(self, req):
        """Normalize an outgoing request: escape the URL, add default headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Strip internal marker headers before the request goes on the wire
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req
2643
2644 def http_response(self, req, resp):
2645 old_resp = resp
2646 # gzip
2647 if resp.headers.get('Content-encoding', '') == 'gzip':
2648 content = resp.read()
2649 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2650 try:
2651 uncompressed = io.BytesIO(gz.read())
2652 except IOError as original_ioerror:
2653 # There may be junk add the end of the file
2654 # See http://stackoverflow.com/q/4928560/35070 for details
2655 for i in range(1, 1024):
2656 try:
2657 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2658 uncompressed = io.BytesIO(gz.read())
2659 except IOError:
2660 continue
2661 break
2662 else:
2663 raise original_ioerror
2664 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2665 resp.msg = old_resp.msg
2666 del resp.headers['Content-encoding']
2667 # deflate
2668 if resp.headers.get('Content-encoding', '') == 'deflate':
2669 gz = io.BytesIO(self.deflate(resp.read()))
2670 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2671 resp.msg = old_resp.msg
2672 del resp.headers['Content-encoding']
2673 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2674 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2675 if 300 <= resp.code < 400:
2676 location = resp.headers.get('Location')
2677 if location:
2678 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2679 if sys.version_info >= (3, 0):
2680 location = location.encode('iso-8859-1').decode('utf-8')
2681 else:
2682 location = location.decode('utf-8')
2683 location_escaped = escape_url(location)
2684 if location != location_escaped:
2685 del resp.headers['Location']
2686 if sys.version_info < (3, 0):
2687 location_escaped = location_escaped.encode('utf-8')
2688 resp.headers['Location'] = location_escaped
2689 return resp
2690
2691 https_request = http_request
2692 https_response = http_response
2693
2694
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through
    the SOCKS proxy described by the URL *socks_proxy*.

    Supported schemes: socks/socks4 (SOCKS4), socks4a (SOCKS4A),
    socks5 (SOCKS5). Raises ValueError for any other scheme instead of
    failing later with a confusing UnboundLocalError (the previous
    behavior when ``socks_type`` was left unassigned).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # Credentials arrive percent-encoded inside the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            # Establish the raw TCP connection through the SOCKS proxy
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # Wrap in TLS when the base class is an HTTPS connection
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2736
2737
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that supports a custom connection class and SOCKS
    proxies (requested via the internal Ytdl-socks-proxy header)."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class
        extra_kwargs = {}

        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        # Route through a SOCKS proxy when requested; the internal header
        # must not leak into the outgoing request
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **extra_kwargs)
2761
2762
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in Netscape-format cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc. Do not edit.

'''
    # Named view over one tab-separated cookie file line, used for validation
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                # Honor the discard/expiry flags unless explicitly ignored
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly prefix so the stdlib parser accepts the line
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            # Copy the file into a buffer, skipping (with a warning) any
            # entries the stdlib parser would otherwise crash on
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2879
2880
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Thin wrapper over the stdlib cookie processor; exists so the
    Python 2 Set-Cookie escaping workaround below can be (re-)enabled."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE(review): the workaround below is deliberately disabled; kept
        # for reference in case the Python 2 issue resurfaces.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2903
2904
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    # On Python 3 the stdlib redirect handling is used unchanged; the
    # override below only exists for Python 2.
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2912
2913
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns ``(timezone, date_str)`` where ``timezone`` is a
    datetime.timedelta (zero for 'Z' or when no designator is found) and
    ``date_str`` has the designator stripped when one was matched.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # 'Z' designator: UTC, zero offset
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
2930
2931
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds are not representable by %S, drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
2949
2950
def date_formats(day_first=True):
    """Return the strptime patterns to try, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2953
2954
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    result = None
    # Commas, AM/PM markers and timezone designators confuse strptime
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NB: deliberately tries every format; the last one that parses wins
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if result is None:
        # Fall back to the RFC 2822 parser
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                result = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if result is not None:
        return compat_str(result)
2981
2982
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker means 12 hours must be added after parsing
    pm_offset = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    mobj = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if mobj:
        date_str = date_str[:-len(mobj.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    mobj = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if mobj:
        date_str = mobj.group(1)

    # First successful format wins (unlike unified_strdate)
    for expression in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(date_str, expression)
        except ValueError:
            continue
        parsed = parsed - timezone + datetime.timedelta(hours=pm_offset)
        return calendar.timegm(parsed.timetuple())

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_offset * 3600
3014
3015
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3027
3028
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Derive the subtitle file name: <base>.<lang>.<format>."""
    lang_and_format = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, lang_and_format, expected_real_ext)
3031
3032
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    mobj = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if mobj is None:
        # Plain YYYYMMDD date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(mobj.group('time'))
    if mobj.group('sign') == '-':
        amount = -amount
    unit = mobj.group('unit')
    # A bad approximation? Months/years become 30/365 days
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3060
3061
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(mobj.groups()) if mobj is not None else date_str
3070
3071
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3101
3102
def platform_name():
    """ Returns the platform name as a compat_str """
    result = platform.platform()
    # Python 2 may hand back bytes; normalize to text
    if isinstance(result, bytes):
        result = result.decode(preferredencoding())

    assert isinstance(result, compat_str)
    return result
3111
3112
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors (stdout=1, stderr=2) to Win32 standard handle
    # IDs (STD_OUTPUT_HANDLE=-11, STD_ERROR_HANDLE=-12)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is a real console only if it is a local character device
        # and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 BMP characters; non-BMP characters are
    # written one at a time as a UTF-16 surrogate pair (2 code units)
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3186
3187
def write_string(s, out=None, encoding=None):
    """Write text *s* to *out* (default: stderr), coping with byte-mode
    streams, Python 2 quirks and Windows consoles, then flush."""
    out = sys.stderr if out is None else out
    assert type(s) == compat_str

    # On Windows, prefer the native console API so non-ASCII text survives
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3208
3209
def bytes_to_intlist(bs):
    """Turn a bytes/str byte sequence into a list of integer values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return [ord(ch) for ch in bs]  # Python 2: indexing yields 1-char strs
3217
3218
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values into a bytes object."""
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
3223
3224
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure used by Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the maximal byte range the API accepts (low/high DWORD halves)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED alive on the file object for the unlock call
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3298
3299
class locked_file(object):
    """Context manager wrapping an open file that holds an OS-level lock
    (shared for 'r', exclusive for 'w'/'a') for the duration of the block."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Reads take a shared lock; writes/appends take an exclusive one
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Don't leak the file handle when locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3329
3330
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to UTF-8 when unknown."""
    enc = sys.getfilesystemencoding()
    return 'utf-8' if enc is None else enc
3334
3335
def shell_quote(args):
    """Quote a sequence of arguments for safe use on a shell command line."""
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(arg)) for arg in args)
3345
3346
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, smuggled)
3355
3356
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): split smuggled data back out of the URL."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3364
3365
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    return '%.2f%s' % (float(bytes) / float(1024 ** exponent), suffix)
3378
3379
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' from *s* using *unit_table* (unit -> multiplier);
    return the integer product, or None when nothing matches."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not mobj:
        return None
    # Accept ',' as decimal separator too
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3389
3390
def parse_filesize(s):
    """Parse a human-readable file size like '5.5 MiB' or '300kB' into a
    byte count (int), or None when *s* is None or unparsable."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    # NOTE: lowercase-prefix 'xB' forms (kB, mB, ...) are deliberately mapped
    # to binary multiples here, matching long-standing youtube-dl behavior.
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3460
3461
def parse_count(s):
    """Parse a human count string like '1.5M' into an integer, or None."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers (possibly with separators) need no unit table
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    return lookup_unit_table({
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }, s)
3481
3482
def parse_resolution(s):
    """Extract width/height from a description like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {'width': int(mobj.group('w')), 'height': int(mobj.group('h'))}

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
3503
3504
def parse_bitrate(s):
    """Extract an integer kbps value from a string like '128 kbps', or None."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
3511
3512
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3522
3523
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbrevs = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbrevs:
        return None
    return abbrevs.index(abbrev) + 1
3532
3533
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave existing entities (&amp;, &lt;, numeric references, ...) untouched
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3540
3541
def setproctitle(title):
    """Set the process name shown by tools like ps (Linux/glibc only)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        # Not a glibc system; silently skip
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3566
3567
def remove_start(s, start):
    """Strip *start* from the beginning of *s* if present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3570
3571
def remove_end(s, end):
    """Strip *end* from the end of *s* if present (None-safe)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3574
3575
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3583
3584
def get_domain(url):
    """Extract the bare domain (scheme and leading 'www.' stripped) or None."""
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return mobj.group('domain') if mobj else None
3588
3589
def url_basename(url):
    """Return the last path component of *url* (e.g. .../a/b.mp4 -> 'b.mp4')."""
    return compat_urlparse.urlparse(url).path.strip('/').split('/')[-1]
3593
3594
def base_url(url):
    """Return *url* truncated after the last '/' before any query/fragment."""
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3597
3598
def urljoin(base, path):
    """Resolve *path* against *base*, returning None when either is unusable."""
    def _ensure_text(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    path = _ensure_text(path)
    if not path or not isinstance(path, compat_str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path  # already absolute (or protocol-relative)
    base = _ensure_text(base)
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3612
3613
class HEADRequest(compat_urllib_request.Request):
    # Request subclass that forces the HTTP method to HEAD
    def get_method(self):
        return 'HEAD'
3617
3618
class PUTRequest(compat_urllib_request.Request):
    # Request subclass that forces the HTTP method to PUT
    def get_method(self):
        return 'PUT'
3622
3623
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (optionally reading attribute *get_attr* first,
    scaled by invscale/scale); return *default* when conversion fails."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3636
3637
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
3640
3641
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    # Ints pass straight through; strings get thousands separators removed.
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3649
3650
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a scaled float, or *default* on None/unconvertible input."""
    if v is None:
        return default
    try:
        result = float(v)
    except (ValueError, TypeError):
        return default
    return result * invscale / scale
3658
3659
def bool_or_none(v, default=None):
    """Return *v* only when it is a genuine bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3662
3663
def strip_or_none(v, default=None):
    """Strip whitespace from a string, or return *default* for non-strings."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3666
3667
def url_or_none(url):
    """Return the stripped *url* when it looks like a supported URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3673
3674
def parse_duration(s):
    """Parse a duration string and return the total number of seconds,
    or None when the string cannot be interpreted.

    Accepts clock notation ('1:23:45', '01:02:03.5'), free-form unit
    notation ('3h 4min 5s'), ISO 8601 style ('PT1H2M3S') and fractional
    forms ('2.5 hours', '90 min').
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # First try clock-style [[[dd:]hh:]mm:]ss[.ms] notation.
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Then ISO 8601 style durations and '3h 4min 5s' free-form notation.
        # Years/months/weeks are matched but intentionally not captured.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Last resort: fractional '2.5 hours' / '90 minutes' forms.
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whatever components matched; 'ms' still carries its leading dot,
    # so float(ms) yields the fractional part directly.
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3731
3732
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    When *expected_real_ext* is given but does not match the actual
    extension, *ext* is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3739
3740
def replace_extension(filename, ext, expected_real_ext=None):
    """Swap the extension of *filename* for *ext*.

    When *expected_real_ext* is given but does not match the actual
    extension, *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3746
3747
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # Use a None sentinel instead of a mutable [] default argument.
    if args is None:
        args = []
    try:
        process_communicate_or_kill(subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
    except OSError:
        # Binary not found (or not executable) - report absence.
        return False
    return exe
3757
3758
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3776
3777
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program *output* via *version_re*.

    Falls back to a generic 'version <x>' pattern; returns *unrecognized*
    when nothing matches.
    """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3787
3788
class PagedList(object):
    """Base class for lazily paged result lists.

    Subclasses must implement getslice(start, end).
    """
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3793
3794
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via *pagefunc*.

    pagefunc: callable mapping a 0-based page number to an iterable of items.
    pagesize: number of items per full page.
    use_cache: keep fetched pages so repeated slices do not re-query.
    """
    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Collect items from *start* up to (but excluding) *end*."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of the first wanted item within this page.
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted item within this page.
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3845
3846
class InAdvancePagedList(PagedList):
    """PagedList variant for which the total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Collect items from *start* up to (but excluding) *end*."""
        results = []
        first_page = start // self._pagesize
        last_page = self._pagecount if end is None else end // self._pagesize + 1
        to_skip = start - first_page * self._pagesize
        remaining = None if end is None else end - start
        for pagenum in range(first_page, last_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                # Drop leading items before *start* on the first page only.
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    results.extend(page[:remaining])
                    break
                remaining -= len(page)
            results.extend(page)
        return results
3874
3875
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escapes embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode, s)
3882
3883
def lowercase_escape(s):
    """Decode literal '\\uXXXX' escapes embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _decode, s)
3890
3891
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2 quote() needs bytes; Python 3 handles str directly.
    if isinstance(s, compat_str) and sys.version_info < (3, 0):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3897
3898
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # Host goes through IDNA; every other component is percent-escaped.
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment),
    ).geturl()
3909
3910
def read_batch_urls(batch_fd):
    """Read URLs from an open batch file, skipping BOMs, blanks and comments."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3928
3929
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3932
3933
def update_url_query(url, query):
    """Return *url* with the *query* parameters merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(params, True)))
3942
3943
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its url, data, headers and query."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    # Preserve the HTTP method of the original request.
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3962
3963
def _multipart_encode_impl(data, boundary):
    """Serialize *data* as multipart/form-data with the given *boundary*.

    Raises ValueError when the boundary occurs inside a field.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += part

    out += b'--' + boundary_bytes + b'--\r\n'
    return out, content_type
3984
3985
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # A random boundary that collided with the data is retried;
            # a caller-supplied one is a hard error.
            if has_specified_boundary:
                raise
            boundary = None
4014
4015
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable key of *key_or_keys* in *d*.

    None values are always skipped; falsy ones too unless
    skip_false_values is False.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4024
4025
def try_get(src, getter, expected_type=None):
    """Apply *getter* (or each of a list of getters) to *src*.

    Returns the first result that neither raises a common lookup error
    nor fails the optional *expected_type* check; otherwise None.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fetch in getters:
        try:
            value = fetch(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4037
4038
def merge_dicts(*dicts):
    """Merge dicts left to right; earlier non-None values win, except that a
    later non-empty string replaces an earlier empty one."""
    merged = {}
    for current in dicts:
        for k, v in current.items():
            if v is None:
                continue
            if k not in merged:
                merged[k] = v
            elif (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]):
                merged[k] = v
    return merged
4051
4052
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding bytes with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4055
4056
# US MPAA movie ratings mapped to minimum viewer ages.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to minimum viewer ages.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4074
4075
def parse_age_limit(s):
    """Parse an age limit from an int, a '<NN>+' string, a US movie rating or
    a TV Parental Guidelines rating; return an int in 0..21 or None."""
    # isinstance() instead of the type(s) == int anti-pattern; bool is a
    # subclass of int, so exclude it explicitly to keep the old behavior.
    if isinstance(s, int) and not isinstance(s, bool):
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4090
4091
def strip_jsonp(code):
    """Strip a JSONP wrapper like `cb({...});`, returning the bare payload."""
    # Matches an optional 'window.'-prefixed callback name (possibly guarded
    # by 'name && name(...)'), capturing everything inside the call.
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4100
4101
def js_to_json(code):
    """Convert a JavaScript object/value literal into valid JSON text."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally followed by ':' when used
    # as object keys), paired with their numeric base.
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token into its JSON equivalent.
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!'-prefixed expressions and trailing commas vanish.
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to their JSON forms.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # Convert hex/octal integers to decimal (quoted when a key).
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifiers become quoted strings.
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4142
4143
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        # Position in the list is the quality rank; unknown ids rank -1.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return rank
4152
4153
# Default output filename template: "<title> [<id>].<ext>".
DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s'
4155
4156
def limit_length(s, length):
    """Truncate *s* to at most *length* characters, ending with '...' if cut."""
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
4165
4166
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4169
4170
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; unparseable input falls back to
    the *assume_new* assumption."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4178
4179
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    # Self-update is deliberately disabled in youtube-dlc. The zipimporter
    # based detection inherited from upstream was unreachable dead code
    # after this return and has been removed.
    return False
4187
4188
def args_to_str(args):
    """Render a subprocess argument list as a single shell-quoted string."""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4192
4193
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    msg = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        msg = msg.decode(preferredencoding())
    return msg
4201
4202
def mimetype2ext(mt):
    """Map a MIME type to a conventional file extension (None for None)."""
    if mt is None:
        return None

    # Full-type exceptions that cannot be derived from the subtype alone.
    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in full_type_map:
        return full_type_map[mt]

    # Otherwise derive from the (lowercased) subtype, dropping parameters.
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()
    subtype_map = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return subtype_map.get(subtype, subtype)
4240
4241
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into video/audio codec fields.

    Returns a dict with 'vcodec' and 'acodec' keys ('none' when absent),
    or an empty dict when nothing could be determined.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    # A comprehension instead of the previous map(lambda str: ...), whose
    # parameter shadowed the builtin `str`.
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: a two-entry list is assumed to be video,audio.
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
4271
4272
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers.

    Prefers the Content-Disposition filename, falling back to Content-Type.
    """
    headers = url_handle.headers
    cd = headers.get('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4285
4286
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI for *data* with the given MIME type."""
    b64 = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, b64)
4289
4290
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone.
        return False
    return age_limit < content_limit
4299
4300
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4319
4320
def determine_protocol(info_dict):
    """Work out the download protocol for *info_dict*."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Streaming protocols identified by the URL prefix.
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    # Manifest-based protocols identified by the file extension.
    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4341
4342
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """
    def column_widths(table):
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def keep_nonempty(row, widths):
        # Keep only cells whose column has a non-zero maximum width.
        return [cell for width, cell in zip(widths, row) if width]

    if hideEmpty:
        widths = column_widths(data)
        header_row = keep_nonempty(header_row, widths)
        data = [keep_nonempty(row, widths) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        table = [header_row] + [['-' * w for w in widths]] + data
    fmt = ' '.join('%-' + compat_str(w + extraGap) + 's' for w in widths[:-1]) + ' %s'
    return '\n'.join(fmt % tuple(row) for row in table)
4363
4364
def _match_one(filter_part, dct):
    """Evaluate a single filter clause (e.g. 'duration > 60' or '!is_live')
    against *dct*; raises ValueError for an unparseable clause."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key <op>[?] value, where value is a number (with optional SI/size
    # suffix), a quoted string, or a bare word.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind inside the quoted value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Allow human-readable sizes such as '500k' or '1.2MiB'.
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # The '?' suffix makes the clause pass when the field is missing.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary presence/absence tests: 'key' and '!key'.
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4433
4434
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # Every '&'-separated clause must match.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4440
4441
def match_filter_func(filter_str):
    """Build a --match-filter callback from *filter_str*.

    The callback returns None when the video passes, or a skip message.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4450
4451
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (None if unparseable)."""
    if not time_expr:
        return None

    m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if m:
        return float(m.group('time_offset'))

    m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if m:
        hours, minutes, seconds = m.groups()
        # A frame-style 'ss:ff' tail is treated as a fractional second.
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4463
4464
4465 def srt_subtitles_timecode(seconds):
4466 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
4467
4468
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Older TTML/TTAF namespaces are rewritten to the current ones so a
    # single set of XPath expressions works for all inputs.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that are translated into SRT markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # Streaming SAX-style target that renders one <p> element to text
        # with <b>/<i>/<u>/<font> markup derived from TTML styles.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an ancestor.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the markup opened by the matching start() call.
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Render one paragraph element through TTMLPElementParser.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style definitions, re-running until parent styles referenced
    # before their definition have been filled in.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style attached to <body> or <div> becomes the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4631
4632
def cli_option(params, command_option, param):
    """Return [command_option, str(value)] for *param* in *params*, or []
    when the parameter is unset."""
    param = params.get(param)
    # The previous code only stringified truthy values, so a falsy non-None
    # value (e.g. 0 or '') leaked through unconverted; convert whenever the
    # parameter is present at all.
    if param is None:
        return []
    return [command_option, compat_str(param)]
4638
4639
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean *param* as a CLI option, joined by *separator* if given."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4648
4649
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit the bare *command_option* when the parameter equals *expected_value*."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4653
4654
def cli_configuration_args(params, param, default=None):
    """Fetch an extra-arguments list from *params*; *default* (an empty list
    when not given) is returned if the parameter is unset."""
    # `default` used to be a mutable `[]` default argument, which all callers
    # silently shared; use a None sentinel instead.
    if default is None:
        default = []
    ex_args = params.get(param)
    if ex_args is None:
        return default
    assert isinstance(ex_args, list)
    return ex_args
4661
4662
class ISO639Utils(object):
    """Conversion helpers between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are used, so region-qualified codes
        # such as 'en-US' resolve like 'en'.  Returns None when unknown.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear reverse lookup over the map above; falls through (i.e.
        # implicitly returns None) when no two-letter equivalent exists.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4866
4867
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive on input; returns None for unknown codes.
        return cls._country_map.get(code.upper())
5126
5127
class GeoUtils(object):
    """Helpers for picking plausible IPv4 addresses per country (used for
    geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a str) from the given block.

        `code_or_block` is either a two-letter country code (looked up in
        _country_ip_map; returns None when unknown) or a CIDR block such
        as '1.2.3.0/24'.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Treat the given address as the lowest address of the block; the
        # highest one is obtained by setting all host bits.
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5386
5387
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets each request override the proxy via
    the internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            # Default arguments bind `type` (and the bound method) at
            # definition time, working around Python's late-binding
            # closures; '__noproxy__' is the "no proxy configured" sentinel.
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (set by the downloader) takes precedence over
        # the handler-wide configuration; the internal header is stripped
        # before the request goes out.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers wrap the socket with SOCKS
            # themselves, so nothing more to do here.
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5411
5412
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    The integer is rendered big-endian with no leading zero bytes (zero
    itself becomes a single NUL byte).  If optional blocksize is given and
    greater than zero, the front of the byte string is padded with binary
    zeros so that the length is a multiple of blocksize.
    """
    n = int(n)
    digits = bytearray()
    while n > 0:
        digits.append(n & 0xff)
        n >>= 8
    if not digits:
        # Only happens when n == 0 (or negative)
        digits.append(0)
    digits.reverse()
    s = bytes(digits)
    # Left-pad to a multiple of blocksize if requested
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5445
5446
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    acc = 0
    # bytearray iteration yields ints on both Python 2 and 3
    for byte in bytearray(s):
        acc = (acc << 8) | byte
    return acc
5462
5463
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Reversing the buffer before hexlify() interprets `data` as a
    # little-endian integer, matching OHDave's JavaScript implementation
    plaintext = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(plaintext, exponent, modulus)
5479
5480
5481 def pkcs1pad(data, length):
5482 """
5483 Padding input data with PKCS#1 scheme
5484
5485 @param {int[]} data input data
5486 @param {int} length target length
5487 @returns {int[]} padded data
5488 """
5489 if len(data) > length - 11:
5490 raise ValueError('Input data too long for PKCS#1 padding')
5491
5492 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5493 return [0, 2] + pseudo_random + [0] + data
5494
5495
def encode_base_n(num, n, table=None):
    """Encode the integer `num` in base `n`, using `table` as the digit
    alphabet (defaults to the first `n` characters of 0-9a-zA-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5512
5513
def decode_packed_codes(code):
    """Unpack JavaScript obfuscated with the common 'p.a.c.k.e.r.' scheme
    (matched by PACKED_CODES_RE): every base-N token in the packed source
    is substituted with its original keyword."""
    mobj = re.search(PACKED_CODES_RE, code)
    packed_source, radix, word_count, keyword_blob = mobj.groups()
    radix = int(radix)
    keywords = keyword_blob.split('|')

    # Build the token -> keyword table; an empty keyword means the token
    # stands for itself.
    lookup = {}
    for index in range(int(word_count)):
        token = encode_base_n(index, radix)
        lookup[token] = keywords[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda match: lookup[match.group(0)],
        packed_source)
5530
5531
def caesar(s, alphabet, shift):
    """Shift every character of `s` that occurs in `alphabet` by `shift`
    positions (wrapping around); all other characters pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    rotated = []
    for ch in s:
        if ch in alphabet:
            rotated.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            rotated.append(ch)
    return ''.join(rotated)
5539
5540
def rot47(s):
    """Apply the ROT47 cipher: rotate the 94 printable ASCII characters
    ('!' through '~') by 47 positions.  The transform is its own inverse."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5543
5544
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted,value",...')
    into a dict, stripping the surrounding double quotes from values."""
    return dict(
        (key, val[1:-1] if val.startswith('"') else val)
        for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib))
5552
5553
def urshift(val, n):
    """Unsigned 32-bit right shift, i.e. JavaScript's `>>>` operator:
    negative values are first reinterpreted as their 32-bit two's
    complement before shifting."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
5556
5557
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into (width, height, pixels).

    `pixels` contains one list per scanline; each scanline is a flat list
    of 8-bit channel values, three per pixel (the stride computation below
    assumes 24-bit RGB, non-interlaced data).

    Raises IOError for input that is not a PNG or contains no image data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]  # everything after the 8-byte PNG signature

    # The first chunk must be IHDR, directly after the signature
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is laid out as: 4-byte length, 4-byte type,
    # <length> bytes of data, 4-byte CRC (not verified here)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (guaranteed by the signature check above)
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split across several IDAT chunks; concatenated
    # they form a single zlib stream
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # 3 bytes (R, G, B) per pixel
    pixels = []

    # Channel value at linear index `idx` of the already-reconstructed output
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo per-scanline filtering; every scanline is prefixed with one
    # filter-type byte (types per the PNG spec, section 9)
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # `left` is the same channel of the pixel to the left (3 bytes
            # back, hence x > 2); `up` the same channel one scanline up
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0  # upper-left neighbor

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                # Paeth predictor: pick the neighbor closest to a + b - c
                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5663
5664
def write_xattr(path, key, value):
    """Set the extended filesystem attribute `key` to `value` (bytes) on
    `path`.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate
    Data Streams on Windows, then the `setfattr`/`xattr` CLI tools.
    Raises XAttrMetadataError when setting fails and XAttrUnavailableError
    when no usable implementation is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Both 'pyxattr' and 'xattr' install a module named `xattr`; they
        # are distinguished by their API (pyxattr has `set`)
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # ':' is the ADS separator, so it must not appear in the key
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Fall back to the CLI tools (GNU attr's setfattr, or xattr)
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # CLI tools take the value as a (unicode) string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5747
5748
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict mapping the given field names to string values
    (useful for filling age-gate forms)."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(days=random.randint(0, span_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
5759
5760
# Templates for internet shortcut files, which are plain text files.
# Each template has a leading newline stripped so the file starts with its
# first meaningful line; '%(url)s' / '%(filename)s' are filled in by callers.

# Windows-style '.url' shortcut (InternetShortcut INI format)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS '.webloc' shortcut (Apple XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org '.desktop' link ([Desktop Entry] format)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5786
5787
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    # Rebuild the authority component piece by piece (userinfo@host:port)
    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): port 80 is dropped unconditionally, i.e. also for
    # non-http schemes where 80 is not the default — confirm intended.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5830
5831
def to_high_limit_path(path):
    """On Windows/Cygwin, return the absolute path with the Win32
    extended-length prefix to lift the MAX_PATH limitation (individual
    path segments may still be length-limited); elsewhere return the path
    unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # A raw string literal cannot end with a backslash, hence the
    # trailing-space-plus-rstrip trick to spell the prefix
    return r'\\?\ '.rstrip() + os.path.abspath(path)
5838
5839
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Look up `field` in `obj` and render it through `template`.

    Values contained in `ignore` yield `default` instead; `func`, when
    given, transforms the value first (and its result is subject to the
    same `ignore` check).
    """
    value = obj.get(field, default)
    if value in ignore:
        return default
    if func:
        value = func(value)
        # A transformed value may itself be ignorable
        if value in ignore:
            return default
    return template % value
5845
5846
def clean_podcast_url(url):
    """Strip well-known podcast tracking/analytics redirect prefixes
    (Chartable, Blubrry, Podtrac, Acast, Podcorn, Podsights) out of `url`."""
    tracking_prefixes = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefixes, '', url)