]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
[la7] Add podcasts and podcast playlists (#198)
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import imp
20 import io
21 import itertools
22 import json
23 import locale
24 import math
25 import operator
26 import os
27 import platform
28 import random
29 import re
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_basestring,
44 compat_chr,
45 compat_cookiejar,
46 compat_ctypes_WINFUNCTYPE,
47 compat_etree_fromstring,
48 compat_expanduser,
49 compat_html_entities,
50 compat_html_entities_html5,
51 compat_http_client,
52 compat_integer_types,
53 compat_numeric_types,
54 compat_kwargs,
55 compat_os_name,
56 compat_parse_qs,
57 compat_shlex_quote,
58 compat_str,
59 compat_struct_pack,
60 compat_struct_unpack,
61 compat_urllib_error,
62 compat_urllib_parse,
63 compat_urllib_parse_urlencode,
64 compat_urllib_parse_urlparse,
65 compat_urllib_parse_urlunparse,
66 compat_urllib_parse_quote,
67 compat_urllib_parse_quote_plus,
68 compat_urllib_parse_unquote_plus,
69 compat_urllib_request,
70 compat_urlparse,
71 compat_xpath,
72 )
73
74 from .socks import (
75 ProxyType,
76 sockssocket,
77 )
78
79
def register_socks_protocols():
    """Make urlparse treat the SOCKS URL schemes as netloc-aware.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is "registered" here explicitly (skipping any already present).
    """
    known_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme not in known_schemes:
            known_schemes.append(socks_scheme)
87
88
# The type of a compiled regular expression is not exposed under a single
# stable public name across the Python versions supported here, so it is
# derived from an actual compiled pattern instead.
compiled_regex_type = re.compile('').__class__
91
92
def random_user_agent():
    """Return a realistic, randomly varied desktop User-Agent string.

    The UA always impersonates Chrome on 64-bit Windows 10; only the
    Chrome version component varies, picked uniformly at random
    (random.choice) from the fixed list of real Chrome release numbers
    below.  Uses the module-level `random` import.
    """
    # UA template: Chrome on Windows 10 x64; %s receives the version string.
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Real Chrome release version numbers (mixed stable/beta/dev releases,
    # listed roughly newest-to-oldest).  Order only matters for
    # reproducibility under a seeded RNG; selection is uniform.
    _CHROME_VERSIONS = (
        '74.0.3729.129', '76.0.3780.3', '76.0.3780.2', '74.0.3729.128', '76.0.3780.1', '76.0.3780.0', '75.0.3770.15', '74.0.3729.127',
        '74.0.3729.126', '76.0.3779.1', '76.0.3779.0', '75.0.3770.14', '74.0.3729.125', '76.0.3778.1', '76.0.3778.0', '75.0.3770.13',
        '74.0.3729.124', '74.0.3729.123', '73.0.3683.121', '76.0.3777.1', '76.0.3777.0', '75.0.3770.12', '74.0.3729.122', '76.0.3776.4',
        '75.0.3770.11', '74.0.3729.121', '76.0.3776.3', '76.0.3776.2', '73.0.3683.120', '74.0.3729.120', '74.0.3729.119', '74.0.3729.118',
        '76.0.3776.1', '76.0.3776.0', '76.0.3775.5', '75.0.3770.10', '74.0.3729.117', '76.0.3775.4', '76.0.3775.3', '74.0.3729.116',
        '75.0.3770.9', '76.0.3775.2', '76.0.3775.1', '76.0.3775.0', '75.0.3770.8', '74.0.3729.115', '74.0.3729.114', '76.0.3774.1',
        '76.0.3774.0', '75.0.3770.7', '74.0.3729.113', '74.0.3729.112', '74.0.3729.111', '76.0.3773.1', '76.0.3773.0', '75.0.3770.6',
        '74.0.3729.110', '74.0.3729.109', '76.0.3772.1', '76.0.3772.0', '75.0.3770.5', '74.0.3729.108', '74.0.3729.107', '76.0.3771.1',
        '76.0.3771.0', '75.0.3770.4', '74.0.3729.106', '74.0.3729.105', '75.0.3770.3', '74.0.3729.104', '74.0.3729.103', '74.0.3729.102',
        '75.0.3770.2', '74.0.3729.101', '75.0.3770.1', '75.0.3770.0', '74.0.3729.100', '75.0.3769.5', '75.0.3769.4', '74.0.3729.99',
        '75.0.3769.3', '75.0.3769.2', '75.0.3768.6', '74.0.3729.98', '75.0.3769.1', '75.0.3769.0', '74.0.3729.97', '73.0.3683.119',
        '73.0.3683.118', '74.0.3729.96', '75.0.3768.5', '75.0.3768.4', '75.0.3768.3', '75.0.3768.2', '74.0.3729.95', '74.0.3729.94',
        '75.0.3768.1', '75.0.3768.0', '74.0.3729.93', '74.0.3729.92', '73.0.3683.117', '74.0.3729.91', '75.0.3766.3', '74.0.3729.90',
        '75.0.3767.2', '75.0.3767.1', '75.0.3767.0', '74.0.3729.89', '73.0.3683.116', '75.0.3766.2', '74.0.3729.88', '75.0.3766.1',
        '75.0.3766.0', '74.0.3729.87', '73.0.3683.115', '74.0.3729.86', '75.0.3765.1', '75.0.3765.0', '74.0.3729.85', '73.0.3683.114',
        '74.0.3729.84', '75.0.3764.1', '75.0.3764.0', '74.0.3729.83', '73.0.3683.113', '75.0.3763.2', '75.0.3761.4', '74.0.3729.82',
        '75.0.3763.1', '75.0.3763.0', '74.0.3729.81', '73.0.3683.112', '75.0.3762.1', '75.0.3762.0', '74.0.3729.80', '75.0.3761.3',
        '74.0.3729.79', '73.0.3683.111', '75.0.3761.2', '74.0.3729.78', '74.0.3729.77', '75.0.3761.1', '75.0.3761.0', '73.0.3683.110',
        '74.0.3729.76', '74.0.3729.75', '75.0.3760.0', '74.0.3729.74', '75.0.3759.8', '75.0.3759.7', '75.0.3759.6', '74.0.3729.73',
        '75.0.3759.5', '74.0.3729.72', '73.0.3683.109', '75.0.3759.4', '75.0.3759.3', '74.0.3729.71', '75.0.3759.2', '74.0.3729.70',
        '73.0.3683.108', '74.0.3729.69', '75.0.3759.1', '75.0.3759.0', '74.0.3729.68', '73.0.3683.107', '74.0.3729.67', '75.0.3758.1',
        '75.0.3758.0', '74.0.3729.66', '73.0.3683.106', '74.0.3729.65', '75.0.3757.1', '75.0.3757.0', '74.0.3729.64', '73.0.3683.105',
        '74.0.3729.63', '75.0.3756.1', '75.0.3756.0', '74.0.3729.62', '73.0.3683.104', '75.0.3755.3', '75.0.3755.2', '73.0.3683.103',
        '75.0.3755.1', '75.0.3755.0', '74.0.3729.61', '73.0.3683.102', '74.0.3729.60', '75.0.3754.2', '74.0.3729.59', '75.0.3753.4',
        '74.0.3729.58', '75.0.3754.1', '75.0.3754.0', '74.0.3729.57', '73.0.3683.101', '75.0.3753.3', '75.0.3752.2', '75.0.3753.2',
        '74.0.3729.56', '75.0.3753.1', '75.0.3753.0', '74.0.3729.55', '73.0.3683.100', '74.0.3729.54', '75.0.3752.1', '75.0.3752.0',
        '74.0.3729.53', '73.0.3683.99', '74.0.3729.52', '75.0.3751.1', '75.0.3751.0', '74.0.3729.51', '73.0.3683.98', '74.0.3729.50',
        '75.0.3750.0', '74.0.3729.49', '74.0.3729.48', '74.0.3729.47', '75.0.3749.3', '74.0.3729.46', '73.0.3683.97', '75.0.3749.2',
        '74.0.3729.45', '75.0.3749.1', '75.0.3749.0', '74.0.3729.44', '73.0.3683.96', '74.0.3729.43', '74.0.3729.42', '75.0.3748.1',
        '75.0.3748.0', '74.0.3729.41', '75.0.3747.1', '73.0.3683.95', '75.0.3746.4', '74.0.3729.40', '74.0.3729.39', '75.0.3747.0',
        '75.0.3746.3', '75.0.3746.2', '74.0.3729.38', '75.0.3746.1', '75.0.3746.0', '74.0.3729.37', '73.0.3683.94', '75.0.3745.5',
        '75.0.3745.4', '75.0.3745.3', '75.0.3745.2', '74.0.3729.36', '75.0.3745.1', '75.0.3745.0', '75.0.3744.2', '74.0.3729.35',
        '73.0.3683.93', '74.0.3729.34', '75.0.3744.1', '75.0.3744.0', '74.0.3729.33', '73.0.3683.92', '74.0.3729.32', '74.0.3729.31',
        '73.0.3683.91', '75.0.3741.2', '75.0.3740.5', '74.0.3729.30', '75.0.3741.1', '75.0.3741.0', '74.0.3729.29', '75.0.3740.4',
        '73.0.3683.90', '74.0.3729.28', '75.0.3740.3', '73.0.3683.89', '75.0.3740.2', '74.0.3729.27', '75.0.3740.1', '75.0.3740.0',
        '74.0.3729.26', '73.0.3683.88', '73.0.3683.87', '74.0.3729.25', '75.0.3739.1', '75.0.3739.0', '73.0.3683.86', '74.0.3729.24',
        '73.0.3683.85', '75.0.3738.4', '75.0.3738.3', '75.0.3738.2', '75.0.3738.1', '75.0.3738.0', '74.0.3729.23', '73.0.3683.84',
        '74.0.3729.22', '74.0.3729.21', '75.0.3737.1', '75.0.3737.0', '74.0.3729.20', '73.0.3683.83', '74.0.3729.19', '75.0.3736.1',
        '75.0.3736.0', '74.0.3729.18', '73.0.3683.82', '74.0.3729.17', '75.0.3735.1', '75.0.3735.0', '74.0.3729.16', '73.0.3683.81',
        '75.0.3734.1', '75.0.3734.0', '74.0.3729.15', '73.0.3683.80', '74.0.3729.14', '75.0.3733.1', '75.0.3733.0', '75.0.3732.1',
        '74.0.3729.13', '74.0.3729.12', '73.0.3683.79', '74.0.3729.11', '75.0.3732.0', '74.0.3729.10', '73.0.3683.78', '74.0.3729.9',
        '74.0.3729.8', '74.0.3729.7', '75.0.3731.3', '75.0.3731.2', '75.0.3731.0', '74.0.3729.6', '73.0.3683.77', '73.0.3683.76',
        '75.0.3730.5', '75.0.3730.4', '73.0.3683.75', '74.0.3729.5', '73.0.3683.74', '75.0.3730.3', '75.0.3730.2', '74.0.3729.4',
        '73.0.3683.73', '73.0.3683.72', '75.0.3730.1', '75.0.3730.0', '74.0.3729.3', '73.0.3683.71', '74.0.3729.2', '73.0.3683.70',
        '74.0.3729.1', '74.0.3729.0', '74.0.3726.4', '73.0.3683.69', '74.0.3726.3', '74.0.3728.0', '74.0.3726.2', '73.0.3683.68',
        '74.0.3726.1', '74.0.3726.0', '74.0.3725.4', '73.0.3683.67', '73.0.3683.66', '74.0.3725.3', '74.0.3725.2', '74.0.3725.1',
        '74.0.3724.8', '74.0.3725.0', '73.0.3683.65', '74.0.3724.7', '74.0.3724.6', '74.0.3724.5', '74.0.3724.4', '74.0.3724.3',
        '74.0.3724.2', '74.0.3724.1', '74.0.3724.0', '73.0.3683.64', '74.0.3723.1', '74.0.3723.0', '73.0.3683.63', '74.0.3722.1',
        '74.0.3722.0', '73.0.3683.62', '74.0.3718.9', '74.0.3702.3', '74.0.3721.3', '74.0.3721.2', '74.0.3721.1', '74.0.3721.0',
        '74.0.3720.6', '73.0.3683.61', '72.0.3626.122', '73.0.3683.60', '74.0.3720.5', '72.0.3626.121', '74.0.3718.8', '74.0.3720.4',
        '74.0.3720.3', '74.0.3718.7', '74.0.3720.2', '74.0.3720.1', '74.0.3720.0', '74.0.3718.6', '74.0.3719.5', '73.0.3683.59',
        '74.0.3718.5', '74.0.3718.4', '74.0.3719.4', '74.0.3719.3', '74.0.3719.2', '74.0.3719.1', '73.0.3683.58', '74.0.3719.0',
        '73.0.3683.57', '73.0.3683.56', '74.0.3718.3', '73.0.3683.55', '74.0.3718.2', '74.0.3718.1', '74.0.3718.0', '73.0.3683.54',
        '74.0.3717.2', '73.0.3683.53', '74.0.3717.1', '74.0.3717.0', '73.0.3683.52', '74.0.3716.1', '74.0.3716.0', '73.0.3683.51',
        '74.0.3715.1', '74.0.3715.0', '73.0.3683.50', '74.0.3711.2', '74.0.3714.2', '74.0.3713.3', '74.0.3714.1', '74.0.3714.0',
        '73.0.3683.49', '74.0.3713.1', '74.0.3713.0', '72.0.3626.120', '73.0.3683.48', '74.0.3712.2', '74.0.3712.1', '74.0.3712.0',
        '73.0.3683.47', '72.0.3626.119', '73.0.3683.46', '74.0.3710.2', '72.0.3626.118', '74.0.3711.1', '74.0.3711.0', '73.0.3683.45',
        '72.0.3626.117', '74.0.3710.1', '74.0.3710.0', '73.0.3683.44', '72.0.3626.116', '74.0.3709.1', '74.0.3709.0', '74.0.3704.9',
        '73.0.3683.43', '72.0.3626.115', '74.0.3704.8', '74.0.3704.7', '74.0.3708.0', '74.0.3706.7', '74.0.3704.6', '73.0.3683.42',
        '72.0.3626.114', '74.0.3706.6', '72.0.3626.113', '74.0.3704.5', '74.0.3706.5', '74.0.3706.4', '74.0.3706.3', '74.0.3706.2',
        '74.0.3706.1', '74.0.3706.0', '73.0.3683.41', '72.0.3626.112', '74.0.3705.1', '74.0.3705.0', '73.0.3683.40', '72.0.3626.111',
        '73.0.3683.39', '74.0.3704.4', '73.0.3683.38', '74.0.3704.3', '74.0.3704.2', '74.0.3704.1', '74.0.3704.0', '73.0.3683.37',
        '72.0.3626.110', '72.0.3626.109', '74.0.3703.3', '74.0.3703.2', '73.0.3683.36', '74.0.3703.1', '74.0.3703.0', '73.0.3683.35',
        '72.0.3626.108', '74.0.3702.2', '74.0.3699.3', '74.0.3702.1', '74.0.3702.0', '73.0.3683.34', '72.0.3626.107', '73.0.3683.33',
        '74.0.3701.1', '74.0.3701.0', '73.0.3683.32', '73.0.3683.31', '72.0.3626.105', '74.0.3700.1', '74.0.3700.0', '73.0.3683.29',
        '72.0.3626.103', '74.0.3699.2', '74.0.3699.1', '74.0.3699.0', '73.0.3683.28', '72.0.3626.102', '73.0.3683.27', '73.0.3683.26',
        '74.0.3698.0', '74.0.3696.2', '72.0.3626.101', '73.0.3683.25', '74.0.3696.1', '74.0.3696.0', '74.0.3694.8', '72.0.3626.100',
        '74.0.3694.7', '74.0.3694.6', '74.0.3694.5', '74.0.3694.4', '72.0.3626.99', '72.0.3626.98', '74.0.3694.3', '73.0.3683.24',
        '72.0.3626.97', '72.0.3626.96', '72.0.3626.95', '73.0.3683.23', '72.0.3626.94', '73.0.3683.22', '73.0.3683.21', '72.0.3626.93',
        '74.0.3694.2', '72.0.3626.92', '74.0.3694.1', '74.0.3694.0', '74.0.3693.6', '73.0.3683.20', '72.0.3626.91', '74.0.3693.5',
        '74.0.3693.4', '74.0.3693.3', '74.0.3693.2', '73.0.3683.19', '74.0.3693.1', '74.0.3693.0', '73.0.3683.18', '72.0.3626.90',
        '74.0.3692.1', '74.0.3692.0', '73.0.3683.17', '72.0.3626.89', '74.0.3687.3', '74.0.3691.1', '74.0.3691.0', '73.0.3683.16',
        '72.0.3626.88', '72.0.3626.87', '73.0.3683.15', '74.0.3690.1', '74.0.3690.0', '73.0.3683.14', '72.0.3626.86', '73.0.3683.13',
        '73.0.3683.12', '74.0.3689.1', '74.0.3689.0', '73.0.3683.11', '72.0.3626.85', '73.0.3683.10', '72.0.3626.84', '73.0.3683.9',
        '74.0.3688.1', '74.0.3688.0', '73.0.3683.8', '72.0.3626.83', '74.0.3687.2', '74.0.3687.1', '74.0.3687.0', '73.0.3683.7',
        '72.0.3626.82', '74.0.3686.4', '72.0.3626.81', '74.0.3686.3', '74.0.3686.2', '74.0.3686.1', '74.0.3686.0', '73.0.3683.6',
        '72.0.3626.80', '74.0.3685.1', '74.0.3685.0', '73.0.3683.5', '72.0.3626.79', '74.0.3684.1', '74.0.3684.0', '73.0.3683.4',
        '72.0.3626.78', '72.0.3626.77', '73.0.3683.3', '73.0.3683.2', '72.0.3626.76', '73.0.3683.1', '73.0.3683.0', '72.0.3626.75',
        '71.0.3578.141', '73.0.3682.1', '73.0.3682.0', '72.0.3626.74', '71.0.3578.140', '73.0.3681.4', '73.0.3681.3', '73.0.3681.2',
        '73.0.3681.1', '73.0.3681.0', '72.0.3626.73', '71.0.3578.139', '72.0.3626.72', '72.0.3626.71', '73.0.3680.1', '73.0.3680.0',
        '72.0.3626.70', '71.0.3578.138', '73.0.3678.2', '73.0.3679.1', '73.0.3679.0', '72.0.3626.69', '71.0.3578.137', '73.0.3678.1',
        '73.0.3678.0', '71.0.3578.136', '73.0.3677.1', '73.0.3677.0', '72.0.3626.68', '72.0.3626.67', '71.0.3578.135', '73.0.3676.1',
        '73.0.3676.0', '73.0.3674.2', '72.0.3626.66', '71.0.3578.134', '73.0.3674.1', '73.0.3674.0', '72.0.3626.65', '71.0.3578.133',
        '73.0.3673.2', '73.0.3673.1', '73.0.3673.0', '72.0.3626.64', '71.0.3578.132', '72.0.3626.63', '72.0.3626.62', '72.0.3626.61',
        '72.0.3626.60', '73.0.3672.1', '73.0.3672.0', '72.0.3626.59', '71.0.3578.131', '73.0.3671.3', '73.0.3671.2', '73.0.3671.1',
        '73.0.3671.0', '72.0.3626.58', '71.0.3578.130', '73.0.3670.1', '73.0.3670.0', '72.0.3626.57', '71.0.3578.129', '73.0.3669.1',
        '73.0.3669.0', '72.0.3626.56', '71.0.3578.128', '73.0.3668.2', '73.0.3668.1', '73.0.3668.0', '72.0.3626.55', '71.0.3578.127',
        '73.0.3667.2', '73.0.3667.1', '73.0.3667.0', '72.0.3626.54', '71.0.3578.126', '73.0.3666.1', '73.0.3666.0', '72.0.3626.53',
        '71.0.3578.125', '73.0.3665.4', '73.0.3665.3', '72.0.3626.52', '73.0.3665.2', '73.0.3664.4', '73.0.3665.1', '73.0.3665.0',
        '72.0.3626.51', '71.0.3578.124', '72.0.3626.50', '73.0.3664.3', '73.0.3664.2', '73.0.3664.1', '73.0.3664.0', '73.0.3663.2',
        '72.0.3626.49', '71.0.3578.123', '73.0.3663.1', '73.0.3663.0', '72.0.3626.48', '71.0.3578.122', '73.0.3662.1', '73.0.3662.0',
        '72.0.3626.47', '71.0.3578.121', '73.0.3661.1', '72.0.3626.46', '73.0.3661.0', '72.0.3626.45', '71.0.3578.120', '73.0.3660.2',
        '73.0.3660.1', '73.0.3660.0', '72.0.3626.44', '71.0.3578.119', '73.0.3659.1', '73.0.3659.0', '72.0.3626.43', '71.0.3578.118',
        '73.0.3658.1', '73.0.3658.0', '72.0.3626.42', '71.0.3578.117', '73.0.3657.1', '73.0.3657.0', '72.0.3626.41', '71.0.3578.116',
        '73.0.3656.1', '73.0.3656.0', '72.0.3626.40', '71.0.3578.115', '73.0.3655.1', '73.0.3655.0', '72.0.3626.39', '71.0.3578.114',
        '73.0.3654.1', '73.0.3654.0', '72.0.3626.38', '71.0.3578.113', '73.0.3653.1', '73.0.3653.0', '72.0.3626.37', '71.0.3578.112',
        '73.0.3652.1', '73.0.3652.0', '72.0.3626.36', '71.0.3578.111', '73.0.3651.1', '73.0.3651.0', '72.0.3626.35', '71.0.3578.110',
        '73.0.3650.1', '73.0.3650.0', '72.0.3626.34', '71.0.3578.109', '73.0.3649.1', '73.0.3649.0', '72.0.3626.33', '71.0.3578.108',
        '73.0.3648.2', '73.0.3648.1', '73.0.3648.0', '72.0.3626.32', '71.0.3578.107', '73.0.3647.2', '73.0.3647.1', '73.0.3647.0',
        '72.0.3626.31', '71.0.3578.106', '73.0.3635.3', '73.0.3646.2', '73.0.3646.1', '73.0.3646.0', '72.0.3626.30', '71.0.3578.105',
        '72.0.3626.29', '73.0.3645.2', '73.0.3645.1', '73.0.3645.0', '72.0.3626.28', '71.0.3578.104', '72.0.3626.27', '72.0.3626.26',
        '72.0.3626.25', '72.0.3626.24', '73.0.3644.0', '73.0.3643.2', '72.0.3626.23', '71.0.3578.103', '73.0.3643.1', '73.0.3643.0',
        '72.0.3626.22', '71.0.3578.102', '73.0.3642.1', '73.0.3642.0', '72.0.3626.21', '71.0.3578.101', '73.0.3641.1', '73.0.3641.0',
        '72.0.3626.20', '71.0.3578.100', '72.0.3626.19', '73.0.3640.1', '73.0.3640.0', '72.0.3626.18', '73.0.3639.1', '71.0.3578.99',
        '73.0.3639.0', '72.0.3626.17', '73.0.3638.2', '72.0.3626.16', '73.0.3638.1', '73.0.3638.0', '72.0.3626.15', '71.0.3578.98',
        '73.0.3635.2', '71.0.3578.97', '73.0.3637.1', '73.0.3637.0', '72.0.3626.14', '71.0.3578.96', '71.0.3578.95', '72.0.3626.13',
        '71.0.3578.94', '73.0.3636.2', '71.0.3578.93', '73.0.3636.1', '73.0.3636.0', '72.0.3626.12', '71.0.3578.92', '73.0.3635.1',
        '73.0.3635.0', '72.0.3626.11', '71.0.3578.91', '73.0.3634.2', '73.0.3634.1', '73.0.3634.0', '72.0.3626.10', '71.0.3578.90',
        '71.0.3578.89', '73.0.3633.2', '73.0.3633.1', '73.0.3633.0', '72.0.3610.4', '72.0.3626.9', '71.0.3578.88', '73.0.3632.5',
        '73.0.3632.4', '73.0.3632.3', '73.0.3632.2', '73.0.3632.1', '73.0.3632.0', '72.0.3626.8', '71.0.3578.87', '73.0.3631.2',
        '73.0.3631.1', '73.0.3631.0', '72.0.3626.7', '71.0.3578.86', '72.0.3626.6', '73.0.3630.1', '73.0.3630.0', '72.0.3626.5',
        '71.0.3578.85', '72.0.3626.4', '73.0.3628.3', '73.0.3628.2', '73.0.3629.1', '73.0.3629.0', '72.0.3626.3', '71.0.3578.84',
        '73.0.3628.1', '73.0.3628.0', '71.0.3578.83', '73.0.3627.1', '73.0.3627.0', '72.0.3626.2', '71.0.3578.82', '71.0.3578.81',
        '71.0.3578.80', '72.0.3626.1', '72.0.3626.0', '71.0.3578.79', '70.0.3538.124', '71.0.3578.78', '72.0.3623.4', '72.0.3625.2',
        '72.0.3625.1', '72.0.3625.0', '71.0.3578.77', '70.0.3538.123', '72.0.3624.4', '72.0.3624.3', '72.0.3624.2', '71.0.3578.76',
        '72.0.3624.1', '72.0.3624.0', '72.0.3623.3', '71.0.3578.75', '70.0.3538.122', '71.0.3578.74', '72.0.3623.2', '72.0.3610.3',
        '72.0.3623.1', '72.0.3623.0', '72.0.3622.3', '72.0.3622.2', '71.0.3578.73', '70.0.3538.121', '72.0.3622.1', '72.0.3622.0',
        '71.0.3578.72', '70.0.3538.120', '72.0.3621.1', '72.0.3621.0', '71.0.3578.71', '70.0.3538.119', '72.0.3620.1', '72.0.3620.0',
        '71.0.3578.70', '70.0.3538.118', '71.0.3578.69', '72.0.3619.1', '72.0.3619.0', '71.0.3578.68', '70.0.3538.117', '71.0.3578.67',
        '72.0.3618.1', '72.0.3618.0', '71.0.3578.66', '70.0.3538.116', '72.0.3617.1', '72.0.3617.0', '71.0.3578.65', '70.0.3538.115',
        '72.0.3602.3', '71.0.3578.64', '72.0.3616.1', '72.0.3616.0', '71.0.3578.63', '70.0.3538.114', '71.0.3578.62', '72.0.3615.1',
        '72.0.3615.0', '71.0.3578.61', '70.0.3538.113', '72.0.3614.1', '72.0.3614.0', '71.0.3578.60', '70.0.3538.112', '72.0.3613.1',
        '72.0.3613.0', '71.0.3578.59', '70.0.3538.111', '72.0.3612.2', '72.0.3612.1', '72.0.3612.0', '70.0.3538.110', '71.0.3578.58',
        '70.0.3538.109', '72.0.3611.2', '72.0.3611.1', '72.0.3611.0', '71.0.3578.57', '70.0.3538.108', '72.0.3610.2', '71.0.3578.56',
        '71.0.3578.55', '72.0.3610.1', '72.0.3610.0', '71.0.3578.54', '70.0.3538.107', '71.0.3578.53', '72.0.3609.3', '71.0.3578.52',
        '72.0.3609.2', '71.0.3578.51', '72.0.3608.5', '72.0.3609.1', '72.0.3609.0', '71.0.3578.50', '70.0.3538.106', '72.0.3608.4',
        '72.0.3608.3', '72.0.3608.2', '71.0.3578.49', '72.0.3608.1', '72.0.3608.0', '70.0.3538.105', '71.0.3578.48', '72.0.3607.1',
        '72.0.3607.0', '71.0.3578.47', '70.0.3538.104', '72.0.3606.2', '72.0.3606.1', '72.0.3606.0', '71.0.3578.46', '70.0.3538.103',
        '70.0.3538.102', '72.0.3605.3', '72.0.3605.2', '72.0.3605.1', '72.0.3605.0', '71.0.3578.45', '70.0.3538.101', '71.0.3578.44',
        '71.0.3578.43', '70.0.3538.100', '70.0.3538.99', '71.0.3578.42', '72.0.3604.1', '72.0.3604.0', '71.0.3578.41', '70.0.3538.98',
        '71.0.3578.40', '72.0.3603.2', '72.0.3603.1', '72.0.3603.0', '71.0.3578.39', '70.0.3538.97', '72.0.3602.2', '71.0.3578.38',
        '71.0.3578.37', '72.0.3602.1', '72.0.3602.0', '71.0.3578.36', '70.0.3538.96', '72.0.3601.1', '72.0.3601.0', '71.0.3578.35',
        '70.0.3538.95', '72.0.3600.1', '72.0.3600.0', '71.0.3578.34', '70.0.3538.94', '72.0.3599.3', '72.0.3599.2', '72.0.3599.1',
        '72.0.3599.0', '71.0.3578.33', '70.0.3538.93', '72.0.3598.1', '72.0.3598.0', '71.0.3578.32', '70.0.3538.87', '72.0.3597.1',
        '72.0.3597.0', '72.0.3596.2', '71.0.3578.31', '70.0.3538.86', '71.0.3578.30', '71.0.3578.29', '72.0.3596.1', '72.0.3596.0',
        '71.0.3578.28', '70.0.3538.85', '72.0.3595.2', '72.0.3591.3', '72.0.3595.1', '72.0.3595.0', '71.0.3578.27', '70.0.3538.84',
        '72.0.3594.1', '72.0.3594.0', '71.0.3578.26', '70.0.3538.83', '72.0.3593.2', '72.0.3593.1', '72.0.3593.0', '71.0.3578.25',
        '70.0.3538.82', '72.0.3589.3', '72.0.3592.2', '72.0.3592.1', '72.0.3592.0', '71.0.3578.24', '72.0.3589.2', '70.0.3538.81',
        '70.0.3538.80', '72.0.3591.2', '72.0.3591.1', '72.0.3591.0', '71.0.3578.23', '70.0.3538.79', '71.0.3578.22', '72.0.3590.1',
        '72.0.3590.0', '71.0.3578.21', '70.0.3538.78', '70.0.3538.77', '72.0.3589.1', '72.0.3589.0', '71.0.3578.20', '70.0.3538.76',
        '71.0.3578.19', '70.0.3538.75', '72.0.3588.1', '72.0.3588.0', '71.0.3578.18', '70.0.3538.74', '72.0.3586.2', '72.0.3587.0',
        '71.0.3578.17', '70.0.3538.73', '72.0.3586.1', '72.0.3586.0', '71.0.3578.16', '70.0.3538.72', '72.0.3585.1', '72.0.3585.0',
        '71.0.3578.15', '70.0.3538.71', '71.0.3578.14', '72.0.3584.1', '72.0.3584.0', '71.0.3578.13', '70.0.3538.70', '72.0.3583.2',
        '71.0.3578.12', '72.0.3583.1', '72.0.3583.0', '71.0.3578.11', '70.0.3538.69', '71.0.3578.10', '72.0.3582.0', '72.0.3581.4',
        '71.0.3578.9', '70.0.3538.67', '72.0.3581.3', '72.0.3581.2', '72.0.3581.1', '72.0.3581.0', '71.0.3578.8', '70.0.3538.66',
        '72.0.3580.1', '72.0.3580.0', '71.0.3578.7', '70.0.3538.65', '71.0.3578.6', '72.0.3579.1', '72.0.3579.0', '71.0.3578.5',
        '70.0.3538.64', '71.0.3578.4', '71.0.3578.3', '71.0.3578.2', '71.0.3578.1', '71.0.3578.0', '70.0.3538.63', '69.0.3497.128',
        '70.0.3538.62', '70.0.3538.61', '70.0.3538.60', '70.0.3538.59', '71.0.3577.1', '71.0.3577.0', '70.0.3538.58', '69.0.3497.127',
        '71.0.3576.2', '71.0.3576.1', '71.0.3576.0', '70.0.3538.57', '70.0.3538.56', '71.0.3575.2', '70.0.3538.55', '69.0.3497.126',
        '70.0.3538.54', '71.0.3575.1', '71.0.3575.0', '71.0.3574.1', '71.0.3574.0', '70.0.3538.53', '69.0.3497.125', '70.0.3538.52',
        '71.0.3573.1', '71.0.3573.0', '70.0.3538.51', '69.0.3497.124', '71.0.3572.1', '71.0.3572.0', '70.0.3538.50', '69.0.3497.123',
        '71.0.3571.2', '70.0.3538.49', '69.0.3497.122', '71.0.3571.1', '71.0.3571.0', '70.0.3538.48', '69.0.3497.121', '71.0.3570.1',
        '71.0.3570.0', '70.0.3538.47', '69.0.3497.120', '71.0.3568.2', '71.0.3569.1', '71.0.3569.0', '70.0.3538.46', '69.0.3497.119',
        '70.0.3538.45', '71.0.3568.1', '71.0.3568.0', '70.0.3538.44', '69.0.3497.118', '70.0.3538.43', '70.0.3538.42', '71.0.3567.1',
        '71.0.3567.0', '70.0.3538.41', '69.0.3497.117', '71.0.3566.1', '71.0.3566.0', '70.0.3538.40', '69.0.3497.116', '71.0.3565.1',
        '71.0.3565.0', '70.0.3538.39', '69.0.3497.115', '71.0.3564.1', '71.0.3564.0', '70.0.3538.38', '69.0.3497.114', '71.0.3563.0',
        '71.0.3562.2', '70.0.3538.37', '69.0.3497.113', '70.0.3538.36', '70.0.3538.35', '71.0.3562.1', '71.0.3562.0', '70.0.3538.34',
        '69.0.3497.112', '70.0.3538.33', '71.0.3561.1', '71.0.3561.0', '70.0.3538.32', '69.0.3497.111', '71.0.3559.6', '71.0.3560.1',
        '71.0.3560.0', '71.0.3559.5', '71.0.3559.4', '70.0.3538.31', '69.0.3497.110', '71.0.3559.3', '70.0.3538.30', '69.0.3497.109',
        '71.0.3559.2', '71.0.3559.1', '71.0.3559.0', '70.0.3538.29', '69.0.3497.108', '71.0.3558.2', '71.0.3558.1', '71.0.3558.0',
        '70.0.3538.28', '69.0.3497.107', '71.0.3557.2', '71.0.3557.1', '71.0.3557.0', '70.0.3538.27', '69.0.3497.106', '71.0.3554.4',
        '70.0.3538.26', '71.0.3556.1', '71.0.3556.0', '70.0.3538.25', '71.0.3554.3', '69.0.3497.105', '71.0.3554.2', '70.0.3538.24',
        '69.0.3497.104', '71.0.3555.2', '70.0.3538.23', '71.0.3555.1', '71.0.3555.0', '70.0.3538.22', '69.0.3497.103', '71.0.3554.1',
        '71.0.3554.0', '70.0.3538.21', '69.0.3497.102', '71.0.3553.3', '70.0.3538.20', '69.0.3497.101', '71.0.3553.2', '69.0.3497.100',
        '71.0.3553.1', '71.0.3553.0', '70.0.3538.19', '69.0.3497.99', '69.0.3497.98', '69.0.3497.97', '71.0.3552.6', '71.0.3552.5',
        '71.0.3552.4', '71.0.3552.3', '71.0.3552.2', '71.0.3552.1', '71.0.3552.0', '70.0.3538.18', '69.0.3497.96', '71.0.3551.3',
        '71.0.3551.2', '71.0.3551.1', '71.0.3551.0', '70.0.3538.17', '69.0.3497.95', '71.0.3550.3', '71.0.3550.2', '71.0.3550.1',
        '71.0.3550.0', '70.0.3538.16', '69.0.3497.94', '71.0.3549.1', '71.0.3549.0', '70.0.3538.15', '69.0.3497.93', '69.0.3497.92',
        '71.0.3548.1', '71.0.3548.0', '70.0.3538.14', '69.0.3497.91', '71.0.3547.1', '71.0.3547.0', '70.0.3538.13', '69.0.3497.90',
        '71.0.3546.2', '69.0.3497.89', '71.0.3546.1', '71.0.3546.0', '70.0.3538.12', '69.0.3497.88', '71.0.3545.4', '71.0.3545.3',
        '71.0.3545.2', '71.0.3545.1', '71.0.3545.0', '70.0.3538.11', '69.0.3497.87', '71.0.3544.5', '71.0.3544.4', '71.0.3544.3',
        '71.0.3544.2', '71.0.3544.1', '71.0.3544.0', '69.0.3497.86', '70.0.3538.10', '69.0.3497.85', '70.0.3538.9', '69.0.3497.84',
        '71.0.3543.4', '70.0.3538.8', '71.0.3543.3', '71.0.3543.2', '71.0.3543.1', '71.0.3543.0', '70.0.3538.7', '69.0.3497.83',
        '71.0.3542.2', '71.0.3542.1', '71.0.3542.0', '70.0.3538.6', '69.0.3497.82', '69.0.3497.81', '71.0.3541.1', '71.0.3541.0',
        '70.0.3538.5', '69.0.3497.80', '71.0.3540.1', '71.0.3540.0', '70.0.3538.4', '69.0.3497.79', '70.0.3538.3', '71.0.3539.1',
        '71.0.3539.0', '69.0.3497.78', '68.0.3440.134', '69.0.3497.77', '70.0.3538.2', '70.0.3538.1', '70.0.3538.0', '69.0.3497.76',
        '68.0.3440.133', '69.0.3497.75', '70.0.3537.2', '70.0.3537.1', '70.0.3537.0', '69.0.3497.74', '68.0.3440.132', '70.0.3536.0',
        '70.0.3535.5', '70.0.3535.4', '70.0.3535.3', '69.0.3497.73', '68.0.3440.131', '70.0.3532.8', '70.0.3532.7', '69.0.3497.72',
        '69.0.3497.71', '70.0.3535.2', '70.0.3535.1', '70.0.3535.0', '69.0.3497.70', '68.0.3440.130', '69.0.3497.69', '68.0.3440.129',
        '70.0.3534.4', '70.0.3534.3', '70.0.3534.2', '70.0.3534.1', '70.0.3534.0', '69.0.3497.68', '68.0.3440.128', '70.0.3533.2',
        '70.0.3533.1', '70.0.3533.0', '69.0.3497.67', '68.0.3440.127', '70.0.3532.6', '70.0.3532.5', '70.0.3532.4', '69.0.3497.66',
        '68.0.3440.126', '70.0.3532.3', '70.0.3532.2', '70.0.3532.1', '69.0.3497.60', '69.0.3497.65', '69.0.3497.64', '70.0.3532.0',
        '70.0.3531.0', '70.0.3530.4', '70.0.3530.3', '70.0.3530.2', '69.0.3497.58', '68.0.3440.125', '69.0.3497.57', '69.0.3497.56',
        '69.0.3497.55', '69.0.3497.54', '70.0.3530.1', '70.0.3530.0', '69.0.3497.53', '68.0.3440.124', '69.0.3497.52', '70.0.3529.3',
        '70.0.3529.2', '70.0.3529.1', '70.0.3529.0', '69.0.3497.51', '70.0.3528.4', '68.0.3440.123', '70.0.3528.3', '70.0.3528.2',
        '70.0.3528.1', '70.0.3528.0', '69.0.3497.50', '68.0.3440.122', '70.0.3527.1', '70.0.3527.0', '69.0.3497.49', '68.0.3440.121',
        '70.0.3526.1', '70.0.3526.0', '68.0.3440.120', '69.0.3497.48', '69.0.3497.47', '68.0.3440.119', '68.0.3440.118', '70.0.3525.5',
        '70.0.3525.4', '70.0.3525.3', '68.0.3440.117', '69.0.3497.46', '70.0.3525.2', '70.0.3525.1', '70.0.3525.0', '69.0.3497.45',
        '68.0.3440.116', '70.0.3524.4', '70.0.3524.3', '69.0.3497.44', '70.0.3524.2', '70.0.3524.1', '70.0.3524.0', '70.0.3523.2',
        '69.0.3497.43', '68.0.3440.115', '70.0.3505.9', '69.0.3497.42', '70.0.3505.8', '70.0.3523.1', '70.0.3523.0', '69.0.3497.41',
        '68.0.3440.114', '70.0.3505.7', '69.0.3497.40', '70.0.3522.1', '70.0.3522.0', '70.0.3521.2', '69.0.3497.39', '68.0.3440.113',
        '70.0.3505.6', '70.0.3521.1', '70.0.3521.0', '69.0.3497.38', '68.0.3440.112', '70.0.3520.1', '70.0.3520.0', '69.0.3497.37',
        '68.0.3440.111', '70.0.3519.3', '70.0.3519.2', '70.0.3519.1', '70.0.3519.0', '69.0.3497.36', '68.0.3440.110', '70.0.3518.1',
        '70.0.3518.0', '69.0.3497.35', '69.0.3497.34', '68.0.3440.109', '70.0.3517.1', '70.0.3517.0', '69.0.3497.33', '68.0.3440.108',
        '69.0.3497.32', '70.0.3516.3', '70.0.3516.2', '70.0.3516.1', '70.0.3516.0', '69.0.3497.31', '68.0.3440.107', '70.0.3515.4',
        '68.0.3440.106', '70.0.3515.3', '70.0.3515.2', '70.0.3515.1', '70.0.3515.0', '69.0.3497.30', '68.0.3440.105', '68.0.3440.104',
        '70.0.3514.2', '70.0.3514.1', '70.0.3514.0', '69.0.3497.29', '68.0.3440.103', '70.0.3513.1', '70.0.3513.0', '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# Default headers attached to every HTTP request (see YoutubeDLHandler.http_request);
# the User-Agent is randomized once per process via random_user_agent()
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings that callers can opt into
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel distinguishing "no default supplied" from an explicit default of None
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code, for parsing localized textual dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# File extensions recognized as audio/video container or codec formats
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# Container extensions accepted as remux targets
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to a close ASCII transliteration)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strptime() formats tried when parsing free-form date strings; numeric
# day/month-ambiguous forms live in the *_DAY_FIRST / *_MONTH_FIRST lists below
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Variants interpreting ambiguous numeric dates day-first (e.g. 01/02/2020 -> Feb 1st)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Variants interpreting ambiguous numeric dates month-first (e.g. 01/02/2020 -> Jan 2nd)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the `}('...',N,N,'...'.split('|')` boilerplate emitted by packed javascript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks; JSON payload in group 'json_ld'
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Verify that the reported encoding is actually usable
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1800
1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible.

    Non-serializable values are stringified via repr() (default=repr).
    The data is written to a temporary file in the target directory and
    renamed into place so readers never see a partially-written file.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # (bug fix: these lambdas previously ignored their argument and
        # always operated on the closed-over fn)
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temporary file next to the target so os.rename stays on
    # the same filesystem (and is therefore atomic where supported)
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Apply the process umask to the 0o666 default so the result
            # matches what a plain open() would have created
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Clean up the orphaned temporary file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        predicate = '[@%s]' % key if val is None else "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (manual scan for Python < 2.7) """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1876
1877 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1878 # the namespace parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps in an xpath to '{namespace-uri}tag' using ns_map."""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            # No namespace prefix on this step
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (or any of a list of xpaths).

    Returns `default` if given and nothing matched, raises ExtractorError
    when `fatal` and nothing matched, otherwise returns None on no match.
    """
    def _find_xpath(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        found = _find_xpath(xpath)
    else:
        # Try each candidate xpath in order, keeping the first hit
        for candidate in xpath:
            found = _find_xpath(candidate)
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element, but return the matched element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        # No element found; xpath_element already applied default/fatal logic
        return elem
    if elem.text is not None:
        return elem.text
    # Element exists but carries no text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute `key` on the element matching xpath[@key]."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % (
            '%s[@%s]' % (xpath, key) if name is None else name))
    return None
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # NB: the parameter name `id` shadows the builtin but is part of the public signature
    return get_element_by_attribute('id', id, html)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying the given attribute/value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class attributes that contain class_name as a whole word
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    # `value` may already be a regex fragment (see get_elements_by_class),
    # in which case the caller passes escape_value=False
    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        # Strip one level of surrounding quotes, if present
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Filled in by handle_starttag; stays empty if no tag is seen
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Keep only the attributes of the most recently seen start tag
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    # (return whatever attributes were gathered before the error)
    except compat_HTMLParseError:
        pass
    return parser.attrs
2027
2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Convert hard newlines to spaces, then <br> and paragraph breaks to newlines
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop all remaining tags, then decode HTML entities
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Switch stdout to binary mode so media bytes are not mangled
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming; re-raise as-is
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable RFC 2822 date
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # NB: the order of these checks is semantic — accent transliteration
        # first, then hard-banned characters, then restricted-mode rules
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            # Non-ASCII is not allowed at all in restricted mode
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of underscores introduced by the substitutions above
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        # Avoid hidden-file / bare-dot names
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2126
2127
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    # `force` applies Windows-style sanitization on other platforms too;
    # on Windows itself the normal handling always runs
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters forbidden in Windows path components (plus a
    # trailing dot/space, which Windows also rejects) with '#'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve absolute paths in forced mode
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2151
2152
def sanitize_url(url):
    """Fix common URL mistakes: add `http:` to scheme-relative URLs and repair known scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:' + url
    # (broken_scheme_regex, replacement) pairs for typos seen so far
    scheme_typos = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for broken, repaired in scheme_typos:
        if re.match(broken, url):
            return re.sub(broken, repaired, url)
    return url
2169
2170
def sanitized_Request(url, *args, **kwargs):
    # Build a urllib Request with the URL passed through sanitize_url() first
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2173
2174
def expand_path(s):
    """Expand shell variables and ~"""
    # Environment variables first, then the user home directory
    return os.path.expandvars(compat_expanduser(s))
2178
2179
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # Uses list membership (not a set) so unhashable elements are supported
    deduplicated = []
    for element in iterable:
        if element in deduplicated:
            continue
        deduplicated.append(element)
    return deduplicated
2187
2188
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    # Strip the trailing ';'
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: decimal (&#65;) or hexadecimal (&#x41;)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2218
2219
def unescapeHTML(s):
    """Replace HTML entities (named and numeric) in s; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    # Each '&...;' occurrence is decoded individually by _htmlentity_transform
    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2227
2228
def process_communicate_or_kill(p, *args, **kwargs):
    """p.communicate(...), but kill the subprocess if communicate() is interrupted."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        # Do not leave the child process running behind us
        p.kill()
        p.wait()
        raise
2236
2237
def get_subprocess_encoding():
    """Return the text encoding used when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere use the filesystem encoding, which may be None (e.g. Jython)
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding
2248
2249
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess On Python 2, always encode with the subprocess
                          encoding (skips the Windows Unicode-API shortcut)
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2272
2273
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename back to text; a no-op on Python 3 or for non-bytes input."""
    # Python 3 filenames are already text, and non-bytes values need no decoding
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2283
2284
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2292
2293
def decodeArgument(b):
    # Inverse of encodeArgument (subprocess encoding on Python 2, no-op on Python 3)
    return decodeFilename(b, True)
2296
2297
def decodeOption(optval):
    """Decode a command-line option value to text using the locale's preferred encoding."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2306
2307
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as [H<delim>]MM<delim>SS (minutes/seconds zero-padded).

    Bug fix: the boundaries were exclusive (`>`), so exactly 3600s rendered
    as '60:00' instead of '1:00:00' and exactly 60s as '60' instead of '1:00'.
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2315
2316
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honouring the 'nocheckcertificate' option,
    with fallbacks for the SSL APIs available in older Python versions."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2340
2341
def bug_reports_message():
    """Return the standard footer appended to unexpected-error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    return ''.join((
        '; please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))
2351
2352
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    # All custom exceptions in this module derive from this class
    pass
2356
2357
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """

        # Network and timeout errors are never bugs, so force expected=True
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report footer appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Render the stored traceback as a string; None if no traceback was given
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2385
2386
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url
2392
2393
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2397
2398
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # Country codes the content is restricted to, if known
        self.countries = countries
2410
2411
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2424
2425
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2433
2434
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2442
2443
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
2454
2455
class ExistingVideoReached(YoutubeDLError):
    """ Raised when an already-downloaded video is encountered.

    NOTE(review): the previous docstring ("--max-downloads limit has been
    reached") was copy-pasted from MaxDownloadsReached; presumably this is
    tied to --break-on-existing — verify against the raising code.
    """
    pass
2459
2460
class RejectedVideoReached(YoutubeDLError):
    """ Raised when a rejected (filtered-out) video is encountered.

    NOTE(review): the previous docstring ("--max-downloads limit has been
    reached") was copy-pasted from MaxDownloadsReached; presumably this is
    tied to --break-on-reject — verify against the raising code.
    """
    pass
2464
2465
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2469
2470
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2478
2479
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2495
2496
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails; `reason` classifies the failure."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code  # errno value, if known
        self.msg = msg

        # Parsing code and msg
        # (classify into NO_SPACE / VALUE_TOO_LONG / NOT_SUPPORTED by
        # errno when available, falling back to matching the message text)
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2511
2512
class XAttrUnavailableError(YoutubeDLError):
    # Raised when no mechanism for writing extended attributes is available
    pass
2515
2516
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection object, honouring the 'source_address' option."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only remote addresses whose family matches the source address
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2580
2581
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header and,
    when it is present, drop any Accept-Encoding header as well."""
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = {}
    for key, value in headers.items():
        if key.lower() != 'accept-encoding':
            filtered[key] = value
    del filtered['Youtubedl-no-compression']
    return filtered
2590
2591
2592 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2593 """Handler for HTTP requests and responses.
2594
2595 This class, when installed with an OpenerDirector, automatically adds
2596 the standard headers to every HTTP request and handles gzipped and
2597 deflated responses from web servers. If compression is to be avoided in
2598 a particular request, the original request in the program code only has
2599 to include the HTTP header "Youtubedl-no-compression", which will be
2600 removed before making the real request.
2601
2602 Part of this code was copied from:
2603
2604 http://techknack.net/python-urllib2-handlers/
2605
2606 Andrew Rowls, the author of that code, agreed to release it to the
2607 public domain.
2608 """
2609
    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        # YoutubeDL params dict (e.g. 'source_address' is read by _create_http_connection)
        self._params = params
2613
    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # Route through a SOCKS proxy when the internal Ytdl-socks-proxy
        # marker header is set; the marker is stripped before sending
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)
2625
2626 @staticmethod
2627 def deflate(data):
2628 if not data:
2629 return data
2630 try:
2631 return zlib.decompress(data, -zlib.MAX_WBITS)
2632 except zlib.error:
2633 return zlib.decompress(data)
2634
2635 def http_request(self, req):
2636 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2637 # always respected by websites, some tend to give out URLs with non percent-encoded
2638 # non-ASCII characters (see telemb.py, ard.py [#3412])
2639 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2640 # To work around aforementioned issue we will replace request's original URL with
2641 # percent-encoded one
2642 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2643 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2644 url = req.get_full_url()
2645 url_escaped = escape_url(url)
2646
2647 # Substitute URL if any change after escaping
2648 if url != url_escaped:
2649 req = update_Request(req, url=url_escaped)
2650
2651 for h, v in std_headers.items():
2652 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2653 # The dict keys are capitalized because of this bug by urllib
2654 if h.capitalize() not in req.headers:
2655 req.add_header(h, v)
2656
2657 req.headers = handle_youtubedl_headers(req.headers)
2658
2659 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2660 # Python 2.6 is brain-dead when it comes to fragments
2661 req._Request__original = req._Request__original.partition('#')[0]
2662 req._Request__r_type = req._Request__r_type.partition('#')[0]
2663
2664 return req
2665
2666 def http_response(self, req, resp):
2667 old_resp = resp
2668 # gzip
2669 if resp.headers.get('Content-encoding', '') == 'gzip':
2670 content = resp.read()
2671 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2672 try:
2673 uncompressed = io.BytesIO(gz.read())
2674 except IOError as original_ioerror:
2675 # There may be junk add the end of the file
2676 # See http://stackoverflow.com/q/4928560/35070 for details
2677 for i in range(1, 1024):
2678 try:
2679 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2680 uncompressed = io.BytesIO(gz.read())
2681 except IOError:
2682 continue
2683 break
2684 else:
2685 raise original_ioerror
2686 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2687 resp.msg = old_resp.msg
2688 del resp.headers['Content-encoding']
2689 # deflate
2690 if resp.headers.get('Content-encoding', '') == 'deflate':
2691 gz = io.BytesIO(self.deflate(resp.read()))
2692 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2693 resp.msg = old_resp.msg
2694 del resp.headers['Content-encoding']
2695 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2696 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2697 if 300 <= resp.code < 400:
2698 location = resp.headers.get('Location')
2699 if location:
2700 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2701 if sys.version_info >= (3, 0):
2702 location = location.encode('iso-8859-1').decode('utf-8')
2703 else:
2704 location = location.decode('utf-8')
2705 location_escaped = escape_url(location)
2706 if location != location_escaped:
2707 del resp.headers['Location']
2708 if sys.version_info < (3, 0):
2709 location_escaped = location_escaped.encode('utf-8')
2710 resp.headers['Location'] = location_escaped
2711 return resp
2712
2713 https_request = http_request
2714 https_response = http_response
2715
2716
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* (an HTTP(S)Connection)
    whose connect() tunnels through the SOCKS proxy given as a URL,
    e.g. 'socks5://user:pass@host:port'."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    # NOTE(review): an unrecognized scheme leaves socks_type unbound and
    # raises NameError when proxy_args is built below -- callers appear to
    # be expected to pass a valid socks*, verify upstream

    def unquote_if_non_empty(s):
        # Credentials arrive percent-encoded inside the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,  # 1080 = default SOCKS port
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunneled socket in TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2758
2759
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that supports a custom connection class and SOCKS
    proxying via the internal 'Ytdl-socks-proxy' marker header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params  # ydl options dict, kept for subclasses

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        # Forward the handler's SSL context / hostname checking settings
        # to do_open when this Python version exposes them
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2783
2784
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in cookies.txt files; stripped on load
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Make a raw cookie-file line digestible by MozillaCookieJar:
            # strip the HttpOnly prefix and validate the field layout,
            # raising LoadError for malformed entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2901
2902
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies cookie handling to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE(review): the workaround below is intentionally disabled;
        # kept for reference.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS as well
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2925
2926
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler; only overrides behavior on Python 2."""
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2934
2935
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (offset, remaining_string) where offset is a datetime.timedelta.
    'Z', a missing designator, or an unsigned one all yield a zero offset.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        return datetime.timedelta(), date_str
    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
2952
2953
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # strptime cannot digest fractional seconds here; drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        parsed = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        return None
    return calendar.timegm(parsed.timetuple())
2971
2972
def date_formats(day_first=True):
    """Return the strptime format list to try, ordered by day/month preference."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2975
2976
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None
    upload_date = None
    # Commas confuse strptime; replace them with spaces
    date_str = date_str.replace(',', ' ')
    # Strip AM/PM markers and a trailing timezone abbreviation
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # Try every known format; a later matching format overrides earlier ones
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        parsed = email.utils.parsedate_tz(date_str)
        if parsed:
            try:
                upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3003
3004
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    day_first selects day-first (DD.MM) over month-first (MM.DD) format
    preference; returns None when nothing parses.
    """
    if date_str is None:
        return None

    # Commas and pipes carry no date information
    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker means 12 hours are added after parsing
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3036
3037
3038 def determine_ext(url, default_ext='unknown_video'):
3039 if url is None or '.' not in url:
3040 return default_ext
3041 guess = url.partition('?')[0].rpartition('.')[2]
3042 if re.match(r'^[A-Za-z0-9]+$', guess):
3043 return guess
3044 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3045 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
3046 return guess.rstrip('/')
3047 else:
3048 return default_ext
3049
3050
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle file name by swapping in a '<lang>.<format>' extension."""
    new_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, new_ext, expected_real_ext)
3053
3054
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # A bad approximation? Months and years become fixed day counts
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3082
3083
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(match.groups()) if match is not None else date_str
3092
3093
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3123
3124
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Python 2 may hand back bytes; normalize to text
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())
    assert isinstance(name, compat_str)
    return name
3133
3134
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C-runtime file descriptors to Win32 standard-handle ids
    WIN_OUTPUT_IDS = {
        1: -11,  # STD_OUTPUT_HANDLE
        2: -12,  # STD_ERROR_HANDLE
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # Only a character-device, non-remote handle on which GetConsoleMode
        # succeeds counts as a real console
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 BMP characters per call; a non-BMP character
        # is written alone as two UTF-16 code units
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3208
3209
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (default: stderr), coping with Python 2/3
    stream differences and Windows consoles."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer WriteConsoleW so non-ANSI characters survive on Windows
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode explicitly
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3230
3231
def bytes_to_intlist(bs):
    """Convert a bytes/str sequence into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    # Python 2: indexing yields one-character strings
    return [ord(ch) for ch in bs]
3239
3240
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a bytes object."""
    if not xs:
        return b''
    fmt = '%dB' % len(xs)
    return compat_struct_pack(fmt, *xs)
3245
3246
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure passed to Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering (effectively) the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Acquire an exclusive (0x2 = LOCKFILE_EXCLUSIVE_LOCK) or shared
        # lock over the whole file via the Win32 API
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release the lock acquired by _lock_file (requires its OVERLAPPED)
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3320
3321
class locked_file(object):
    """File wrapper that holds an advisory lock for the lifetime of a
    `with` block (shared for reads, exclusive for writes/appends)."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Readers share the lock; writers need it exclusively
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3351
3352
def get_filesystem_encoding():
    """Name of the filesystem encoding, defaulting to 'utf-8' when unknown."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3356
3357
def shell_quote(args):
    """Join *args* into a single shell-safe command line string."""
    encoding = get_filesystem_encoding()

    def quote_one(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            arg = arg.decode(encoding)
        return compat_shlex_quote(arg)

    return ' '.join(quote_one(a) for a in args)
3367
3368
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, smuggled)
3377
3378
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url; returns (url, data_or_default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3386
3387
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'.

    Accepts ints, floats or numeric strings; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so values >= 1024**9 no longer index past the suffix table
        exponent = min(int(math.log(bytes, 1024.0)), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3400
3401
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' from *s* using *unit_table*; None if no match."""
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # Accept ',' as a decimal separator as well
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3411
3412
def parse_filesize(s):
    """Parse a human-readable file size ('5.5MiB', '300kB', ...) into bytes.

    Both decimal (kB = 1000) and binary (KiB = 1024) prefixes are
    understood; returns None when *s* is None or does not parse.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3482
3483
def parse_count(s):
    """Parse a human count such as '1.5M' or '12,345' into an int (None on failure)."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers, possibly with thousand separators
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    suffixes = {}
    for suffix, multiplier in (
            ('k', 1000), ('K', 1000),
            ('m', 1000 ** 2), ('M', 1000 ** 2),
            ('kk', 1000 ** 2), ('KK', 1000 ** 2)):
        suffixes[suffix] = multiplier
    return lookup_unit_table(suffixes, s)
3503
3504
def parse_resolution(s):
    """Extract {'width': ..., 'height': ...} hints from a resolution string."""
    if s is None:
        return {}

    # '1920x1080' (also '×' and uppercase 'X' as separators)
    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {'width': int(m.group('w')), 'height': int(m.group('h'))}

    # '1080p' / '480i'
    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    # '4k' / '8k' marketing names map to 2160 / 4320 lines
    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        return {'height': int(m.group(1)) * 540}

    return {}
3525
3526
def parse_bitrate(s):
    """Extract a bitrate in kbps from a string like '128 kbps'; None otherwise."""
    if not isinstance(s, compat_str):
        return None
    m = re.search(r'\b(\d+)\s*kbps', s)
    return int(m.group(1)) if m else None
3533
3534
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    # Unknown languages fall back to the English month names
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return names.index(name) + 1
    except ValueError:
        return None
3544
3545
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3554
3555
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Skip '&' that already begins a known entity or a character reference
    bare_amp = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_amp.sub('&amp;', xml_str)
3562
3563
def setproctitle(title):
    """Best effort: set the process name (as shown by ps) via libc prctl."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME on Linux -- see prctl(2)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3588
3589
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3592
3593
def remove_end(s, end):
    """Strip *end* from the end of *s* when present (None-safe)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3596
3597
def remove_quotes(s):
    """Drop one pair of matching surrounding quotes (single or double)."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3605
3606
def get_domain(url):
    """Extract the domain (without scheme and leading 'www.') from *url*."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if m is None:
        return None
    return m.group('domain')
3610
3611
def url_basename(url):
    """Return the last path component of *url* (query/fragment excluded)."""
    path = compat_urlparse.urlparse(url).path
    return path.rstrip('/').rpartition('/')[2]
3615
3616
def base_url(url):
    """Return *url* up to and including the last '/' before any query/fragment."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3619
3620
def urljoin(base, path):
    """Join *path* onto *base*; returns None when either part is unusable."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    # Absolute (or protocol-relative) paths are returned untouched
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3634
3635
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that issues HEAD instead of GET."""
    def get_method(self):
        return 'HEAD'
3639
3640
class PUTRequest(compat_urllib_request.Request):
    """A urllib Request that issues PUT instead of GET."""
    def get_method(self):
        return 'PUT'
3644
3645
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* (optionally attribute *get_attr* of it) to an int scaled
    by invscale/scale, returning *default* on failure."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    # Treat the empty string like a missing value
    if v == '' or v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3658
3659
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
3662
3663
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and stray '+' signs
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3671
3672
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a float scaled by invscale/scale, else *default*."""
    if v is None:
        return default
    try:
        scaled = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return scaled
3680
3681
def bool_or_none(v, default=None):
    """Return *v* only if it is an actual bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3684
3685
def strip_or_none(v, default=None):
    """Return v.strip() for string input, *default* otherwise."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3688
3689
def url_or_none(url):
    """Return the stripped *url* if it looks like a supported URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    # Accept http(s), rtmp-family, rtsp, mms and ftp(s) schemes, plus
    # protocol-relative URLs
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3695
3696
def strftime_or_none(timestamp, date_format, default=None):
    """Format *timestamp* (unix time or 'YYYYMMDD' string) with strftime,
    returning *default* on any conversion failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3707
3708
def parse_duration(s):
    """Parse a duration string into seconds (float), or None on failure.

    Supports colon-separated times ([[DD:]HH:]MM:SS[.ms]), ISO-8601-like
    forms (e.g. PT1H2M3S, '1 hour 2 min'), and lone '<n> hours'/'<n> mins'.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1) colon-separated clock format, e.g. 1:02:03.5 or 02:03
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2) ISO-8601-ish / worded format; years, months and weeks are
        #    matched but deliberately not captured (ignored)
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3) fractional single-unit format, e.g. '2.5 hours' or '90 mins'
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whatever components were captured; `ms` still carries its leading
    # dot, so float(ms) is already a fraction of a second
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3765
3766
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    If *expected_real_ext* is given and the actual extension differs,
    *ext* is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3773
3774
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    If *expected_real_ext* is given and the actual extension differs,
    *ext* is appended to the whole filename instead of replacing.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}'.format(name, ext)
3780
3781
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3791
3792
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3810
3811
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version number from an executable's *output* using
    *version_re*, falling back to *unrecognized* when nothing matches."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    match = re.search(version_re, output)
    return match.group(1) if match else unrecognized
3821
3822
class PagedList(object):
    """Base class for lazily-fetched paged result lists.

    Subclasses are expected to implement getslice(start, end).
    """
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3827
3828
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc(pagenum),
    optionally caching each fetched page."""
    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return results [start:end) across page boundaries as a flat list."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # Absolute indices covered by this page: [firstid, nextfirstid)
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of `start` within this page (0 on later pages)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of `end` within this page (None when the page is fully used)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3879
3880
class InAdvancePagedList(PagedList):
    """PagedList for sources where the total page count is known up front."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return results [start:end) across page boundaries as a flat list."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Elements to drop from the first page so the slice starts at `start`
        skip_elems = start - start_page * self._pagesize
        # Total number of elements still wanted (None = everything)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the remaining demand; trim and stop
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3908
3909
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX (8-hex-digit) escape sequences in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda match: decode(match.group(0))[0],
        s)
3916
3917
def lowercase_escape(s):
    """Decode literal \\uXXXX (4-hex-digit) escape sequences in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda match: decode(match.group(0))[0],
        s)
3924
3925
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2's quote() cannot handle unicode input, so pre-encode to UTF-8
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
3931
3932
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component is percent-escaped
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment),
    ).geturl()
3943
3944
def read_batch_urls(batch_fd):
    """Read URLs from an open batch file, skipping BOMs, comments and blanks."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM, whether still raw or already decoded
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
3962
3963
def urlencode_postdata(*args, **kargs):
    """URL-encode the given data and return it as ASCII bytes for a POST body."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3966
3967
def update_url_query(url, query):
    """Return *url* with the parameters in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parsed._replace(query=new_query))
3976
3977
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its URL, body, headers and query."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    # Preserve the HTTP verb of the original request
    req_type = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        new_url, data=data or req.data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3996
3997
def _multipart_encode_impl(data, boundary):
    """Serialize *data* as multipart/form-data using the given boundary.

    Raises ValueError if the boundary occurs inside any field.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += b'--' + boundary_bytes + b'\r\n' + part

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
4018
4019
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    # Retry with a fresh random boundary whenever the current one collides
    # with the payload; a caller-supplied boundary is never replaced.
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None
4048
4049
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable value in *d* for one key or a sequence of keys.

    None values are always skipped; falsy values are skipped too unless
    skip_false_values is False.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4058
4059
def try_get(src, getter, expected_type=None):
    """Apply each getter callable to *src* and return the first result that
    succeeds (and matches *expected_type*, when given); otherwise None."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4071
4072
def merge_dicts(*dicts):
    """Merge dicts left-to-right, ignoring None values.

    The first non-None value for a key wins, except that a later non-empty
    string may replace an earlier empty string.
    """
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            is_new_key = key not in merged
            replaces_empty_str = (
                not is_new_key
                and isinstance(value, compat_str) and value
                and isinstance(merged[key], compat_str)
                and not merged[key])
            if is_new_key or replaces_empty_str:
                merged[key] = value
    return merged
4085
4086
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* unchanged if it is already a compat_str, otherwise
    decode it with the given encoding."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4089
4090
# US (MPAA-style) movie rating labels mapped to a minimum viewer age,
# consumed by parse_age_limit() below
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines labels mapped to a minimum viewer age,
# consumed by parse_age_limit() below
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4108
4109
def parse_age_limit(s):
    """Parse an age limit from an int or a rating string.

    Accepts plain ints in [0, 21], "NN+" strings, US movie ratings and
    US TV parental guideline labels. Returns an int age, or None when the
    input cannot be interpreted.
    """
    # isinstance is the idiomatic type check; bool is excluded explicitly
    # because it subclasses int (the previous `type(s) == int` also
    # rejected bools, so behavior is unchanged)
    if isinstance(s, int) and not isinstance(s, bool):
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4125
4126
def strip_jsonp(code):
    """Strip a JSONP wrapper and return the raw callback payload."""
    # Matches `funcName(...)`, optionally preceded by `window.` or guarded
    # as `funcName && funcName(...)`, with trailing `;` and line comments.
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4135
4136
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal into parseable JSON.

    Handles single-quoted strings, comments, trailing commas, unquoted
    identifiers used as keys, and hex/octal integers.
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, numeric base) pairs for hex and octal integer literals,
    # optionally followed by ':' when used as an object key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite a single token matched by the big regex below
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to their JSON equivalents
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Keys must be quoted in JSON; bare values must not be
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4181
4182
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def lookup(qid):
        # Rank equals the position in quality_ids; unknown ids rank -1
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return lookup
4191
4192
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types; the value appears to be the default filename
# infix used when writing that file type (None = no automatic infix)
# NOTE(review): verify the exact value semantics against the callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4207
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# NOTE: `{0}` below is a placeholder for the mapping-key pattern; callers
# must instantiate this template with str.format() before compiling it.
FORMAT_RE = r'''(?x)
    (?<!%)
    %
    \({0}\) # mapping key
    (?:[#0\-+ ]+)? # conversion flags (optional)
    (?:\d+)? # minimum field width (optional)
    (?:\.\d+)? # precision (optional)
    [hlL]? # length modifier (optional)
    (?P<type>[diouxXeEfFgGcrs%]) # conversion type
    '''
4221
4222
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Truncate so that the result, including the ellipses, fits in `length`
    return s[:length - len(ELLIPSES)] + ELLIPSES
4231
4232
def version_tuple(v):
    """Split a dot/dash-separated version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4235
4236
def is_outdated_version(version, limit, assume_new=True):
    """Return True when *version* is older than *limit*.

    Empty or unparsable input reports `not assume_new`.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4244
4245
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    return False

    # NOTE: the code below is intentionally unreachable - self-update is
    # currently disabled, so this function always reports False.
    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4253
4254
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4258
4259
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    if sys.version_info[0] >= 3:
        return str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return str(err).decode(preferredencoding())
4267
4268
def mimetype2ext(mt):
    """Map a MIME type to a file extension, or None for None input.

    Unknown subtypes are returned as-is (normalized to lowercase).
    """
    if mt is None:
        return None

    # Exact full-type matches that the subtype-based lookup would get wrong
    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in full_type_map:
        return full_type_map[mt]

    # Otherwise decide on the subtype alone, with parameters stripped
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(subtype, subtype)
4306
4307
def parse_codecs(codecs_str):
    """Split an RFC 6381 "codecs" string into vcodec/acodec fields.

    When only two unrecognized codecs are present, they are assumed to be
    video and audio, in that order.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec = acodec = None
    for full_codec in split_codecs:
        base = full_codec.split('.')[0]
        if base in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            vcodec = vcodec or full_codec
        elif base in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4337
4338
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers.

    Prefers the Content-Disposition filename, falling back to Content-Type.
    """
    headers = url_handle.headers

    cd = headers.get('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4351
4352
def encode_data_uri(data, mime_type):
    """Build a base64 `data:` URI for *data* with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4355
4356
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No age limit configured, or content has no restriction: never block
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4365
4366
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Decode using the BOM's encoding when present, UTF-8 otherwise
    decoded = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4385
4386
def determine_protocol(info_dict):
    """Derive the download protocol for *info_dict*, preferring an explicit
    'protocol' entry, then URL prefix, then extension, then URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for known_scheme in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(known_scheme):
            return known_scheme

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4407
4408
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def get_max_lens(table):
        # Widest stringified cell per column
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only the columns whose corresponding filter entry is truthy
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns that are empty in every data row (header not counted)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    if delim:
        # Insert a dashed separator row between header and data
        table = [header_row] + [['-' * ml for ml in max_lens]] + data
    # Left-align every column except the last, which runs ragged
    format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4429
4430
def _match_one(filter_part, dct):
    """Evaluate one filter expression (e.g. "duration > 60", "title = 'x'",
    "!is_live") against the dict *dct*.

    Raises ValueError for malformed expressions or unsupported operator/value
    combinations.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key <op>[?] value, where value is a number (with optional SI/binary
    # suffix), a quoted string, or a bare string
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes that were escaped inside the quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Allow human-readable sizes such as 500k or 1.2MiB
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # A '?' after the operator makes the filter pass on missing fields
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    # Bare "key" (truthy/present) or "!key" (falsy/absent) test
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4499
4500
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
4506
4507
def match_filter_func(filter_str):
    """Build a match-filter callable: returns None when the video passes,
    or a human-readable skip reason otherwise."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4516
4517
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return

    # Plain seconds, with optional trailing 's' unit
    m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if m:
        return float(m.group('time_offset'))

    # HH:MM:SS with either '.fff' fraction or ':fff' frame-style suffix
    m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if m:
        hours, minutes, seconds = m.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4529
4530
def srt_subtitles_timecode(seconds):
    """Format a seconds value as an SRT timecode, HH:MM:SS,mmm."""
    hrs = seconds / 3600
    mins = (seconds % 3600) / 60
    secs = seconds % 60
    msecs = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, msecs)
4533
4534
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Old TTML namespace URIs that must be normalized to the current ones
    # before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML style properties that are translated to SRT markup below
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # NOTE(review): these are class-level attributes shared between
        # instances of this per-call class; balanced start/end events keep
        # the lists empty between paragraphs
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and feed it through the markup-translating parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces before XML parsing
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style definitions; a style may inherit from a parent style that
    # has not been processed yet, so iterate until no unresolved parent remains
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style attached to <body> or <div> becomes the document default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4697
4698
def cli_option(params, command_option, param):
    """Build a ['--option', 'value'] pair for an external command line.

    @param params          options dict to read from
    @param command_option  the CLI flag to emit (e.g. '--proxy')
    @param param           key to look up in params
    @returns [command_option, value-as-str] when the key is set,
             [] when it is absent or None
    """
    param = params.get(param)
    if param is not None:
        # Always stringify present values. The previous `if param:` guard
        # skipped the conversion for falsy-but-present values (e.g. 0),
        # leaking a non-str into the subprocess argument list.
        param = compat_str(param)
    return [command_option, param] if param is not None else []
4704
4705
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option as CLI arguments.

    Returns ['--flag', 'true'] (or ['--flag=true'] when a separator is
    given); returns [] when the option is unset. The stored value must
    be an actual bool.
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4714
4715
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare CLI flag when the stored option equals expected_value."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4719
4720
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Resolve a per-key list of extra CLI arguments.

    argdict maps lowercase keys to argument lists. `keys` is a list of
    lookup keys (a plain string, or a tuple of aliases tried together);
    the first entry with any match wins and all its matched lists are
    flattened into one. A plain list/tuple argdict is the legacy form:
    it is returned as-is when use_compat is set, otherwise ignored.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for aliases in keys:
        if isinstance(aliases, compat_str):
            aliases = (aliases,)
        matches = [argdict[alias.lower()] for alias in aliases
                   if argdict.get(alias.lower()) is not None]
        if matches:
            return [arg for args in matches for arg in args]
    return default
4741
4742
class ISO639Utils(object):
    """Conversion between ISO 639-1 (2-letter) and ISO 639-2/T (3-letter)
    language codes, including a few codes superseded in the 1989 revision."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are considered, so tags with a
        # region suffix (e.g. 'en-US') resolve via their base language.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup over the table; implicitly returns None when
        # there is no matching 3-letter code.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4946
4947
class ISO3166Utils(object):
    """Mapping from ISO 3166-1 alpha-2 country codes to full country names."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes
        return cls._country_map.get(code.upper())
5206
5207
class GeoUtils(object):
    """Helpers for geo-restriction bypass: pick a random IPv4 address
    from a country's major address block."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as str) inside the given block.

        code_or_block is either a 2-letter country code (looked up in
        _country_ip_map; returns None when unknown) or an explicit CIDR
        block such as '1.2.3.0/24'.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            # Anything that is not two characters long is treated as a
            # CIDR block string
            block = code_or_block
        addr, preflen = block.split('/')
        # Base address of the block as an unsigned 32-bit integer
        # (assumes the address part has no host bits set, as in the map)
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Highest address in the block: base with all host bits set
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5466
5467
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that honours a per-request proxy set via the
    'Ytdl-request-proxy' header, and hands SOCKS proxies off to the
    http/https handlers (which wrap the socket themselves)."""

    def __init__(self, proxies=None):
        # Set default handlers. The lambda default arguments bind the
        # current `type` (and the bound method) at definition time, so
        # each registered handler routes through proxy_open with
        # '__noproxy__' unless the parent class supplies a proxy.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy attached to this specific request overrides the default;
        # the internal header is stripped before the request goes out.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do the socket wrapping with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5491
5492
5493 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5494 # released into Public Domain
5495 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5496
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a non-negative integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front
    of the byte string with binary zeros so that the length is a
    multiple of blocksize.
    """
    n = int(n)
    # Emit the integer 32 bits at a time, most significant word first
    words = []
    while n > 0:
        words.insert(0, compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    s = b''.join(words)
    # Strip leading zero bytes, but keep a single NUL when n == 0
    s = s.lstrip(b'\000') or b'\000'
    # Add back pad bytes so the length is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5525
5526
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to an integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Zero-pad on the left so the length is a multiple of 4
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s
    acc = 0
    # Fold in one 32-bit big-endian word at a time
    for off in range(0, len(s), 4):
        acc = (acc << 32) | compat_struct_unpack('>I', s[off:off + 4])[0]
    return acc
5542
5543
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object (interpreted
              little-endian, i.e. reversed before conversion)
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Interpret the reversed bytes as one big integer, then do textbook RSA
    plaintext_int = int(binascii.hexlify(data[::-1]), 16)
    ciphertext_int = pow(plaintext_int, exponent, modulus)
    return '%x' % ciphertext_int
5559
5560
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data ([0, 2, PS..., 0, data...])
    @raises ValueError when data cannot fit in `length` with 11 bytes
            of minimum PKCS#1 overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 2313 section 8.1: the padding string PS must consist of
    # pseudo-randomly generated *nonzero* octets, because the first zero
    # octet marks the end of the padding during decryption.
    # (randint(0, 254) could emit a 0 and truncate the padding.)
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5574
5575
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer `num` in base `n`, using `table`
    as the digit alphabet (defaults to 0-9a-zA-Z truncated to n digits).

    Raises ValueError when the table has fewer than n digits.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse
    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
5592
5593
def decode_packed_codes(code):
    """Unpack JavaScript obfuscated with the common 'packer' scheme
    (matched by PACKED_CODES_RE): rebuild the word -> symbol table and
    substitute every base-n encoded word back into the code."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map each base-n encoded index to its symbol; an empty symbol means
    # the word stands for itself
    symbol_table = {}
    for index in range(count - 1, -1, -1):
        encoded = encode_base_n(index, base)
        symbol_table[encoded] = symbols[index] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5610
5611
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher: rotate every character of `s` that occurs
    in `alphabet` by `shift` positions (wrapping around); characters not
    in the alphabet are passed through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)
    rotated = []
    for ch in s:
        pos = alphabet.find(ch)
        rotated.append(ch if pos < 0 else alphabet[(pos + shift) % size])
    return ''.join(rotated)
5619
5620
def rot47(s):
    # ROT47: Caesar-shift every printable ASCII character ('!' .. '~',
    # a 94-character alphabet) by 47 positions; the transform is its own
    # inverse.
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5623
5624
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=VALUE,KEY="VAL,UE",...') into a
    dict. Double quotes around quoted values are removed; commas inside
    quoted values are preserved."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
5632
5633
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's `>>>` operator):
    negative values are reinterpreted as their unsigned 32-bit
    representation before shifting."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
5636
5637
5638 # Based on png2str() written by @gdkchan and improved by @yokrysty
5639 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG image data into (width, height, pixels).

    `pixels` is a list of rows; each row is a flat list of channel byte
    values. The code assumes 3 bytes per pixel (see `stride = width * 3`),
    i.e. 8-bit RGB — other pixel formats or interlacing are not
    accounted for. Raises IOError on invalid input.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # The 8-byte PNG signature must come first, immediately followed by
    # an IHDR chunk (type bytes at offset 4..8 of the first chunk)
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk; width and height are its first 8 bytes
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # The compressed image data may be split over several IDAT chunks;
    # concatenate them before inflating
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Each scanline is one filter-type byte followed by `stride` bytes
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by its flat index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # The 'left' neighbour is one whole pixel (3 bytes) back, so
            # it only exists from the second pixel of the row onwards
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (PNG spec, filter types 0-4)
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5743
5744
def write_xattr(path, key, value):
    """Set the extended attribute `key` to `value` (bytes) on file `path`.

    Tries, in order: the pyxattr/xattr Python modules; on Windows, NTFS
    Alternate Data Streams; otherwise the setfattr/xattr command-line
    tools. Raises XAttrMetadataError when setting fails and
    XAttrUnavailableError when no usable implementation is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5827
5828
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict mapping the given field names to string values
    (useful for filling age-gate forms)."""
    start_date = datetime.date(1950, 1, 1)
    end_date = datetime.date(1995, 12, 31)
    span_days = (end_date - start_date).days
    random_date = start_date + datetime.timedelta(random.randint(0, span_days))
    return dict(zip(
        (year_field, month_field, day_field),
        (str(random_date.year), str(random_date.month), str(random_date.day))))
5839
5840
# Templates for internet shortcut files, which are plain text files.
# All templates use %-style substitution and strip their leading newline.

# Windows-style .url shortcut (INI format)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut (XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Freedesktop .desktop shortcut (used on Linux desktops)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5866
5867
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # NOTE(review): .hostname is None for netloc-less IRIs (e.g. 'mailto:'),
    # which would raise AttributeError here — confirm callers always pass
    # http(s)-style URLs.
    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): port 80 is dropped regardless of scheme, so an explicit
    # ':80' on an https URL would be (incorrectly) stripped — verify intent.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5910
5911
def to_high_limit_path(path):
    """Return *path* in a form that escapes Windows' MAX_PATH limit.

    On win32/cygwin the absolute path is prefixed with the extended-length
    marker '\\\\?\\'; elsewhere the path is returned untouched.  Individual
    path segments may still be length-limited on Windows.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # r'\\?\ '.rstrip() == '\\\\?\\' — the trailing space dodges the
    # raw-string-cannot-end-in-backslash limitation.
    return r'\\?\ '.rstrip() + os.path.abspath(path)
5918
5919
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Look up *field* in *obj* and render it through *template*.

    Values contained in *ignore* yield *default* instead.  When *func* is
    given it transforms the value first; if the transformed value lands in
    *ignore*, *default* is returned as well.
    """
    val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
5925
5926
def clean_podcast_url(url):
    """Strip known podcast analytics/tracking redirect prefixes from *url*.

    Measurement services wrap the real media host behind their own domain
    (e.g. 'chtbl.com/track/<id>/real-host/...'); removing the wrapper
    leaves the direct media URL.
    """
    tracking_prefixes = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefixes, '', url)
5942
5943
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID as a lowercase string.

    Fix: the previous hand-rolled template replaced the variant position
    (the 'y' in 'xxxxxxxx-xxxx-4xxx-yxxx-...') with *any* hex digit, which
    could produce strings that are not valid variant-1 UUIDs.  Delegating
    to the stdlib guarantees a well-formed UUID.
    """
    import uuid  # local import: the module-level import block is elsewhere
    return str(uuid.uuid4())
5949
5950
def make_dir(path, to_screen=None):
    """Ensure the parent directory of *path* exists.

    @param path       file path whose dirname should be created
    @param to_screen  optional callable used to report a failure message
    @returns          True on success (or when nothing needed creating),
                      False if the directory could not be created
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Bug fix: this used to be `if callable(to_screen) is not None:`,
        # which is always True (callable() returns a bool) and therefore
        # crashed with TypeError when to_screen was left as None.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
5961
5962
def get_executable_path():
    """Return the absolute base directory the program is running from.

    Three launch modes are distinguished: a PyInstaller bundle (sys.frozen
    set), execution from a zip archive (module loaded by zipimporter), and
    a plain source checkout.
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):
        # PyInstaller bundle: the directory containing the frozen executable.
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):
        # Running from inside a ZIP: step out of the archive.
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        # Plain source tree: one level above this package.
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
5972
5973
def load_plugins(name, type, namespace):
    """Load optional plugins from an 'ytdlp_plugins' directory.

    Imports the module/package called *name* from
    ``<executable dir>/ytdlp_plugins`` and copies every attribute whose
    name ends with *type* into *namespace*.

    @param name       module name to look for (NB: rebound by the loop below)
    @param type       required attribute-name suffix (NB: shadows the
                      ``type`` builtin inside this function)
    @param namespace  dict (typically a module's globals()) receiving the
                      discovered classes
    @returns          list of the collected plugin objects (empty on failure)
    """
    # Sentinel so the `finally` clause can tell whether find_module() ever
    # ran; on success plugin_info[0] holds the open file object it returned.
    plugin_info = [None]
    classes = []
    try:
        # NOTE(review): the `imp` module is deprecated and removed in
        # Python 3.12 — this needs porting to importlib eventually.
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        for name in dir(plugins):
            if not name.endswith(type):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        # A missing plugin directory/module is fine - plugins are optional.
        pass
    finally:
        # Always close the file handle opened by find_module(), if any.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
5993
5994
def traverse_dict(dictn, keys, casesense=True):
    """Walk nested dicts following *keys* and return the value found.

    Returns None as soon as a level is not a dict or a key is absent.
    With casesense=False, keys at every level are matched after
    lower-casing both sides.
    """
    current = dictn
    remaining = list(keys)
    while True:
        if not isinstance(current, dict):
            return None
        key = remaining.pop(0)
        if not casesense:
            current = {k.lower(): v for k, v in current.items()}
            key = key.lower()
        current = current.get(key, None)
        if not remaining:
            return current