]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
[utils] Improve `extract_timezone`
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import imp
20 import io
21 import itertools
22 import json
23 import locale
24 import math
25 import operator
26 import os
27 import platform
28 import random
29 import re
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_cookiejar,
47 compat_ctypes_WINFUNCTYPE,
48 compat_etree_fromstring,
49 compat_expanduser,
50 compat_html_entities,
51 compat_html_entities_html5,
52 compat_http_client,
53 compat_integer_types,
54 compat_numeric_types,
55 compat_kwargs,
56 compat_os_name,
57 compat_parse_qs,
58 compat_shlex_quote,
59 compat_str,
60 compat_struct_pack,
61 compat_struct_unpack,
62 compat_urllib_error,
63 compat_urllib_parse,
64 compat_urllib_parse_urlencode,
65 compat_urllib_parse_urlparse,
66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
69 compat_urllib_parse_unquote_plus,
70 compat_urllib_request,
71 compat_urlparse,
72 compat_xpath,
73 )
74
75 from .socks import (
76 ProxyType,
77 sockssocket,
78 )
79
80
def register_socks_protocols():
    """Teach the urlparse machinery about the SOCKS URL schemes.

    urlsplit() only parses the netloc portion correctly for schemes listed
    in urlparse.uses_netloc (in Python < 2.6.5 it additionally suffers from
    https://bugs.python.org/issue7904), so each SOCKS scheme is appended
    there once, idempotently.
    """
    known_schemes = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in known_schemes:
            known_schemes.append(proto)
88
89
# The type of a compiled regular-expression pattern (useful for
# isinstance() checks); obtained from a throwaway compile because it is
# not clearly exposed under a stable public name across the Python
# versions this file supports.
compiled_regex_type = type(re.compile(''))
92
93
def random_user_agent():
    """Return a plausible desktop-Chrome User-Agent string.

    The Chrome version embedded in the UA is picked at random from a pool
    of genuine Chromium build numbers so that repeated runs do not always
    present the identical User-Agent.

    Returns:
        str: a 'Mozilla/5.0 (...) Chrome/<version> Safari/537.36' string.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # A representative sample of real Chromium build numbers, a few per
    # major release (68-76), rather than an exhaustive list of every
    # build: a server only ever observes one version per request, so a
    # small pool gives the same effect without thousands of inline
    # entries bloating the module.
    _CHROME_VERSIONS = (
        '68.0.3440.124',
        '68.0.3440.134',
        '69.0.3497.100',
        '69.0.3497.128',
        '70.0.3538.102',
        '70.0.3538.124',
        '71.0.3578.98',
        '71.0.3578.141',
        '72.0.3626.109',
        '72.0.3626.122',
        '73.0.3683.86',
        '73.0.3683.121',
        '74.0.3729.108',
        '74.0.3729.129',
        '75.0.3770.8',
        '75.0.3770.15',
        '76.0.3780.2',
        '76.0.3780.3',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Default headers attached to outgoing HTTP requests.  The User-Agent is
# picked once per process by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1684
1685
# Named alternative User-Agent strings (for sites that need a specific one)
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Sentinel default for parameters where None is itself a meaningful value
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Full month names keyed by language code (used for textual date parsing)
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# File extensions recognized as media/manifest files
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to an ASCII replacement)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1723
# strptime() patterns used by the date parsing helpers; locale-unambiguous
# formats only — numeric day/month ordering lives in the variants below
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Extra patterns for locales writing the day before the month (e.g. 31/12/2020)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Extra patterns for locales writing the month before the day (e.g. 12/31/2020)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-style packed JavaScript:
# }('payload',radix,count,'words'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> block; payload in group 'json_ld'
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1788
1789
def preferredencoding():
    """Return the encoding the system prefers for text.

    Based on locale.getpreferredencoding(), falling back to UTF-8 whenever
    the reported codec is unusable (unknown codec, broken locale, ...).
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: a bogus locale may report an unknown encoding
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1803
1804
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # (fixed: these lambdas previously ignored their argument and
        # closed over `fn` instead)
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temporary file next to the target so that os.rename is a
    # same-filesystem (atomic) move
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile is created with mode 0600; give the result
            # the permissions a plain open() would have used (0666 & ~umask)
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the orphaned temp file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1863
1864
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Manual scan: old ElementTree lacks attribute predicates
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1879
1880 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1881 # the namespace parameter
1882
1883
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' using ns_map."""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            expanded.append(component)
        else:
            ns, tag = parts
            expanded.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(expanded)
1894
1895
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string, or an iterable of
    alternatives tried in order).  Falls back to `default` if given, raises
    ExtractorError if `fatal`, else returns None."""
    def _find(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        found = _find(xpath)
    else:
        for xp in xpath:
            found = _find(xp)
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1917
1918
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element, but return the matched element's text."""
    el = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if el is None or el == default:
        return el
    if el.text is not None:
        return el.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1932
1933
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the first element matching `xpath`."""
    el = find_xpath_attr(node, xpath, key)
    if el is not None:
        return el.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1945
1946
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an id is just an attribute named 'id'
    return get_element_by_attribute('id', id, html)
1950
1951
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    # get_elements_by_class returns a list; take the first match, if any
    return retval[0] if retval else None
1956
1957
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute in the passed HTML document"""
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None
1961
1962
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class_name as a whole word anywhere inside the class attribute;
    # the value is already a regex here, hence escape_value=False
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1968
1969
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    if escape_value:
        value = re.escape(value)

    results = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # Strip enclosing quotes if the captured content itself is quoted
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1993
1994
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag (name -> value/None)
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Each new start tag replaces the previous attribute set, so only
        # feed this parser a single element (see extract_attributes below)
        self.attrs = dict(attrs)
2004
2005
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML
        pass
    return attr_parser.attrs
2030
2031
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Drop real newlines, then turn <br> and paragraph breaks into newlines
    # and strip every remaining tag
    html = html.replace('\n', ' ')
    for pattern, repl in (
            (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
            (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
            ('<.*?>', '')):
        html = re.sub(pattern, repl, html)
    # Decode HTML entities and trim surrounding whitespace
    return unescapeHTML(html).strip()
2047
2048
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means stdout; on Windows the fd must first be switched to
            # binary mode so written media data is not newline-mangled
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error cannot be fixed by renaming — re-raise as-is
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2079
2080
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable RFC 2822 date
        return None
    return email.utils.mktime_tz(parsed)
2088
2089
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Control characters, DEL and '?' are never allowed
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps: replace ':' inside digit groups like 12:34:56
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in s)
    if not is_id:
        # Collapse runs of '_' and tidy leading/trailing junk
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2131
2132
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        # On Windows the drive/UNC prefix must be preserved verbatim
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        # Nothing to do on non-Windows platforms unless explicitly forced
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters invalid in Windows path components, plus a
    # trailing space/dot, keeping '.' and '..' components intact
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Keep the path absolute when forced sanitization was requested
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2156
2157
def sanitize_url(url):
    """Fix obviously broken URLs: add a scheme to protocol-relative URLs and
    correct a few scheme typos seen in the wild."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://')):
        fixed, n_subs = re.subn(mistake, fixup, url)
        if n_subs:
            return fixed
    return url
2174
2175
def extract_basic_auth(url):
    """Split embedded credentials out of `url`.

    Returns (url_without_credentials, basic_auth_header_value_or_None).
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Rebuild the netloc without the user:password@ prefix
    netloc = (parts.hostname if parts.port is None
              else '%s:%d' % (parts.hostname, parts.port))
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    return url, 'Basic ' + token
2186
2187
def sanitized_Request(url, *args, **kwargs):
    # Fix/escape the URL and move embedded user:password credentials into an
    # Authorization header before constructing the Request
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # Headers may arrive positionally (args[1]) or as a keyword; create
        # the keyword dict on demand so the header has somewhere to go
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2194
2195
def expand_path(s):
    """Expand shell variables ($VAR/%VAR%) and the user directory (~) in s"""
    return os.path.expandvars(compat_expanduser(s))
2199
2200
def orderedSet(iterable):
    """Return the items of `iterable` as a list with duplicates removed,
    keeping the first occurrence of each.

    Membership is tested with `==` against the result so far (list scan),
    which also works for unhashable items.
    """
    result = []
    for item in iterable:
        if item in result:
            continue
        result.append(item)
    return result
2208
2209
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric entity: decimal (#123) or hexadecimal (#x7B)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2239
2240
def unescapeHTML(s):
    """Replace HTML entities (named, decimal and hex) in s with their characters."""
    if s is None:
        return None
    assert type(s) == compat_str

    # Group 1 is the entity body including the trailing ';'
    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2248
2249
def escapeHTML(text):
    """Escape &, <, >, " and ' in `text` for safe embedding in HTML markup."""
    # Single-pass translate; equivalent to the chained str.replace version
    # with '&' handled first, since no replacement contains <, >, " or '
    return text.translate({
        ord('&'): '&amp;',
        ord('<'): '&lt;',
        ord('>'): '&gt;',
        ord('"'): '&quot;',
        ord("'"): '&#39;',
    })
2259
2260
def process_communicate_or_kill(p, *args, **kwargs):
    """p.communicate(...), but never leave the subprocess running: if the
    call is interrupted, kill the process and reap it before re-raising."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        # kill() then wait() so no zombie process is left behind
        p.kill()
        p.wait()
        raise
2268
2269
def get_subprocess_encoding():
    """Return the encoding used when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2280
2281
def encodeFilename(s, for_subprocess=False):
    """
    Encode a filename for the OS / a subprocess (no-op on Python 3).

    @param s The name of the file
    """
    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    windows_unicode_api = (not for_subprocess
                           and sys.platform == 'win32'
                           and sys.getwindowsversion()[0] >= 5)
    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if windows_unicode_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2304
2305
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename (no-op on Python 3 or for non-bytes input)."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2315
2316
def encodeArgument(s):
    # Subprocess-argument flavour of encodeFilename
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2324
2325
def decodeArgument(b):
    # Subprocess-argument flavour of decodeFilename
    return decodeFilename(b, True)
2328
2329
def decodeOption(optval):
    """Decode a command-line option value to text (None passes through)."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())
    assert isinstance(optval, compat_str)
    return optval
2338
2339
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as "[H<delim>]M<delim>SS[.mmm]".

    @param secs   Duration in seconds (int or float)
    @param delim  Separator between the hour/minute/second fields
    @param msec   If True, append the milliseconds as '.NNN'

    Fixes over the previous version: exact minute/hour boundaries are now
    bucketed correctly (60 -> '1:00', 3600 -> '1:00:00' instead of '60' /
    '60:00'), and the millisecond suffix shows the actual fraction (the old
    code passed the raw fraction to %03d, which always printed '.000').
    """
    if secs >= 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    # Scale the fractional part to whole milliseconds before formatting
    return '%s.%03d' % (ret, (secs % 1) * 1000) if msec else ret
2348
2349
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose certificate checking honours
    params['nocheckcertificate'], across the supported Python versions."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2373
2374
def bug_reports_message(before=';'):
    """Return the standard 'please report this issue' blurb, joined onto
    `before` (capitalized when it starts a new sentence)."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
           ) % update_cmd

    before = before.rstrip()
    # Capitalize when the blurb follows sentence-ending punctuation (or nothing)
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
2389
2390
# Root of the yt-dlp exception hierarchy
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    pass
2394
2395
# Exception classes indicating a network-level failure; used e.g. by
# ExtractorError below to mark such errors as "expected" (not a bug)
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
# ssl.CertificateError is not available on every supported Python version
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2400
2401
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # Errors raised while a network exception is being handled are
        # always treated as "expected"
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie  # formatted into the message as '[<ie>] ' below
        self.exc_info = sys.exc_info()  # preserve original exception

        # Compose "[ie] video_id: msg (caused by ...)" and append the
        # bug-report blurb for unexpected errors
        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        # Render the stored traceback (if any) as a printable string
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2431
2432
class UnsupportedError(ExtractorError):
    """Raised (as an expected error) when no extractor supports the given URL."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        # Keep the offending URL for callers that want to inspect it
        self.url = url
2438
2439
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match."""
    pass
2443
2444
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Geo errors are always "expected" (not a yt-dlp bug)
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # Country codes related to the restriction, if the extractor knows them
        self.countries = countries
2456
2457
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2470
2471
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict.
    """
    pass
2479
2480
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2488
2489
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the message accessible as an attribute as well
        self.msg = msg
2500
2501
class ExistingVideoReached(YoutubeDLError):
    # NOTE(review): the original docstring said "--max-downloads limit has
    # been reached", apparently copy-pasted from MaxDownloadsReached below;
    # judging by the name this signals an already-downloaded video — confirm
    # against the raising sites (--break-on-existing).
    """ An already existing (downloaded) video was reached. """
    pass
2505
2506
class RejectedVideoReached(YoutubeDLError):
    # NOTE(review): the original docstring said "--max-downloads limit has
    # been reached", apparently copy-pasted from MaxDownloadsReached below;
    # judging by the name this signals a rejected (filtered-out) video —
    # confirm against the raising sites (--break-on-reject).
    """ A rejected video was reached. """
    pass
2510
2511
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    pass
2515
2516
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2520
2521
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2529
2530
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2546
2547
class XAttrMetadataError(YoutubeDLError):
    """Failure while handling extended file attributes; `self.reason`
    classifies it as NO_SPACE, VALUE_TOO_LONG or NOT_SUPPORTED."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code  # OS errno, when available
        self.msg = msg

        # Parsing code and msg
        # (classify by errno first, falling back to message substrings)
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2562
2563
# Raised when no working xattr mechanism is available at all
class XAttrUnavailableError(YoutubeDLError):
    pass
2566
2567
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class`, applying the configured source_address (if
    any) so outgoing connections bind to it; used by the HTTP(S) handlers."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Guess the address family from the source address: a dotted
            # quad means IPv4, anything else is treated as IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address until one connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: monkeypatch connect() instead
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2631
2632
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, it is removed together with any
    'Accept-Encoding' header (case-insensitive) so the real request asks
    for an uncompressed response. Otherwise the mapping is returned as-is.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    # NB: generator form instead of a dict comprehension for py2.6 compat,
    # matching the convention used elsewhere in this file
    filtered = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    # The marker itself must never reach the wire
    del filtered['Youtubedl-no-compression']
    return filtered
2641
2642
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict; stored but not read here
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # Internal Ytdl-socks-proxy header selects a SOCKS-tunnelled
        # connection class; it must not leak into the outgoing request
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        # Decompress raw deflate data; fall back to zlib-wrapped streams
        # for servers that send the zlib header despite 'deflate'
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Add any standard headers the caller did not set explicitly
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes shaved off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS requests/responses go through the same hooks
    https_request = http_request
    https_response = http_response
2766
2767
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through a SOCKS proxy.

    base_class must be compat_http_client.HTTPConnection or HTTPSConnection
    (or a subclass); socks_proxy is a URL like 'socks5://user:pass@host:port'.
    Returns a subclass whose connect() establishes the tunnel first.
    Raises ValueError for an unsupported proxy scheme (previously this fell
    through and crashed later with a cryptic UnboundLocalError).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Fail early with a clear message instead of an unbound-name crash
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # Percent-decode credentials; keep None/'' unchanged
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunnelled socket in TLS as well
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2809
2810
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting SOCKS proxies and custom connection classes."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class
        extra_kwargs = {}

        # Forward TLS context / hostname checking when the base handler has them
        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        # Internal marker header selecting a SOCKS-tunnelled connection class;
        # it must not be sent to the server
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            del req.headers['Ytdl-socks-proxy']
            conn_class = make_socks_conn_class(conn_class, socks_proxy)

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **extra_kwargs)
2834
2835
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Netscape-format prefix marking HttpOnly cookies; stripped before parsing
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # A valid cookie line has exactly 7 tab-separated fields
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Field layout of one cookies.txt line, used for validation on load
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                # Honour discard/expiry flags unless explicitly ignored
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate/normalize one raw line before handing the whole
            # buffer to MozillaCookieJar._really_load; raises LoadError
            # for malformed entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries instead of failing the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2952
2953
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies the http_* hooks to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 used to choke on the next HTTP request in a row when the
        # previous response carried non-ASCII characters in Set-Cookie (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769). The percent
        # encoding workaround below is kept around, disabled, for reference:
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2976
2977
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Route 301/303/307/308 through the stock 302 machinery; the actual
    # policy decision happens in redirect_request() below
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Allow GET/HEAD on any of these codes, POST only on 301/302/303
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Entity headers describe the old request body and must not be
        # carried over to the (bodiless) redirected request
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3033
3034
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (utc_offset, remaining_date_str) where utc_offset is a
    datetime.timedelta. The offset is zero when no timezone is present
    or the designator is a bare 'Z'; in either matched case the
    designator is stripped from the returned string.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    zero = datetime.timedelta()
    if not m:
        return zero, date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        # Bare 'Z' designator: UTC, i.e. zero offset
        return zero, date_str
    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3059
3060
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # strptime's %S cannot consume fractional seconds; drop them entirely
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    # When no explicit offset is supplied, peel it off the string itself
    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        naive = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter))
    except ValueError:
        # Unparsable input yields None, matching the other date helpers
        return None
    return calendar.timegm((naive - timezone).timetuple())
3078
3079
def date_formats(day_first=True):
    """Return the candidate date-format list, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3082
3083
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Commas, AM/PM markers and timezone designators only get in the way
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    upload_date = None
    # NB: deliberately no break — the LAST matching format wins, as before
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser from the email package
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    return compat_str(upload_date) if upload_date is not None else None
3110
3111
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a fuzzy date string, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Note the PM marker and the timezone BEFORE stripping them below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
        except ValueError:
            continue
        return calendar.timegm(dt.timetuple())
    # Last resort: the RFC 2822 parser from the email package
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3143
3144
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL; *default_ext* when none found."""
    if url is None or '.' not in url:
        return default_ext
    # Cut the query string, then take whatever follows the last dot
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = guess.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3156
3157
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by swapping the media extension for '<lang>.<format>'."""
    sub_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, sub_ext, expected_real_ext)
3160
3161
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    mobj = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if mobj is None:
        # Plain date string: parse with the supplied format
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # Relative expression: resolve the base recursively, then apply the delta
    start_time = datetime_from_str(mobj.group('start'), precision, format)
    amount = int(mobj.group('time'))
    if mobj.group('sign') == '-':
        amount = -amount
    unit = mobj.group('unit')
    if unit in ('month', 'year'):
        # timedelta has no month/year support; use calendar arithmetic
        new_date = datetime_add_months(start_time, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        new_date = start_time + datetime.timedelta(**{unit + 's': amount})
    if auto_precision:
        return datetime_round(new_date, unit)
    return new_date
3202
3203
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3212
3213
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the target month (so
    Jan 31 + 1 month gives Feb 28/29). *months* may be negative.
    """
    total_months = dt.month - 1 + months
    year = dt.year + total_months // 12
    month = total_months % 12 + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3221
3222
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    step = unit_seconds[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half-up to the nearest multiple of the unit
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3239
3240
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if mobj is None:
        # Anything that is not a bare 8-digit date passes through unchanged
        return date_str
    return '-'.join(mobj.groups())
3249
3250
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds fall back to the widest representable dates
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3280
3281
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Python 2 may hand back an encoded byte string
        name = name.decode(preferredencoding())
    assert isinstance(name, compat_str)
    return name
3290
3291
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C runtime file descriptors to Win32 standard-handle IDs
    # (-11 = STD_OUTPUT_HANDLE, -12 = STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # Only a real local console handle can take WriteConsoleW
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        # (written as a surrogate pair), or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 BMP characters per call; a non-BMP character
        # at the front is written alone as its two UTF-16 code units
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3365
3366
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (sys.stderr by default), handling byte
    streams and, on Windows, native console output."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows try the console API first; it handles Unicode natively
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        chosen = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(chosen, 'ignore'))
    else:
        out.write(s)
    out.flush()
3387
3388
def bytes_to_intlist(bs):
    """Convert a byte string to a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):
        # Python 3: indexing bytes already yields ints
        return list(bs)
    # Python 2: indexing str yields 1-char strings
    return [ord(c) for c in bs]
3396
3397
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a bytes object."""
    if not xs:
        return b''
    fmt = '%dB' % len(xs)
    return compat_struct_pack(fmt, *xs)
3402
3403
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Win32 OVERLAPPED structure passed to LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte-range length (low/high DWORDs) covering the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED pointer alive on the file object for unlock
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            # POSIX advisory lock: exclusive for writers, shared for readers
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3477
3478
class locked_file(object):
    """File wrapper holding an advisory lock for the duration of a `with`
    block: shared for 'r', exclusive for 'a'/'w'."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Read-only handles only need a shared lock
        want_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, want_exclusive)
        except IOError:
            # Never leak the handle if locking is unavailable/fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3508
3509
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to 'utf-8' when undetectable."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:  # can happen on Python 2 in some environments
        return 'utf-8'
    return encoding
3513
3514
def shell_quote(args):
    """Return *args* joined into a single shell-safe command-line string."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
3524
3525
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
3534
3535
def unsmuggle_url(smug_url, default=None):
    """Extract data smuggled by smuggle_url().

    Returns (clean_url, data); when nothing was smuggled, the original
    URL is returned together with *default*.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
3543
3544
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.50KiB'.

    Accepts ints, floats and numeric strings; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    SUFFIXES = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so values >= 1024**9 don't index past the suffix table
        exponent = min(int(math.log(bytes, 1024.0)), len(SUFFIXES) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, SUFFIXES[exponent])
3557
3558
def lookup_unit_table(unit_table, s):
    """Parse '<number> <unit>' at the start of *s* via *unit_table*; None if no match."""
    units_pattern = '|'.join(re.escape(unit) for unit in unit_table)
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_pattern, s)
    if match is None:
        return None
    # ',' is accepted as a decimal separator too
    number = float(match.group('num').replace(',', '.'))
    return int(number * unit_table[match.group('unit')])
3568
3569
def parse_filesize(s):
    """Parse a human-readable file size ('5 MiB', '1.5GB', ...) into bytes (int), or None."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3639
3640
def parse_count(s):
    """Parse a count like '12,345' or '1.5M' into an int, or None."""
    if s is None:
        return None

    s = s.strip()

    # Plain number, possibly with thousand separators
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3660
3661
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    match = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if match:
        return {
            'width': int(match.group('w')),
            'height': int(match.group('h')),
        }

    match = re.search(r'\b(\d+)[pPiI]\b', s)
    if match:
        return {'height': int(match.group(1))}

    match = re.search(r'\b([48])[kK]\b', s)
    if match:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(match.group(1)) * 540}

    return {}
3682
3683
def parse_bitrate(s):
    """Return the integer kbps value embedded in *s*, or None."""
    if not isinstance(s, compat_str):
        return None
    match = re.search(r'\b(\d+)\s*kbps', s)
    if match:
        return int(match.group(1))
3690
3691
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return names.index(name) + 1
    except ValueError:
        # Unknown month name
        return None
3701
3702
def month_by_abbreviation(abbrev):
    """Return the 1-based month number for a 3-letter English abbreviation, or None."""
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3711
3712
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # The negative lookahead keeps existing entities and character refs intact
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;', xml_str)
3719
3720
def setproctitle(title):
    """Best-effort: set the process name via libc prctl(); silently no-ops on failure."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects a bytestring,
        # but since unicode_literals turns every string into a unicode
        # string, it fails.
        return
    encoded = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(encoded))
    buf.value = encoded
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3745
3746
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present; None-safe."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3749
3750
def remove_end(s, end):
    """Strip *end* from the end of *s* when present; None-safe."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
3753
3754
def remove_quotes(s):
    """Drop one matching pair of surrounding quotes (single or double) from *s*."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3762
3763
def get_domain(url):
    """Return the host part of *url* (scheme and leading 'www.' stripped), or None."""
    match = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if match is None:
        return None
    return match.group('domain')
3767
3768
def url_basename(url):
    """Return the last path component of *url* ('' for a bare host)."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3772
3773
def base_url(url):
    """Return *url* truncated after the last '/' before any query/fragment."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group(0)
3776
3777
def urljoin(base, path):
    """Join *base* and *path*, tolerating bytes and returning None for bad input."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Already absolute (or protocol-relative)
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3791
3792
class HEADRequest(compat_urllib_request.Request):
    """A Request whose HTTP method is HEAD instead of urllib's default."""
    def get_method(self):
        return 'HEAD'
3796
3797
class PUTRequest(compat_urllib_request.Request):
    """A Request whose HTTP method is PUT instead of urllib's default."""
    def get_method(self):
        return 'PUT'
3801
3802
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* (optionally an attribute of it) to a scaled int; *default* on failure."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3815
3816
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
3819
3820
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousand separators and a leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3828
3829
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a rescaled float; *default* when None or unparsable."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3837
3838
def bool_or_none(v, default=None):
    """Return *v* only when it is a real bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
3841
3842
def strip_or_none(v, default=None):
    """Return v.strip() for strings, *default* for anything else."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3845
3846
def url_or_none(url):
    """Return *url* stripped when it looks like a supported URL scheme, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3852
3853
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string with strftime; *default* on failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3864
3865
def parse_duration(s):
    """Parse a duration string ('1:23:45', '1h 23m 4s', ISO-8601 'PT1H23M', '2.5 hours')
    into a number of seconds, or None when it cannot be parsed."""
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # First try [[[DD:]HH:]MM:]SS[.ms] style
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Then '1d 2h 3m 4.5s' / ISO 8601 'P...T...' style;
        # years/months/weeks are matched but deliberately not captured
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Finally fractional '2.5 hours' / '90 min' style
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms still carries its leading '.', so this adds the fraction
        duration += float(ms)
    return duration
3922
3923
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3930
3931
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is set and does not match the actual extension,
    *ext* is appended instead of replacing.
    """
    name, real_ext = os.path.splitext(filename)
    base = name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename
    return '{0}.{1}'.format(base, ext)
3937
3938
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NOTE: the mutable default args=[] is never modified here, so it is safe
    try:
        # Spawning (and reaping) the process succeeding implies the binary exists
        process_communicate_or_kill(subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
    except OSError:
        return False
    return exe
3948
3949
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3967
3968
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from *output* via *version_re*; *unrecognized* if absent."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    match = re.search(version_re, output)
    return match.group(1) if match else unrecognized
3978
3979
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        # Raised instead of the builtin IndexError so callers can tell
        # "ran past the underlying iterable" apart from other index errors
        pass

    def __init__(self, iterable):
        # Source iterator; consumed items are moved into __cache
        self.__iterable = iter(iterable)
        self.__cache = []
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Pull everything left in the iterator into the cache
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map forward index x to its mirror from the end; None passes through
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Only consume as many items as the requested index/slice needs
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            # Probe the first element in iteration order
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        # Lazily flip iteration order; returns self for chaining
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4064
4065
class PagedList:
    """Base class for lazily fetched, page-oriented entry lists."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Fetch (or return cached) entries of page *pagenum*."""
        cached = self._cache.get(pagenum)
        results = cached or list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = results
        return results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4095
4096
class OnDemandPagedList(PagedList):
    """PagedList that fetches each page only as the requested slice reaches it."""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested range within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4130
4131
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading entries of the first page that fall before *start*
        skip_elems = start - start_page * self._pagesize
        # How many entries are still wanted in total (None = all)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                # Only the first fetched page needs its head trimmed
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
4155
4156
def uppercase_escape(s):
    """Decode '\\UXXXXXXXX' escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda match: decode(match.group(0))[0],
        s)
4163
4164
def lowercase_escape(s):
    """Decode '\\uXXXX' escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda match: decode(match.group(0))[0],
        s)
4171
4172
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Reserved/unreserved URI punctuation stays unescaped
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4178
4179
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    return parsed._replace(
        # IDNA-encode the host, percent-escape everything else
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment)
    ).geturl()
4190
4191
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    parsed = compat_urllib_parse_urlparse(url)
    return compat_parse_qs(parsed.query)
4194
4195
def read_batch_urls(batch_fd):
    """Read a batch-file object and return its cleaned list of URLs.

    Blank lines and lines starting with '#', ';' or ']' are skipped.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip UTF-8 BOM remnants (raw and already-decoded forms)
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4213
4214
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4217
4218
def update_url_query(url, query):
    """Return *url* with the parameters of *query* merged into its query string."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parsed_url.query)
    merged.update(query)
    return compat_urlparse.urlunparse(parsed_url._replace(
        query=compat_urllib_parse_urlencode(merged, True)))
4227
4228
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its URL, data, headers and query."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)
    # Preserve the HTTP verb of the original request
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=new_data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4247
4248
def _multipart_encode_impl(data, boundary):
    """Serialize *data* into a multipart/form-data body using *boundary*.

    Raises ValueError when the boundary occurs inside a name or value.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += b'--' + boundary_bytes + b'\r\n' + part

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
4269
4270
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            # Retry with a fresh random boundary if it collides with the data
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None
4299
4300
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up a key, or the first usable key of a list/tuple, in *d*.

    A candidate value is skipped when it is None, or falsy while
    *skip_false_values* is set.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4309
4310
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to *src* and return the first result that neither raises
    nor fails the *expected_type* check; None otherwise."""
    for get in variadic(getter):
        try:
            result = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
4320
4321
def merge_dicts(*dicts):
    """Merge dicts left-to-right, ignoring None values; a later non-empty
    string also overrides an earlier empty string."""
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            if key not in merged:
                merged[key] = value
            elif (isinstance(value, compat_str) and value
                    and isinstance(merged[key], compat_str)
                    and not merged[key]):
                merged[key] = value
    return merged
4334
4335
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding bytes with *encoding*.

    NOTE: the default *encoding* is evaluated once, at import time.
    """
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4338
4339
# MPAA-style US movie rating -> minimum viewer age
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines rating -> minimum viewer age
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4357
4358
def parse_age_limit(s):
    """Normalize an age limit ('18', '18+', 'PG-13', 'TV-MA', int) to an int, or None."""
    # type() (not isinstance) is used deliberately so bools are rejected
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if match:
        return int(match.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    match = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if match:
        return TV_PARENTAL_GUIDELINES['TV-' + match.group(1)]
    return None
4374
4375
def strip_jsonp(code):
    """Remove a JSONP callback wrapper ('cb({...});') and return the payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4384
4385
def js_to_json(code, vars={}):
    """Convert a JavaScript object/expression string into valid JSON text."""
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, numeric base) pairs for hex and octal integer literals
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Normalize one matched token to its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Re-escape quoted string contents for JSON
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integers used as object keys become quoted decimal strings
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        if v in vars:
            return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4432
4433
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        # Position in the list is the score; unknown ids sort lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return rank
4442
4443
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types and the filename infix used for each
# (None = no extra infix)
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4459
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Placeholder '{0}' receives the key pattern, '{1}' the conversion-type set
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?  # mapping key
    (?P<format>
        (?:[#0\-+ ]+)?  # conversion flags (optional)
        (?:\d+)?  # minimum field width (optional)
        (?:\.\d+)?  # precision (optional)
        [hlL]?  # length modifier (optional)
        {1}  # conversion type
    )
    '''


# All conversion types accepted by %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4478
4479
def limit_length(s, length):
    """Truncate *s* to *length* characters, ending with '...' when shortened."""
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
4488
4489
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4492
4493
def is_outdated_version(version, limit, assume_new=True):
    """Compare *version* against *limit*; unparsable input yields not *assume_new*."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4501
4502
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Self-update is deliberately disabled in this build.  The historical
    # detection logic that used to follow was unreachable dead code (it sat
    # after this unconditional return) and has been removed:
    #     from zipimport import zipimporter
    #     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
    return False
4510
4511
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
4515
4516
def error_to_compat_str(err):
    """Stringify an exception, decoding the byte message on Python 2."""
    message = str(err)
    if sys.version_info[0] < 3:
        # On python 2 error byte string must be decoded with proper
        # encoding rather than ascii
        message = message.decode(preferredencoding())
    return message
4524
4525
def mimetype2ext(mt):
    """Map a MIME type to a file extension; None stays None."""
    if mt is None:
        return None

    # Full-type special cases first
    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    ext = FULL_TYPE_MAP.get(mt)
    if ext is not None:
        return ext

    # Otherwise map on the subtype alone (parameters stripped, lower-cased)
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4563
4564
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string (http://tools.ietf.org/html/rfc6381)
    into a {'vcodec': ..., 'acodec': ...} dict."""
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec = acodec = None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            # First video codec wins
            vcodec = vcodec or full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Neither recognized: assume a video/audio pair when exactly two entries
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4594
4595
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers."""
    getheader = url_handle.headers.get

    # Prefer the filename given in Content-Disposition, if any
    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    # Fall back to the MIME type
    return mimetype2ext(getheader('Content-Type'))
4608
4609
def encode_data_uri(data, mime_type):
    """Encode *data* bytes as a base64 data: URI with the given MIME type."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
4612
4613
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone
        return False
    return age_limit < content_limit
4622
4623
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Strip a BOM when present and decode accordingly; default to UTF-8.
    # Order matters: UTF-32 BOMs must be checked before their UTF-16 prefixes.
    decoded = None
    for bom, enc in (
            (b'\xef\xbb\xbf', 'utf-8'),
            (b'\x00\x00\xfe\xff', 'utf-32-be'),
            (b'\xff\xfe\x00\x00', 'utf-32-le'),
            (b'\xff\xfe', 'utf-16-le'),
            (b'\xfe\xff', 'utf-16-be')):
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4642
4643
def determine_protocol(info_dict):
    """Infer the download protocol of *info_dict*, from its URL when unset."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4664
4665
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    @param header_row   column titles
    @param data         list of rows (each a list of cell values)
    @param delim        insert a row of dashes between header and data
    @param extraGap     extra spaces appended to every column width
    @param hideEmpty    drop columns whose data cells are all empty
    """
    # This file is Python 3-only (see shebang / f-string usage), where
    # compat_str is just str, so the builtin is used directly.
    def max_column_widths(rows):
        return [max(len(str(cell)) for cell in column) for column in zip(*rows)]

    if hideEmpty:
        # A column is kept iff any *data* cell in it is non-empty
        # (the header alone does not keep a column alive).
        widths = max_column_widths(data)
        header_row = [cell for keep, cell in zip(widths, header_row) if keep]
        data = [[cell for keep, cell in zip(widths, row) if keep] for row in data]

    table = [header_row] + data
    widths = max_column_widths(table)
    if delim:
        table = [header_row] + [['-' * width for width in widths]] + data
    # All columns but the last are left-justified and padded; the last one
    # is emitted as-is to avoid trailing whitespace.
    format_str = ' '.join('%-' + str(width + extraGap) + 's' for width in widths[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4686
4687
def _match_one(filter_part, dct, incomplete):
    """
    Evaluate a single filter expression (e.g. 'duration > 60', 'title ~= foo'
    or '!is_live') against the dict ``dct``.

    @param filter_part  one expression (no '&' separators)
    @param dct          the info dict being filtered
    @param incomplete   when true, expressions on missing fields pass
    @returns            a truthy/falsy match result
    @raises ValueError  for malformed expressions or a string operator
                        applied to a non-string comparison value
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    # Binary form: <key> [!]<op>[?] <value>, where the trailing '?' makes the
    # expression pass when the field is missing (None).
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind as the surrounding ones.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try it as a filesize ('500K'), then
                # with an implied 'B' suffix ('500Ki' -> '500KiB').
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            return incomplete or m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: truthiness test ('key') or its negation ('!key').
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4769
4770
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
    When incomplete, all conditions passes on missing fields
    """
    # Split on unescaped '&' only; '\&' stays part of an expression.
    parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(part.replace(r'\&', '&'), dct, incomplete)
        for part in parts)
4778
4779
def match_filter_func(filter_str):
    """Build a match-filter callable: returns None when the video passes
    *filter_str*, otherwise a human-readable skip reason."""
    def _match_func(info_dict, *args, **kwargs):
        if not match_str(filter_str, info_dict, *args, **kwargs):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4788
4789
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset, optionally with an 's' suffix: '12', '12.5s'.
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock time 'HH:MM:SS(.mmm|:fff)' — a ':' before the fraction is
    # treated like a decimal point.
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, seconds = mobj.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
    return None
4801
4802
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timestamp 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4805
4806
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError when no <p> cues are found
    '''
    # Obsolete TTML namespaces mapped to their current equivalents, so the
    # XPath expressions below only need to handle one namespace set.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Subset of TTML styling attributes that can be expressed in SRT markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # _x('ttml:p') expands to the fully-qualified tag name.
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}         # style id -> {prop: value}, filled below
    default_style = {}  # style inherited from <body>/<div>

    # Streaming target for ElementTree's XMLParser: converts one <p> element
    # (with nested spans/br) into SRT-flavoured text with <b>/<i>/<u>/<font>.
    class TTMLPElementParser(object):
        # NOTE(review): these are class-level attributes; the two lists are
        # shared by all instances and only stay empty between uses because
        # every start() is balanced by an end() on well-formed input.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Effective style: default < referenced style < inline attrs.
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect on the enclosing
                        # element to avoid redundant markup.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first.
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces with a bytes-level replace before parsing.
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements, repeating until parent references (the
    # 'style' attribute) have all been seen, so inheritance chains resolve
    # regardless of document order.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # Styles referenced from <body>/<div> become the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Emit one numbered SRT cue per <p>; cues without a usable start time
    # (or without either an end time or a duration) are dropped.
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4969
4970
def cli_option(params, command_option, param):
    """Return [command_option, value] for the option *param* in *params*,
    or [] when it is unset (None).

    Note: a falsy non-None value (e.g. '' or 0) is passed through without
    str() conversion, preserving the historical behavior.
    """
    # This file is Python 3-only, where compat_str is just str.
    value = params.get(param)
    if value:
        value = str(value)
    return [command_option, value] if value is not None else []
4976
4977
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render boolean option *param* as CLI arguments; [] when unset."""
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        # Single 'option<sep>value' token instead of two arguments.
        return [command_option + separator + rendered]
    return [command_option, rendered]
4986
4987
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals *expected_value*."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4991
4992
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the first matching argument list from *argdict* for any entry in
    *keys* (tried in order); fall back to *default*."""
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        # Each entry may be a single key or a tuple of alternatives.
        matches = [argdict.get(key.lower()) for key in variadic(key_list)]
        matches = [args for args in matches if args is not None]
        if matches:
            # Flatten all matching argument lists, preserving order.
            return [arg for args in matches for arg in args]
    return default
5011
5012
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve the argument lists for executable *exe* under *main_key*."""
    main_key = main_key.lower()
    exe = exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{key}' for key in (keys or [''])]
    if root_key not in keys:
        use_compat = False
    else:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
5024
5025
class ISO639Utils(object):
    """Conversions between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the primary subtag matters, so e.g. 'en-US' maps via 'en'.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # First match in insertion order wins (e.g. 'heb' -> 'he', not 'iw').
        return next(
            (short for short, long_code in cls._lang_map.items() if long_code == code),
            None)
5229
5230
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; unknown codes yield None.
        normalized = code.upper()
        return cls._country_map.get(normalized)
5489
5490
class GeoUtils(object):
    """Helpers for faking a geographic location via source IP selection."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a str) inside the given CIDR
        block, or inside the major block of the given two-letter country
        code; None for an unknown country code.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Python 3's int.to_bytes/from_bytes replace the former
        # compat_struct_pack/unpack('!L', ...) round-trips.
        addr_min = int.from_bytes(socket.inet_aton(addr), 'big')
        # Host part set to all ones gives the last address of the block.
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return socket.inet_ntoa(
            random.randint(addr_min, addr_max).to_bytes(4, 'big'))
5749
5750
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """A ProxyHandler whose proxy can be overridden per request via the
    internal 'Ytdl-request-proxy' header (removed before sending)."""

    def __init__(self, proxies=None):
        # Set default handlers
        # The keyword defaults (proxy/type/meth) bind the *current* loop
        # values, avoiding the late-binding-closure pitfall.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request override takes precedence; strip the internal header
        # so it never reaches the network.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # The http/https handlers do the actual wrapping of the socket
            # with SOCKS; signal them via the header and bail out here.
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5774
5775
5776 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5777 # released into Public Domain
5778 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5779
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n > 0:
        # int.to_bytes yields the minimal big-endian representation
        # directly, replacing the manual 32-bit chunking + zero-stripping.
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        # Preserve the original behavior for n == 0 (single zero byte).
        s = b'\000'
    # add back some pad bytes so the length is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5808
5809
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes subsumes the old pad-to-4-and-struct-unpack loop and
    # handles the empty string identically (returns 0).
    return int.from_bytes(s, 'big')
5825
5826
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The message is the input interpreted as a little-endian integer
    # (hexlify of the byte-reversed data).
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext_int = int(reversed_hex, 16)
    return format(pow(plaintext_int, exponent, modulus), 'x')
5842
5843
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    # PKCS#1 v1.5 needs at least 11 overhead bytes: 0x00, 0x02, >= 8 random
    # non-zero bytes, and a 0x00 separator.
    if length - len(data) < 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    padding = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
    return [0, 2] + padding + [0] + data
5857
5858
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*, using *table* as the
    digit alphabet (defaults to 0-9a-zA-Z truncated to n digits)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5875
5876
def decode_packed_codes(code):
    """Deobfuscate JavaScript packed with the Dean Edwards "p.a.c.k.e.r"
    scheme: rebuild the symbol table and substitute every word token."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-n-encoded index to its replacement symbol; an empty
    # symbol means the encoded index itself is the token.
    symbol_table = {}
    for index in reversed(range(count)):
        encoded = encode_base_n(index, base)
        symbol_table[encoded] = symbols[index] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda m: symbol_table[m.group(0)],
        obfuscated_code)
5893
5894
def caesar(s, alphabet, shift):
    """Shift every character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); characters outside the alphabet pass
    through unchanged."""
    if not shift:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
5902
5903
def rot47(s):
    """Apply the ROT47 substitution cipher: rotate the 94 printable ASCII
    characters ('!' through '~') by 47 positions."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5906
5907
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY2="quoted,val"') into a dict,
    stripping surrounding double quotes from quoted values."""
    def _unquote(value):
        return value[1:-1] if value.startswith('"') else value

    return {
        key: _unquote(val)
        for key, val in re.findall(
            r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    }
5915
5916
def urshift(val, n):
    """Right-shift *val* by *n* bits, treating negative values as their
    unsigned 32-bit two's-complement equivalent (logical shift)."""
    if val < 0:
        val += 0x100000000
    return val >> n
5919
5920
5921 # Based on png2str() written by @gdkchan and improved by @yokrysty
5922 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG image into raw channel bytes.

    @param png_data  complete PNG file contents as bytes
    @returns (width, height, pixels) where pixels is a list of rows; each row
             is a flat list of byte values (stride = width * 3, i.e. the code
             assumes 8-bit RGB, 3 bytes per pixel, non-interlaced — other
             colour types are not handled)
    @raises IOError  if the signature/IHDR is missing or no IDAT data exists
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]  # everything after the 8-byte PNG signature

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Chunk integers are big-endian; pick the format by field width.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, payload, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed to be the first chunk by the check above.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split across multiple IDAT chunks; concatenate them
    # before inflating.
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 bytes/pixel assumed)
    pixels = []

    # Look up an already-reconstructed byte by its flat index in the image.
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo the per-scanline filtering. Each scanline is prefixed by one
    # filter-type byte, hence the (1 + stride) row pitch.
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # The "left" reference byte is one full pixel (3 bytes) back, so
            # it only exists from x >= 3; missing neighbours count as 0 per
            # the PNG filter specification.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # Upper-left neighbour, again only valid past the first pixel
                # of the row and the first row.
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick whichever neighbour is closest to p.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6026
6027
def write_xattr(path, key, value):
    """Write the extended attribute *key* (str) with *value* (bytes) to *path*.

    Tries, in order: the pyxattr/xattr Python modules; on Windows, an NTFS
    Alternate Data Stream; otherwise the `setfattr` or `xattr` command-line
    tools.
    @raises XAttrMetadataError     if writing the attribute fails
    @raises XAttrUnavailableError  if no usable implementation is found
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): the message promises "Falling back to other
                # xattr implementations" but this raise aborts instead —
                # the TODO above is unimplemented.
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6110
6111
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict of stringified year/month/day under the given keys."""
    first, last = datetime.date(1950, 1, 1), datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(days=random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6122
6123
# Templates for internet shortcut files, which are plain text files.

# Windows/KDE `.url` internet shortcut (INI-style); %(url)s is filled in by
# the caller via %-formatting.
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS `.webloc` shortcut (an XML property list).
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org `.desktop` link entry (Linux desktop environments);
# takes both %(filename)s and %(url)s.
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6149
6150
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    @param iri    IRI string; must have a hostname (IPv6 bracket syntax is
                  not supported)
    @returns      the equivalent ASCII-only URI string
    @raises ValueError  for IPv6 netlocs or otherwise malformed authority parts
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    # Rebuild the authority component piece by piece, quoting userinfo but
    # punycoding the hostname.
    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6193
6194
def to_high_limit_path(path):
    """On Windows, return the absolute path with the ``\\\\?\\`` prefix so it
    bypasses the MAX_PATH limit; on other platforms return *path* unchanged.
    (Individual path segments may still be length-limited.)"""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return r'\\?\ '.rstrip() + os.path.abspath(path)
6201
6202
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj (or obj[field] when *field* is given) with *template*.
    Values listed in *ignore* yield *default* instead; *func*, if given,
    post-processes any non-ignored value before formatting."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
6211
6212
def clean_podcast_url(url):
    """Strip known podcast analytics/tracking redirect prefixes (podtrac,
    blubrry, chartable, acast, podcorn, podsights, ...) from a media URL,
    leaving the direct URL to the audio file."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6228
6229
6230 _HEX_TABLE = '0123456789abcdef'
6231
6232
6233 def random_uuidv4():
6234 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6235
6236
def make_dir(path, to_screen=None):
    """Ensure the parent directory of *path* exists.

    @param path       file path whose directory should be created
    @param to_screen  optional callable used to report a failure message
    @returns True on success (or nothing to create), False on failure
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Bug fix: the old test `callable(to_screen) is not None` was always
        # truthy (callable() returns a bool, never None), so passing
        # to_screen=None crashed with a TypeError on the error path.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6247
6248
def get_executable_path():
    """Return the absolute directory that yt-dlp is running from: the
    executable's directory for PyInstaller builds, or the project root
    relative to this module otherwise."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # PyInstaller bundle
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # zip archive
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:  # plain source checkout/installation
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6258
6259
def load_plugins(name, suffix, namespace):
    """Load classes whose names end in *suffix* from the module *name* inside
    the ytdlp_plugins/ directory next to the executable, injecting each new
    class into *namespace*.

    @returns list of the classes that were loaded (empty if module missing)
    NOTE(review): relies on the deprecated `imp` module (removed in
    Python 3.12); should migrate to importlib eventually.
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        # Note: `name` is intentionally reused here as the loop variable;
        # the module name is no longer needed past load_module().
        for name in dir(plugins):
            if name in namespace:
                continue  # never overwrite an existing entry
            if not name.endswith(suffix):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        # Missing plugin directory/module is not an error.
        pass
    finally:
        # find_module() returns an open file object as the first element;
        # close it even if loading failed.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
6281
6282
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        # Normalize all string keys up front so dict lookups below can
        # compare lowercased forms.
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal, reset per path) records how many branching
        # levels ("..." / tuple keys) were entered, so the caller knows how
        # many nesting levels of result lists to flatten.
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # A tuple key branches into one result per sub-key, then is
                # treated like "..." for the rest of this path.
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # "..." fans out over every value of the current object.
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Case-insensitive fallback scans items only when the exact
                # key is absent.
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # User-supplied keys on sequences become ints or slices
                    # ('1:3' etc.); a bare ':' behaves like "...".
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Build the final-value filter from expected_type (isinstance check,
    # arbitrary callable, or pass-through).
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branching occurred: flatten the nested result lists down to
                # one level, drop Nones, then apply the type filter.
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6365
6366
def traverse_dict(dictn, keys, casesense=True):
    '''Deprecated wrapper around traverse_obj with user-input key handling.
    For backward compatibility. Do not use.'''
    return traverse_obj(
        dictn, keys, casesense=casesense,
        is_user_input=True, traverse_string=True)
6371
6372
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* unchanged if it is an iterable (excluding *allowed_types*,
    which are treated as scalars); otherwise wrap it in a 1-tuple."""
    is_sequence = isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types)
    return x if is_sequence else (x,)