# NOTE: the following header is web-scrape residue from the git web UI
# (jfr.im mirror of yt-dlp.git, commit "Basic framework for simultaneous
# download of multiple formats (#1036)", path yt_dlp/utils.py).
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import imp
20 import io
21 import itertools
22 import json
23 import locale
24 import math
25 import operator
26 import os
27 import platform
28 import random
29 import re
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_cookiejar,
47 compat_ctypes_WINFUNCTYPE,
48 compat_etree_fromstring,
49 compat_expanduser,
50 compat_html_entities,
51 compat_html_entities_html5,
52 compat_http_client,
53 compat_integer_types,
54 compat_numeric_types,
55 compat_kwargs,
56 compat_os_name,
57 compat_parse_qs,
58 compat_shlex_quote,
59 compat_str,
60 compat_struct_pack,
61 compat_struct_unpack,
62 compat_urllib_error,
63 compat_urllib_parse,
64 compat_urllib_parse_urlencode,
65 compat_urllib_parse_urlparse,
66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
69 compat_urllib_parse_unquote_plus,
70 compat_urllib_request,
71 compat_urlparse,
72 compat_xpath,
73 )
74
75 from .socks import (
76 ProxyType,
77 sockssocket,
78 )
79
80
def register_socks_protocols():
    """Teach urlparse to treat SOCKS URL schemes as having a netloc.

    In Python < 2.6.5, urlsplit() suffers from
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended to that list (at most once).
    """
    netloc_schemes = compat_urlparse.uses_netloc
    for proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proxy_scheme not in netloc_schemes:
            netloc_schemes.append(proxy_scheme)
88
89
# Type of a compiled regular expression pattern. This is not clearly
# defined otherwise (older Pythons do not expose it as re.Pattern), so it
# is derived from a compiled instance; used for isinstance() checks.
compiled_regex_type = type(re.compile(''))
92
93
def random_user_agent():
    """Return a random, realistic Chrome-on-Windows User-Agent string.

    A version string is drawn uniformly (random.choice) from a static table
    of real Chrome release versions (roughly Chrome 68-76) and interpolated
    into a fixed Windows 10 x64 UA template. Not cryptographically random;
    intended only to vary the UA presented to servers.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Table of real Chrome release version numbers; kept verbatim as data so
    # that every generated UA matches a version Google actually shipped.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Default HTTP headers sent with every request.
# The User-Agent is picked once per process (random_user_agent() is called
# at import time), so all requests in a run share one UA string.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings keyed by browser name, for code that needs
# to impersonate a specific browser instead of the default UA above
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Unique sentinel used to tell "caller supplied no default" apart from
# an explicit default of None (see xpath_element() and friends)
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, for parsing localized date strings
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized when guessing a format from a URL/filename
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to an ASCII transliteration)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1723
# strptime() formats tried in order when parsing dates; the two extended
# lists below add the formats that are ambiguous between day-first and
# month-first conventions
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Variants that interpret ambiguous numeric dates (e.g. 02/03/2021) day-first
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Variants that interpret ambiguous numeric dates month-first (US style)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the trailing argument list of "packed" javascript
# (eval(function(p,a,c,k,e,d){...}('...',N,N,'...'.split('|')) style)
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the payload of a <script type="application/ld+json"> block
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1789
1790
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    encoding = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        # Verify the reported encoding is actually usable before trusting it
        'TEST'.encode(candidate)
    except Exception:
        pass
    else:
        encoding = candidate
    return encoding
1804
1805
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The data is written to a uniquely-named temporary file in the target
    directory first and then renamed over fn, so readers never observe a
    half-written file. On any failure the temporary file is removed and the
    exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # (fixed: these helpers previously ignored their argument and
        # closed over fn instead)
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with mode 0o600; widen to the
            # default permissions implied by the current umask
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1864
1865
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # Attribute names are restricted so they can be embedded into the
        # XPath expression without any escaping
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """Fallback for Python < 2.7, where ElementTree does not support
        attribute predicates in XPath expressions."""
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1880
1881 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1882 # the namespace parameter
1883
1884
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into '{uri}tag' using ns_map."""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            # No namespace prefix on this step
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1895
1896
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath`.

    `xpath` may be a single expression or an iterable of expressions tried
    in order. On no match: returns `default` if one was supplied, raises
    ExtractorError if `fatal`, otherwise returns None.
    """
    def _find(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        n = _find(xpath)
    else:
        for xp in xpath:
            n = _find(xp)
            if n is not None:
                break

    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = xpath
        raise ExtractorError('Could not find XML element %s' % name)
    return None
1918
1919
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but returns the matched element's text."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    # Element exists but has no text content
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = xpath
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
1933
1934
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute `key` on the element matching `xpath`."""
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
1946
1947
def get_element_by_id(id, html):
    """Return the inner content of the first tag whose id attribute equals `id`."""
    return get_element_by_attribute('id', id, html)
1951
1952
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying the given CSS class, or None."""
    for content in get_elements_by_class(class_name, html):
        return content
    return None
1957
1958
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the given attribute/value, or None."""
    for content in get_elements_by_attribute(attribute, value, html, escape_value):
        return content
    return None
1962
1963
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class_name as a whole word anywhere inside the class attribute
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1969
1970
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    if escape_value:
        value = re.escape(value)

    # Verbose regex: an opening tag with the attribute somewhere in it,
    # its content, and the matching closing tag
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    results = []
    for match in re.finditer(pattern, html):
        content = match.group('content')

        # Strip a stray leading quote pair left by sloppy markup
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1994
1995
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # attrs: mapping of attribute name -> value (None for valueless
        # attributes) of the most recent start tag fed to the parser
        self.attrs = {}
        # NOTE: base initializer is invoked explicitly (not via super()) —
        # on Python 2 the HTMLParser base may be an old-style class
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Callback from the base parser; attrs is a list of (name, value) pairs
        self.attrs = dict(attrs)
2005
2006
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # return whatever was gathered up to that point
        pass
    return attr_parser.attrs
2031
2032
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Literal newlines are presentation-only in HTML; real line breaks come
    # from <br> and paragraph boundaries, which are converted below
    html = html.replace('\n', ' ')
    for pattern, replacement in (
            (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
            (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
            # Strip all remaining tags
            ('<.*?>', '')):
        html = re.sub(pattern, replacement, html)
    # Replace html entities and trim surrounding whitespace
    return unescapeHTML(html).strip()
2048
2049
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means standard output; on Windows it must be switched
            # to binary mode first
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # Permission problems will not be fixed by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
2080
2081
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable RFC 2822 date
        return None
    return email.utils.mktime_tz(parsed)
2089
2090
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def _map_char(char):
        # Transliterate accented characters in restricted mode
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps: turn 12:34:56 into 12_34_56 before the generic pass
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(_map_char(c) for c in s)
    if not is_id:
        # Collapse underscore runs and trim junk from the edges
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2132
2133
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    Replaces characters forbidden in Windows path components with '#'.
    On non-Windows platforms the path is returned unchanged unless `force`
    is set (e.g. when preparing a path destined for a Windows filesystem).
    """
    if sys.platform == 'win32':
        # On Windows the full treatment is always applied; the force flag
        # is only meaningful on other platforms
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            # splitunc() only exists on old Pythons; newer splitdrive()
            # already handles UNC paths
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # '.' and '..' are kept verbatim; trailing spaces/dots and reserved
    # characters are replaced in every other component
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve absoluteness of the original path in force mode
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2157
2158
def sanitize_url(url):
    """Give protocol-relative URLs an http: scheme and fix common scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://')):
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
2175
2176
def extract_basic_auth(url):
    """Strip userinfo from `url`; return (clean_url, auth_header_or_None)."""
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Rebuild the netloc without the user:password@ part
    if parts.port is None:
        netloc = parts.hostname
    else:
        netloc = '%s:%d' % (parts.hostname, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    auth_payload = base64.b64encode(credentials.encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2187
2188
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized/escaped URL, moving any
    inline basic-auth credentials into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        if len(args) >= 2:
            # headers were passed positionally
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2195
2196
def expand_path(s):
    """Expand environment variables and the user home marker (~) in s."""
    return os.path.expandvars(compat_expanduser(s))
2200
2201
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable, keeping first-seen order """
    # A list (not a set) is used for membership so unhashable elements work too
    seen = []
    for item in iterable:
        if item in seen:
            continue
        seen.append(item)
    return seen
2209
2210
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    name = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if name in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[name])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: decimal (#160) or hexadecimal (#xA0)
    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', name)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % name
2240
2241
def unescapeHTML(s):
    """Replace HTML entities in `s` with the characters they represent."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2249
2250
def escapeHTML(text):
    """Escape the five characters that are special in HTML.

    '&' is replaced first so already-produced entities are not re-escaped.
    """
    for char, entity in (
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
            ('"', '&quot;'),
            ("'", '&#39;')):
        text = text.replace(char, entity)
    return text
2260
2261
def process_communicate_or_kill(p, *args, **kwargs):
    """p.communicate(...), but make sure the process is killed if anything
    goes wrong (including KeyboardInterrupt) so it cannot outlive us."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        # Reap the process to avoid a zombie before re-raising
        p.wait()
        raise
2269
2270
def get_subprocess_encoding():
    """Return the text encoding to use when talking to subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding
2281
2282
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    use_unicode_win_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if use_unicode_win_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2305
2306
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): decode a byte filename on Python 2.

    On Python 3 (or if `b` is already text) the value is returned unchanged.
    """
    if sys.version_info >= (3, 0):
        return b
    if not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2316
2317
def encodeArgument(s):
    """Encode a command-line argument for subprocess use, tolerating
    legacy byte-string input."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2325
2326
def decodeArgument(b):
    """Decode a subprocess argument (counterpart of encodeArgument)."""
    return decodeFilename(b, True)
2329
2330
def decodeOption(optval):
    """Decode a command-line option value to text if it arrived as bytes."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    # By this point the value must be text
    assert isinstance(optval, compat_str)
    return optval
2339
2340
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]M<delim>SS (or plain S
    for sub-minute durations).

    @param secs   Duration in seconds (int or float)
    @param delim  Separator between the time components
    @param msec   If True, append '.mmm' (milliseconds)
    """
    # >= (not >) so exactly 60 s / 3600 s roll over to the next unit
    # instead of rendering as '60' / '60:00'
    if secs >= 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    # secs % 1 is the fractional part; scale to milliseconds before the
    # integer conversion (previously '%03d' % (secs % 1) was always 000)
    return '%s.%03d' % (ret, secs % 1 * 1000) if msec else ret
2349
2350
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler with the best SSL context the running
    Python supports, honoring the 'nocheckcertificate' option.

    The fallbacks are ordered: modern create_default_context(), then a
    handler without a context (very old Pythons), then a manual SSLContext.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2374
2375
def bug_reports_message(before=';'):
    """Return the standard 'please report this issue' blurb, joined onto
    `before` with sentence capitalization adjusted to fit."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = (
        'please report this issue on https://github.com/yt-dlp/yt-dlp .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
    ) % update_cmd

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence, so capitalize the first word
        msg = msg[0].title() + msg[1:]

    if before:
        return before + ' ' + msg
    return msg
2390
2391
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The exception classes defined in this module derive from this class,
    so callers can catch them all with a single `except YoutubeDLError`.
    """
    pass
2395
2396
# Exception classes that indicate network-level failures; built as a list,
# then frozen to a tuple so it can be used directly in `except` clauses
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    # Not present on some very old Python builds
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2401
2402
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # If we are raised while handling a network error, the failure is
        # expected (not a yt-dlp bug), so skip the bug-report blurb below
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie  # the extractor involved, if any
        self.exc_info = sys.exc_info()  # preserve original exception

        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        # Returns the formatted original traceback, or None if none was given
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2432
2433
class UnsupportedError(ExtractorError):
    """Raised when no suitable extractor exists for the given URL."""

    def __init__(self, url):
        self.url = url
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
2439
2440
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match the input it was expected to."""
    pass
2444
2445
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # countries: optional country information associated with the
        # restriction (stored as given)
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2457
2458
class DownloadError(YoutubeDLError):
    """Download Error exception.

    Thrown by FileDownloader objects that are not configured to continue
    on errors; carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2471
2472
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict.
    """
    pass
2480
2481
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2489
2490
class PostProcessingError(YoutubeDLError):
    """Raised by a PostProcessor's .run() method to signal that the
    postprocessing task failed."""

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the message accessible as an attribute as well
        self.msg = msg
2501
2502
class ExistingVideoReached(YoutubeDLError):
    """ An already-downloaded video was encountered (--break-on-existing).

    NOTE(review): previous docstring said '--max-downloads limit has been
    reached' — a copy-paste from MaxDownloadsReached; corrected per the
    class name.
    """
    pass
2506
2507
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video was encountered (--break-on-reject).

    NOTE(review): previous docstring said '--max-downloads limit has been
    reached' — a copy-paste from MaxDownloadsReached; corrected per the
    class name.
    """
    pass
2511
2512
class ThrottledDownload(YoutubeDLError):
    """ Raised when the download speed falls below --throttled-rate. """
    pass
2516
2517
class MaxDownloadsReached(YoutubeDLError):
    """ The --max-downloads limit has been reached. """
    pass
2521
2522
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2530
2531
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised by FileDownloader objects when a downloaded file is smaller
    than the size the server announced, indicating the connection was
    probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both sizes are byte counts
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
2547
2548
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended-attribute metadata fails; `reason`
    classifies the failure as NO_SPACE, VALUE_TOO_LONG or NOT_SUPPORTED."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Classify the failure from the errno and/or the message text
        no_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        value_too_long = code == errno.E2BIG or 'Argument list too long' in msg
        if no_space:
            self.reason = 'NO_SPACE'
        elif value_too_long:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2563
2564
class XAttrUnavailableError(YoutubeDLError):
    # Presumably raised when extended-attribute (xattr) support is not
    # available on the system — the raising code is elsewhere; verify there
    pass
2567
2568
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class`, patched so outgoing connections honor the
    'source_address' parameter from the handler's params."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the shape of source_address
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address; keep the last error for re-raising
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support at all: replace connect() entirely
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2632
2633
def handle_youtubedl_headers(headers):
    """Process the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, every 'Accept-Encoding' header (any casing)
    is dropped together with the marker itself, so the outgoing request does
    not advertise compression support. When it is absent, the original
    header mapping is returned unmodified (same object).
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    cleaned = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    # The marker never matches 'accept-encoding', so it is still present
    del cleaned['Youtubedl-no-compression']
    return cleaned
2642
2643
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL parameter dict; stashed for use by the
        # connection factory (_create_http_connection reads it)
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # A SOCKS proxy is smuggled in via a private header; if present,
        # swap in a connection class that tunnels through it and drop the
        # header so it is never sent on the wire.
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        # Decompress a 'deflate'-encoded body. Try raw deflate first
        # (-MAX_WBITS: no zlib header); some servers send a zlib-wrapped
        # stream instead, so fall back to the standard decoder.
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Add any standard header the caller did not set explicitly
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes stripped off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2767
2768
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of *base_class* whose connect() tunnels through a
    SOCKS proxy.

    base_class must be HTTPConnection or HTTPSConnection; socks_proxy is a
    proxy URL such as 'socks5://user:pass@host:port'. Credentials may be
    percent-encoded in the URL.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously an unrecognized scheme fell through and triggered an
        # opaque UnboundLocalError below; fail with an explicit error.
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # username/password may be percent-encoded; empty/None pass through
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunneled socket in TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2810
2811
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that supports custom connection classes and SOCKS
    proxies (smuggled through the private 'Ytdl-socks-proxy' header)."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class

        extra_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        proxy = req.headers.get('Ytdl-socks-proxy')
        if proxy:
            # Tunnel through SOCKS and strip the private marker header
            conn_class = make_socks_conn_class(conn_class, proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **extra_kwargs)
2835
2836
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in cookies.txt files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookies.txt entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate one cookies.txt line, stripping the HttpOnly marker
            # so MozillaCookieJar can parse the rest of the entry
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Copy valid lines into an in-memory buffer, warning about (and
        # skipping) malformed entries instead of failing the whole load
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2953
2954
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies cookie handling to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # NOTE: a Python 2 workaround used to live here: non-ASCII characters
        # in a Set-Cookie header of the previous response would break the
        # next HTTP request, so the header was percent-encoded before being
        # handed to HTTPCookieProcessor (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769). That code is
        # disabled; responses are currently passed through unchanged.
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2977
2978
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Build the follow-up Request for a redirect, or raise HTTPError.

        Called by the http_error_30x methods when a redirection response is
        received. Returns a new Request so http_error_30x can perform the
        redirect, or raises HTTPError when no one else should try to handle
        this URL.
        """
        method = req.get_method()
        follow_as_get = code in (301, 302, 303, 307, 308) and method in ('GET', 'HEAD')
        follow_post = code in (301, 302, 303) and method == 'POST'
        if not (follow_as_get or follow_post):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request must not carry over the original payload
        # description headers (NB: generator form kept for python 2.6)
        stripped_headers = ('content-length', 'content-type')
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in stripped_headers)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3034
3035
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (utc_offset, remaining_date_str); the offset is a zero
    timedelta when no usable timezone designator is found.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
            (?P<sign>\+|-)                                       # +/-
            (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})           # hh[:]mm
            $)
        ''', date_str)
    if m is None:
        return datetime.timedelta(), date_str

    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # Bare 'Z' (UTC) carries no explicit offset
        return datetime.timedelta(), date_str

    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3060
3061
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds cannot be represented by the strptime format below
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        # Unparsable date -> None
        return None
    return calendar.timegm(dt.timetuple())
3079
3080
def date_formats(day_first=True):
    """Return the date-format table: day-first (DD.MM) or month-first (MM.DD)."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3083
3084
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None

    # Commas, AM/PM markers (optionally followed by a timezone name) and
    # timezone designators only get in the way of the format table below
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    upload_date = None
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3111
3112
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from *date_str*, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker shifts the clock by twelve hours
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
        except ValueError:
            continue
        return calendar.timegm(dt.timetuple())

    # Last resort: RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3144
3145
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Take what follows the last '.' before any query string
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3157
3158
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename: the media name with a '<lang>.<format>' extension."""
    new_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, new_ext, expected_real_ext)
3161
3162
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is None:
        # Plain absolute date
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # Relative date: resolve the base recursively, then apply the offset
    start_time = datetime_from_str(match.group('start'), precision, format)
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    if unit in ('month', 'year'):
        # timedelta has no month/year units; use calendar-aware addition
        new_date = datetime_add_months(start_time, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        new_date = start_time + datetime.timedelta(**{unit + 's': amount})

    if auto_precision:
        return datetime_round(new_date, unit)
    return new_date
3203
3204
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to parse date_str
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3213
3214
def datetime_add_months(dt, months):
    """Return *dt* shifted by *months* (may be negative), clamping the day
    to the last valid day of the resulting month."""
    month_index = dt.month - 1 + months        # zero-based month count
    year = dt.year + month_index // 12
    month = month_index % 12 + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3222
3223
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        # Nothing coarser than a microsecond to round to
        return dt

    seconds_per_unit = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    stamp = calendar.timegm(dt.timetuple())
    rounded = ((stamp + seconds_per_unit / 2) // seconds_per_unit) * seconds_per_unit
    return datetime.datetime.utcfromtimestamp(rounded)
3240
3241
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format; any other
    string is returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(match.groups()) if match else date_str
3250
3251
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the widest representable range
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3281
3282
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # On Python 2 this may come back as bytes; decode with the locale encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3291
3292
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070
    # Writes Unicode to a real Windows console via WriteConsoleW, which
    # handles characters the console codec cannot encode.

    import ctypes
    import ctypes.wintypes

    # Map file descriptors to the Win32 standard-handle IDs
    # (1 -> STD_OUTPUT_HANDLE, 2 -> STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is only a console if it is a local character device
        # AND GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        # (such characters occupy two UTF-16 code units)
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character: write its
        # surrogate pair (2 UTF-16 units) in one call
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3366
3367
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (stderr by default), coping with console
    and byte-stream quirks across platforms and Python versions."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows, prefer writing straight to the console API when possible
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    wants_bytes = ('b' in getattr(out, 'mode', '')
                   or sys.version_info[0] < 3)  # Python 2 lies about mode of sys.stderr
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode explicitly
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3388
3389
def bytes_to_intlist(bs):
    """Turn a byte string into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):
        # Python 3: iterating bytes already yields ints
        return list(bs)
    # Python 2: iterating a str yields 1-char strings
    return [ord(ch) for ch in bs]
3397
3398
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a byte string."""
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
3403
3404
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) using LockFileEx/UnlockFileEx on Windows and flock()
# elsewhere, with no-op-with-error fallbacks when neither is available.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Win32 OVERLAPPED structure required by LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering the whole file (low/high halves of the length)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED alive on the file object until unlock
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3478
3479
class locked_file(object):
    """Context manager wrapping an open file with an advisory lock.

    Opened read-only files take a shared lock; writable/append files take
    an exclusive one. The file is closed on exit (or if locking fails).
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Only plain reads may share the lock
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3509
3510
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to UTF-8 when unknown."""
    enc = sys.getfilesystemencoding()
    if enc is None:  # can happen on some Python 2 setups
        return 'utf-8'
    return enc
3514
3515
def shell_quote(args):
    """Return a single shell-escaped command line built from *args*."""
    encoding = get_filesystem_encoding()

    def _as_text(a):
        # Filenames may arrive as bytes (via 'encodeFilename')
        return a.decode(encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(_as_text(a)) for a in args)
3525
3526
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '#'.join((url, sdata))
3535
3536
def unsmuggle_url(smug_url, default=None):
    """Extract data hidden in a URL by smuggle_url().

    Returns (url, data); *default* is returned as data when nothing was
    smuggled.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3544
3545
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.50KiB'.

    Accepts int/float or a numeric string; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp: values in (0, 1) would yield a negative exponent and pick
        # 'YiB' via negative indexing; absurdly large values would raise
        # IndexError past 'YiB'.
        exponent = min(max(int(math.log(bytes, 1024.0)), 0), 8)
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3558
3559
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number><unit>' from s using unit_table and return
    the value in base units as an int, or None when s does not match."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # ',' is accepted as a decimal separator
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3569
3570
def parse_filesize(s):
    """Parse a human-readable file size like '5.5MiB' or '10 MB' into bytes,
    or return None when s is None or unrecognized."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3640
3641
def parse_count(s):
    """Parse an abbreviated count like '1.2M' or '15k' into an int."""
    if s is None:
        return None

    s = s.strip()

    # A plain number (possibly with thousands separators)
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(_UNIT_TABLE, s)
3661
3662
def parse_resolution(s):
    """Extract video dimensions from strings like '1920x1080', '720p' or '4k'.

    Returns a dict with 'width' and/or 'height' keys, or {} if nothing found.
    """
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {'width': int(m.group('w')), 'height': int(m.group('h'))}

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(m.group(1)) * 540}

    return {}
3683
3684
def parse_bitrate(s):
    """Extract an integer kbps value from a string like '128 kbps'."""
    if not isinstance(s, compat_str):
        return
    m = re.search(r'\b(\d+)\s*kbps', s)
    return int(m.group(1)) if m else None
3691
3692
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3702
3703
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbrevs = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbrevs.index(abbrev) + 1
    except ValueError:
        return None
3712
3713
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in xml_str by '&amp;', leaving existing
    named/decimal/hex entity references untouched."""
    bare_amp = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_amp.sub('&amp;', xml_str)
3720
3721
def setproctitle(title):
    """Set the process title (as shown by ps/top) via libc prctl() on Linux.

    Silently does nothing where unsupported (Jython, missing libc.so.6,
    or a libc without prctl).
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME (see prctl(2))
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3746
3747
def remove_start(s, start):
    """Strip the prefix `start` from `s` if present; None passes through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3750
3751
def remove_end(s, end):
    """Strip the suffix `end` from `s` if present; None passes through."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3754
3755
def remove_quotes(s):
    """Strip one layer of matching single or double quotes around s."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3763
3764
def get_domain(url):
    """Extract the 'site.tld' part of a URL (www. stripped), or None."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return m.group('domain') if m else None
3768
3769
def url_basename(url):
    """Return the last path component of a URL ('' for an empty path)."""
    path = compat_urlparse.urlparse(url).path
    return path.rstrip('/').rpartition('/')[2]
3773
3774
def base_url(url):
    """Return the URL up to (and including) the final '/' before any
    query/fragment. Raises AttributeError when the URL has no path slash."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3777
3778
def urljoin(base, path):
    """Join base and path like compat_urlparse.urljoin, returning None
    when either part is missing or unusable."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # A path that is already absolute (scheme:// or protocol-relative //)
    # is returned unchanged
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3792
3793
class HEADRequest(compat_urllib_request.Request):
    # Request subclass that always uses the HTTP HEAD method
    def get_method(self):
        return 'HEAD'
3797
3798
class PUTRequest(compat_urllib_request.Request):
    # Request subclass that always uses the HTTP PUT method
    def get_method(self):
        return 'PUT'
3802
3803
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int scaled by invscale/scale, or return default.

    When get_attr is given, v is first replaced by getattr(v, get_attr).
    None and '' both yield default, as do unconvertible values.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3816
3817
def str_or_none(v, default=None):
    """Coerce v to compat_str, mapping None to default."""
    if v is None:
        return default
    return compat_str(v)
3820
3821
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    elif isinstance(int_str, compat_str):
        # Drop thousands separators ('.', ',') and a '+' sign
        int_str = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(int_str)
3829
3830
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale/scale; default on failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3838
3839
def bool_or_none(v, default=None):
    """Return v only when it is an actual bool; otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3842
3843
def strip_or_none(v, default=None):
    """Return v.strip() when v is a string, otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3846
3847
def url_or_none(url):
    """Return the stripped URL if it uses a supported scheme
    (http(s)/rtmp-family/mms/ftp(s), or protocol-relative //), else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3853
3854
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or a 'YYYYMMDD' string using date_format,
    returning default when the input cannot be interpreted."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        # dt is None for unsupported types -> AttributeError -> default
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3865
3866
def parse_duration(s):
    """Parse a duration string into seconds (int/float), or None.

    Supports '[[[dd:]hh:]mm:]ss[.ms]', ISO-8601-like 'PT1H2M3S' /
    '1d 2h 3m 4s' forms, and plain 'X hours' / 'X mins'.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated form first
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Unit-suffixed form (optionally ISO-8601 style with leading P/T);
        # years/months/weeks are matched but intentionally not captured
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Finally a bare fractional 'X hours' or 'X mins'
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3923
3924
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext before the real extension: 'a.mp4' + 'temp' -> 'a.temp.mp4'.

    If expected_real_ext is given but doesn't match the actual extension,
    ext is appended instead ('a.webm' -> 'a.webm.temp').
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3931
3932
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's extension with ext ('a.mp4' -> 'a.temp').

    If expected_real_ext is given but doesn't match the actual extension,
    ext is appended instead of replacing.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = name
    return '{0}.{1}'.format(base, ext)
3938
3939
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3949
3950
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3968
3969
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from '--version'-style output, returning
    `unrecognized` when no version can be found."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3979
3980
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    # Raised instead of the builtin so callers can distinguish
    # LazyList exhaustion from other IndexErrors
    class IndexError(IndexError):
        pass

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        self.__cache = []       # items consumed from the iterable so far
        self.__reversed = False  # logical view direction; data stays forward

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Drain the underlying iterable into the cache
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map a forward index to its reversed counterpart (None passes through)
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                # Translate the slice into coordinates of the forward cache
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Only consume as many items as the requested index/slice needs
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        # Flips the view in place and returns self for chaining
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4065
4066
class PagedList:
    """Base class for lazily paged sequences of entries.

    Subclasses implement _getslice(); pages are fetched via pagefunc and
    optionally cached per page number.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getpage(self, pagenum):
        results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = results
        return results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4096
4097
class OnDemandPagedList(PagedList):
    # PagedList that fetches pages one by one until the requested slice
    # is satisfied (suitable when the total page count is unknown)
    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested range within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4131
4132
class InAdvancePagedList(PagedList):
    # PagedList for sources where the total number of pages is known upfront
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Items to drop from the first page / total items still wanted
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
4156
4157
def uppercase_escape(s):
    """Decode \\UXXXXXXXX escape sequences embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
4164
4165
def lowercase_escape(s):
    """Decode \\uXXXX escape sequences embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
4172
4173
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Reserved/unreserved punctuation is left intact
    safe = b"%/;:@&=+$,!~*'()?#[]"
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, safe)
4179
4180
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    return parts._replace(
        # IDNA-encode the host; percent-escape the remaining components
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    ).geturl()
4191
4192
def parse_qs(url):
    # Parse a URL's query string into a dict of lists
    return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4195
4196
def read_batch_urls(batch_fd):
    """Read a batch file object and return the URLs it contains.

    Skips blank lines and comment lines starting with '#', ';' or ']';
    strips BOM remnants and trailing ' #...' comments.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4214
4215
def urlencode_postdata(*args, **kargs):
    # URL-encode form data and ASCII-encode it for use as a POST body
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4218
4219
def update_url_query(url, query):
    """Return url with the given query parameters merged into it."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(params, True)))
4228
4229
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone a urllib Request, optionally overriding url, data, headers
    and query, while preserving its method and timeout."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4248
4249
def _multipart_encode_impl(data, boundary):
    """Encode dict `data` as multipart/form-data using `boundary`.

    Returns (body_bytes, content_type); raises ValueError when the
    boundary occurs inside any encoded field.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += part

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
4270
4271
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    user_supplied_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # Collision with data: retry with a fresh random boundary,
            # unless the caller pinned it explicitly
            if user_supplied_boundary:
                raise
            boundary = None
4300
4301
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable key from key_or_keys in dict d.

    None values (and, when skip_false_values, any falsy values) are
    skipped; `default` is returned when no key yields a usable value.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d or d[key] is None:
            continue
        if skip_false_values and not d[key]:
            continue
        return d[key]
    return default
4310
4311
def try_get(src, getter, expected_type=None):
    """Apply each getter to src and return the first result that neither
    raises nor fails the expected_type check; None otherwise."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4321
4322
def merge_dicts(*dicts):
    """Merge dicts left-to-right: earlier values win, except that an
    empty-string value may be overwritten by a later non-empty string.
    None values are never merged in."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k not in merged:
                merged[k] = v
                continue
            # Upgrade an existing empty string to a non-empty one
            if (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]):
                merged[k] = v
    return merged
4335
4336
# NB: the default `encoding` is evaluated once, at module import time
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    # Coerce bytes (or anything non-str) to compat_str using the given encoding
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4339
4340
# US MPAA movie ratings mapped to a minimum viewer age
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to a minimum viewer age
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4358
4359
def parse_age_limit(s):
    """Normalize an age limit (int in [0, 21], 'NN+', US movie rating or
    TV parental guideline string) to an int, or None if unrecognized."""
    if type(s) == int:
        # exact type check: keeps non-int types (incl. bool subclassing) out
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4375
4376
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, returning the bare payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4385
4386
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal into valid JSON text."""
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally used as object keys)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite a single matched token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!' prefixes and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Re-quote string literals with double quotes, normalizing escapes
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Keys become quoted strings; plain values stay numeric
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Bare identifiers become quoted strings
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4433
4434
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4443
4444
# Default output filename templates ('chapter' is used when splitting videos)
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Output template types mapped to the default filename infix for that type
# (None = no separate default infix)
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Template for matching %-style format fields; .format() args:
#  {0} = regex for the mapping key, {1} = regex for the conversion type
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?  # mapping key
    (?P<format>
        (?:[#0\-+ ]+)?  # conversion flags (optional)
        (?:\d+)?  # minimum field width (optional)
        (?:\.\d+)?  # precision (optional)
        [hlL]?  # length modifier (optional)
        {1}  # conversion type
    )
    '''


STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4479
4480
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4489
4490
def version_tuple(v):
    """Split a dot/dash separated version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4493
4494
def is_outdated_version(version, limit, assume_new=True):
    """Compare two version strings; when `version` is missing or cannot be
    parsed, fall back to `not assume_new`."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4502
4503
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Self-update is currently disabled, so always report False.
    # The original detection logic was left as unreachable code after the
    # early return; it is kept here as a comment for reference:
    #     from zipimport import zipimporter
    #     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
    return False
4511
4512
def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(map(compat_shlex_quote, args))
4516
4517
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    msg = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        msg = msg.decode(preferredencoding())
    return msg
4525
4526
def mimetype2ext(mt):
    """Map a MIME type (possibly with ';'-parameters) to a file extension."""
    if mt is None:
        return None

    # Full-type mappings that cannot be derived from the subtype alone
    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    ext = full_type_map.get(mt)
    if ext is not None:
        return ext

    # Otherwise map on the subtype, with any parameters removed
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()

    subtype_map = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return subtype_map.get(subtype, subtype)
4564
4565
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into 'vcodec'/'acodec' fields."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # Classify by the codec family (the part before the first '.')
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: with exactly two entries, assume video+audio order
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
4595
4596
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers, preferring the
    Content-Disposition filename over the Content-Type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
4609
4610
def encode_data_uri(data, mime_type):
    """Build a base64-encoded 'data:' URI for the given bytes and MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4613
4614
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone
        return False
    return age_limit < content_limit
4623
4624
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        # No BOM: assume UTF-8
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4643
4644
def determine_protocol(info_dict):
    """Infer the download protocol for an info dict: explicit 'protocol'
    field first, then URL scheme prefix, then file extension."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4665
4666
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(rows):
        # Widest cell (rendered as text) in each column
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def drop_masked(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # A zero maximum width means the column is empty in every data row
        widths = column_widths(data)
        header_row = drop_masked(header_row, widths)
        data = [drop_masked(row, widths) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        table = [header_row] + [['-' * width for width in widths]] + data
    # Left-justify all columns except the last, which is emitted as-is
    row_format = ' '.join('%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]) + ' %s'
    return '\n'.join(row_format % tuple(row) for row in table)
4687
4688
def _match_one(filter_part, dct, incomplete):
    """Evaluate one '&'-separated clause of a match filter against *dct*.

    filter_part -- e.g. "duration > 60", "title *= 'foo'", "!is_live"
    dct         -- dict whose fields are looked up by name
    incomplete  -- when true, comparisons on fields missing from dct match

    Returns a truthy/falsy result; raises ValueError on a malformed clause.
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    # Operators that are only meaningful for string operands
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    # Binary form: <key> [!]<op>[?] <value>, where '?' makes a missing
    # field count as a match
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape occurrences of the surrounding quote character
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try to parse it as a file size,
                # first as written (e.g. "500Ki"), then with a 'B' appended
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Field missing: match only in incomplete mode or with '?' suffix
            return incomplete or m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: presence test '<key>' or absence test '!<key>'
    # (booleans are tested for their value, not mere presence)
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4770
4771
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
    When incomplete, all conditions passes on missing fields
    """
    # Split on unescaped '&'; every clause must match (logical AND)
    clauses = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(clause.replace(r'\&', '&'), dct, incomplete)
        for clause in clauses)
4779
4780
def match_filter_func(filter_str):
    """Build a match-filter callback: it returns None when the video passes
    *filter_str*, or a human-readable skip message otherwise."""
    def _match_func(info_dict, *args, **kwargs):
        if not match_str(filter_str, info_dict, *args, **kwargs):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4789
4790
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float);
    returns None for empty or unrecognized input."""
    if not time_expr:
        return None

    # Plain offset in seconds, with optional 's' suffix (e.g. "12.3s")
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock format HH:MM:SS with an optional fraction; some encoders
    # separate the fraction with ':' instead of '.'
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes = int(mobj.group(1)), int(mobj.group(2))
        seconds = float(mobj.group(3).replace(':', '.'))
        return 3600 * hours + 60 * minutes + seconds
4802
4803
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode: HH:MM:SS,mmm."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d truncates each float component toward zero
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4806
4807
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Legacy TTML namespaces are rewritten to the current ones so a single
    # set of XPath expressions covers all inputs
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that can be expressed as SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}          # style id -> dict of resolved styling properties
    default_style = {}   # style inherited from the body/div element

    class TTMLPElementParser(object):
        # NOTE(review): class-level attributes; the lists are mutated in
        # place but, for well-formed input, every start() push is balanced
        # by an end() pop. _out rebinds as an instance attribute on the
        # first '+=' since str is immutable.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already active from an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the <p> subtree and feed it through the
        # SRT-markup-building parser above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; re-run once more whenever a parent style
    # appears after its child in document order
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on <body> or <div> becomes the default for all paragraphs
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4970
4971
def cli_option(params, command_option, param):
    """Return [command_option, value] when *param* is present in *params*,
    else []. The value is stringified so the result can be appended
    directly to a subprocess argument list."""
    param = params.get(param)
    # Convert whenever the value is not None: the previous truthiness check
    # left falsy non-None values (e.g. 0) in the result unconverted
    return [command_option, compat_str(param)] if param is not None else []
4977
4978
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean setting as CLI args: either two items
    [option, value] or a single joined 'option<separator>value' item.
    Returns [] when the setting is absent."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4987
4988
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when the setting equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4992
4993
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Look up external-program arguments in *argdict* by trying each entry
    of *keys* in order; the first entry with any configured args wins.
    A key entry may be a single key or a tuple of keys whose args are
    concatenated. Returns *default* when nothing matches."""
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        arg_list = [argdict.get(key.lower()) for key in variadic(key_list)]
        arg_list = [args for args in arg_list if args is not None]
        if arg_list:
            # Flatten the per-key argument lists in order
            return [arg for args in arg_list for arg in args]
    return default
5012
5013
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Build the key-priority list for looking up the args of program *exe*
    under *main_key* and delegate to cli_configuration_args."""
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{k}' for k in (keys or [''])]
    # root_key ends up in keys exactly when an empty suffix was requested;
    # only then do the (main_key, exe) pair and 'default' fallbacks apply
    if root_key in keys:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    else:
        # Suffixed variants only: disable the plain-list compat fallback
        use_compat = False
    return cli_configuration_args(argdict, keys, default, use_compat)
5025
5026
class ISO639Utils(object):
    """Conversion helpers between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters matter, so 'en-US' maps like 'en'
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # First match in insertion order wins (e.g. 'heb' -> 'he', not 'iw')
        return next(
            (short for short, long_ in cls._lang_map.items() if long_ == code),
            None)
5230
5231
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166 two-letter codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Codes are stored uppercase; accept any input casing
        normalized = code.upper()
        return cls._country_map.get(normalized)
5490
5491
class GeoUtils(object):
    """Helpers for faking a geo location by picking a random IPv4 address
    from a country's major address block."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (str) within the given CIDR block,
        or within the major block of a two-letter country code.
        Returns None for an unknown country code."""
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        base, prefix_len = block.split('/')
        # Host bits of the block are the low (32 - prefix_len) bits
        addr_min = compat_struct_unpack('!L', socket.inet_aton(base))[0]
        addr_max = addr_min | (0xffffffff >> int(prefix_len))
        picked = random.randint(addr_min, addr_max)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', picked)))
5750
5751
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets each request override the proxy via
    the internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            # Default arguments bind type/meth per iteration, avoiding the
            # late-binding closure pitfall; '__noproxy__' marks "no override"
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (set through the internal header) wins over
        # the handler-level configuration; the header is stripped so it is
        # never sent over the wire
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do the wrapping of the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5775
5776
5777 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5778 # released into Public Domain
5779 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5780
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n <= 0:
        # Preserve the historical behavior: non-positive input yields a
        # single zero byte (before padding)
        s = b'\000'
    else:
        # int.to_bytes replaces the hand-rolled 4-byte-word loop and the
        # manual leading-zero stripping
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # Pad the front with zero bytes up to a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5809
5810
5811 def bytes_to_long(s):
5812 """bytes_to_long(string) : long
5813 Convert a byte string to a long integer.
5814
5815 This is (essentially) the inverse of long_to_bytes().
5816 """
5817 acc = 0
5818 length = len(s)
5819 if length % 4:
5820 extra = (4 - length % 4)
5821 s = b'\000' * extra + s
5822 length = length + extra
5823 for i in range(0, length, 4):
5824 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5825 return acc
5826
5827
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Interpret the reversed bytes as a hex number (i.e. little-endian input)
    payload = int(binascii.hexlify(data[::-1]), 16)
    # Textbook RSA: c = m^e mod N, rendered as lowercase hex
    return format(pow(payload, exponent, modulus), 'x')
5843
5844
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data does not fit (needs 11 bytes of overhead)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # EME-PKCS1-v1_5: EM = 0x00 || 0x02 || PS || 0x00 || M, where the padding
    # string PS must consist of *nonzero* octets (RFC 8017 section 7.2.1) --
    # a zero octet would terminate the padding early on decryption.
    # (The previous randint(0, 254) could emit zero bytes.)
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5858
5859
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*.

    *table* supplies the digit characters; when omitted, the first *n*
    characters of 0-9, a-z, A-Z are used.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5876
5877
def decode_packed_codes(code):
    """Unpack JavaScript 'p,a,c,k,e,d'-style obfuscated code: every word in
    the packed source is a base-N index into the symbol list."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-N-encoded index to its symbol; an empty symbol slot
    # means the word stands for itself
    symbol_table = {}
    for idx in range(count):
        encoded = encode_base_n(idx, base)
        symbol_table[encoded] = symbols[idx] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5894
5895
def caesar(s, alphabet, shift):
    """Shift every character of *s* that occurs in *alphabet* by *shift*
    positions (with wrap-around); characters outside *alphabet* pass through
    unchanged. shift may be negative; shift == 0 returns *s* as-is.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    # Precompute the substitution once instead of calling alphabet.index()
    # per input character: O(len(s) + size) instead of O(len(s) * size).
    # setdefault keeps the first occurrence, matching str.index semantics
    # should the alphabet contain duplicates.
    table = {}
    for idx, char in enumerate(alphabet):
        table.setdefault(char, alphabet[(idx + shift) % size])
    return ''.join(table.get(char, char) for char in s)
5903
5904
def rot47(s):
    # ROT47: rotate the 94 printable ASCII characters ('!' .. '~') by 47
    # positions; applying it twice restores the original string.
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5907
5908
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted,value",...')
    into a dict; surrounding double quotes are stripped from values."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
5916
5917
def urshift(val, n):
    """Logical (unsigned) right shift of a 32-bit value by n bits."""
    if val < 0:
        # Reinterpret the negative value as its unsigned 32-bit equivalent
        val += 0x100000000
    return val >> n
5920
5921
5922 # Based on png2str() written by @gdkchan and improved by @yokrysty
5923 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into (width, height, pixels).

    pixels is a list of rows; each row is a flat list of reconstructed byte
    values. The code assumes 3 bytes per pixel (stride = width * 3); bit
    depth, colour type and interlacing from IHDR are NOT checked -- assumes
    8-bit non-interlaced RGB input (TODO confirm at call sites).

    Raises IOError when the PNG signature/IHDR is missing or no IDAT
    chunks are present.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]  # everything after the 8-byte PNG signature

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned int formats, selected by field width in bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk; width and height are its first two 4-byte fields
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The image data may be split across several IDAT chunks; concatenate them
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 bytes per pixel assumed)
    pixels = []

    def _get_pixel(idx):
        # Look up an already reconstructed byte by its flat (row-major) index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo the per-scanline filters (https://www.w3.org/TR/PNG/#9Filters)
    for y in range(height):
        # Each scanline is prefixed with one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Filter neighbours: "left" is the corresponding byte of the
            # previous pixel (3 bytes back), "up" the byte directly above;
            # both default to 0 at the image edges
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0  # upper-left neighbour

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: use the neighbour closest to p
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6027
6028
def write_xattr(path, key, value):
    """Set the extended attribute *key* to the bytes *value* on file *path*.

    Backends are tried in order: the pyxattr or xattr Python modules; on
    Windows, NTFS Alternate Data Streams; otherwise the setfattr / xattr
    command-line tools.

    Raises XAttrUnavailableError when no usable backend exists and
    XAttrMetadataError when the chosen backend fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Both pyxattr and xattr install as "xattr"; pyxattr exposes set()
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # The stream is addressed as "<path>:<name>" and written like a file
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as text, not raw bytes
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6111
6112
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict keyed by the given field names (values as strings)."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6123
6124
# Templates for internet shortcut files, which are plain text files.

# Windows '.url' internet shortcut (INI-style)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS '.webloc' shortcut (XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Freedesktop '.desktop' link entry (Linux desktops)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6150
6151
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): an explicit port 80 is dropped regardless of scheme,
    # including https (whose default is 443) -- confirm this is intended.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6194
6195
def to_high_limit_path(path):
    """On Windows, prefix the absolute path with \\\\?\\ to lift the MAX_PATH
    limit; on other platforms return *path* unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # The maximum allowed length for individual path segments may still be limited
    return '\\\\?\\' + os.path.abspath(path)
6202
6203
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format a value with *template*: the value is obj[field] when *field*
    is given, otherwise obj itself. Values listed in *ignore* produce
    *default*. *func*, when supplied, transforms the value first; its result
    is checked against *ignore* again before formatting."""
    if field is None:
        val = obj if obj is not None else default
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
6212
6213
def clean_podcast_url(url):
    # Strip known podcast tracking/analytics redirect prefixes, leaving the
    # direct media URL (e.g. 'https://chtbl.com/track/XX/host/ep.mp3' ->
    # 'https://host/ep.mp3').
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6229
6230
_HEX_TABLE = '0123456789abcdef'  # lowercase hex digits (used by random_uuidv4)
6232
6233
6234 def random_uuidv4():
6235 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6236
6237
def make_dir(path, to_screen=None):
    """Create the parent directory of *path* (and any missing ancestors).

    Returns True on success (or when nothing needs creating), False on
    failure. On failure, *to_screen*, when callable, is called with an
    error message.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Previously `if callable(to_screen) is not None:` -- always True,
        # so to_screen=None crashed with TypeError on the error path
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6248
6249
def get_executable_path():
    """Return the absolute base directory of the running program: the
    executable's directory under PyInstaller, otherwise the package root
    (two levels up inside a zipapp, one level up on disk)."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))
    # Inside a zip this module sits one directory deeper than on disk
    levels_up = '../..' if isinstance(globals().get('__loader__'), zipimporter) else '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), levels_up))
6259
6260
def load_plugins(name, suffix, namespace):
    """Load classes whose names end in *suffix* from the 'ytdlp_plugins/<name>'
    module next to the executable and register them into *namespace*.

    Returns the list of newly registered classes. A missing plugin module
    is silently ignored.
    """
    plugin_info = [None]
    classes = []
    try:
        # NOTE(review): `imp` is deprecated since Python 3.4 (importlib is
        # the replacement) -- confirm before porting to newer interpreters
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        # NOTE(review): this loop variable shadows the `name` parameter
        for name in dir(plugins):
            if name in namespace:
                continue  # never overwrite an existing entry
            if not name.endswith(suffix):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        pass  # no plugin module present -- not an error
    finally:
        # find_module returns an open file handle as the first element
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
6282
6283
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal) records how many branching steps ("..." or key
        # tuples) were taken, i.e. how deeply the result lists are nested
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # Try every alternative key, then flatten via the "..." branch
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Expand to all values of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Case-insensitive lookup scans items only when the exact key is absent
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings into int indices or slices
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A full slice selects every item, same as "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a value filter: None passes everything,
    # a type keeps only instances, any other callable is used as-is
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branching occurred: flatten the nested result lists down
                # to one level, dropping Nones, then filter by type
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6366
6367
def traverse_dict(dictn, keys, casesense=True):
    """Legacy wrapper around traverse_obj(). For backward compatibility only;
    do not use in new code."""
    return traverse_obj(
        dictn, keys, casesense=casesense,
        is_user_input=True, traverse_string=True)
6372
6373
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* unchanged when it is an iterable that should be treated as
    a collection; otherwise (including the atomic *allowed_types*) wrap it
    in a 1-tuple."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6376
6377
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])