]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
[cleanup] Misc cleanup
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import hashlib
20 import hmac
21 import importlib.util
22 import io
23 import itertools
24 import json
25 import locale
26 import math
27 import operator
28 import os
29 import platform
30 import random
31 import re
32 import socket
33 import ssl
34 import subprocess
35 import sys
36 import tempfile
37 import time
38 import traceback
39 import xml.etree.ElementTree
40 import zlib
41
42 from .compat import (
43 compat_HTMLParseError,
44 compat_HTMLParser,
45 compat_HTTPError,
46 compat_basestring,
47 compat_chr,
48 compat_cookiejar,
49 compat_ctypes_WINFUNCTYPE,
50 compat_etree_fromstring,
51 compat_expanduser,
52 compat_html_entities,
53 compat_html_entities_html5,
54 compat_http_client,
55 compat_integer_types,
56 compat_numeric_types,
57 compat_kwargs,
58 compat_os_name,
59 compat_parse_qs,
60 compat_shlex_quote,
61 compat_str,
62 compat_struct_pack,
63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
66 compat_urllib_parse_urlencode,
67 compat_urllib_parse_urlparse,
68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
74 compat_xpath,
75 )
76
77 from .socks import (
78 ProxyType,
79 sockssocket,
80 )
81
82
def register_socks_protocols():
    """Teach urlparse that SOCKS URL schemes carry a netloc component.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose protocol is not listed
    in urlparse.uses_netloc are not handled correctly, so each SOCKS
    scheme is appended to that list (once) here.
    """
    known_schemes = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in known_schemes:
            known_schemes.append(proto)
90
91
92 # This is not clearly defined otherwise
93 compiled_regex_type = type(re.compile(''))
94
95
def random_user_agent():
    """Return a randomized but realistic Chrome-on-Windows User-Agent string.

    One entry is drawn at random from a fixed tuple of real Chrome
    release/snapshot version numbers (Chrome 68-76) and substituted into a
    Windows 10 x64 Chrome UA template, so repeated calls are spread over
    many distinct, plausible UA strings.
    """
    # UA template for Chrome on 64-bit Windows 10; %s receives the version.
    _UA_TEMPLATE = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Historical Chrome build numbers, roughly newest to oldest.
    _CHROME_BUILDS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    chosen_version = random.choice(_CHROME_BUILDS)
    return _UA_TEMPLATE % chosen_version
1677
1678
# Default HTTP headers sent with every request; the User-Agent is chosen
# once per process by random_user_agent()
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings for extractors that need one
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel used to distinguish "no default supplied" from a default of None
NO_DEFAULT = object()
1694
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, used when parsing localized dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# File extensions recognized as media containers/codecs
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to an ASCII replacement of one or more chars)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# strptime() formats tried, in order, when parsing free-form dates
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Additional formats for locales writing day before month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional formats for locales writing month before day (e.g. US)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-style packed JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> block and captures its body
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1791
1792
def preferredencoding():
    """Return the system's preferred text encoding.

    Based on locale.getpreferredencoding(); falls back to UTF-8 when the
    reported encoding is missing or unusable.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Sanity-check that the reported codec actually exists and works
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1806
1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible.

    Writes to a temporary file in the same directory first and then
    renames it over fn, so readers never see a half-written file.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Temp file lives next to fn so the final os.rename stays on one filesystem
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates the file 0600; widen its mode to the
            # default file-creation permissions (0666 minus the umask)
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file, then re-raise
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Return the first node matching xpath[@key] (or xpath[@key=val])."""
        assert re.match(r'^[a-zA-Z_-]+$', key)
        predicate = '[@%s]' % key if val is None else "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """Fallback for Python < 2.7, where attribute predicates in find()
        are not supported; scans findall() results manually."""
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1882
# Note: on Python 2.6 the xml.etree.ElementTree.Element methods do not
# support the namespace parameter, hence the findall()-based fallback above
1885
1886
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps in *path* to '{uri}tag' using *ns_map*."""
    expanded = []
    for step in path.split('/'):
        pieces = step.split(':')
        if len(pieces) == 1:
            expanded.append(pieces[0])
        else:
            prefix, tag = pieces
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1897
1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching *xpath* (a string or an iterable of
    candidate xpaths tried in order).

    Returns *default* if given and nothing matched; raises ExtractorError
    when *fatal* and nothing matched; otherwise returns None.
    """
    def _find_xpath(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Fix: initialize n so an empty xpath iterable does not raise
        # UnboundLocalError below
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text."""
    found = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if found is None or found == default:
        return found
    if found.text is not None:
        return found.text
    # Element exists but has no text: apply the same default/fatal policy
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s'
                             % (xpath if name is None else name))
    return None
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute *key* of the first node matching xpath[@key]."""
    found = find_xpath_attr(node, xpath, key)
    if found is not None:
        return found.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s'
                             % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1948
1949
def get_element_by_id(id, html):
    """Return the inner content of the element whose id attribute equals *id*."""
    return get_element_by_attribute('id', id, html)
1953
1954
def get_element_by_class(class_name, html):
    """Return the content of the first element carrying *class_name*, or None."""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1959
1960
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first element with attribute == value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return next(iter(matches), None)
1964
1965
def get_elements_by_class(class_name, html):
    """Return the contents of every element whose class list contains *class_name*."""
    class_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_re, html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of every tag carrying the given attribute/value pair."""

    if escape_value:
        value = re.escape(value)

    results = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # Drop a stray wrapping quote pair, if present
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1996
1997
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser that records the attributes of the first
    (outermost) element fed to it."""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
2007
2008
class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser collecting the attributes of each top-level <li> element."""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        # Only record <li> elements at the top nesting level of the fragment
        if self._level == 0 and tag == 'li':
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1
2024
2025
def extract_attributes(html_element):
    """Parse a single HTML element string and return its attributes as a dict.

    For example '<el a="foo" B="bar" empty= noval entity="&amp;">' yields
    {'a': 'foo', 'b': 'bar', 'empty': '', 'noval': None, 'entity': '&'}:
    attribute names are lower-cased, entity references decoded, and
    valueless attributes mapped to None.
    """
    parser = HTMLAttributeParser()
    # Older Pythons may raise HTMLParseError on malformed input; in that
    # case return whatever attributes were gathered before the failure
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs
2050
2051
def parse_list(webpage):
    """Parse a string of HTML <li> elements and return a list of their
    attribute dictionaries."""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items
2059
2060
def clean_html(html):
    """Clean an HTML snippet into a readable plain-text string."""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Literal newlines are layout noise; <br> and </p><p> are the real breaks
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip the remaining tags, then decode entity references
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2076
2077
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means standard output
            if sys.platform == 'win32':
                # Switch stdout to binary mode so byte output is not mangled
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission problems won't be fixed by renaming; re-raise immediately
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2108
2109
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a Unix timestamp, or None if it
    cannot be parsed."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2117
2118
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def _replace(char):
        # Order matters: each rule falls through to the next
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if not restricted and char == '\n':
            return ' '
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps: 12:34:56 -> 12_34_56 before per-char replacement
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(_replace(char) for char in s)
    if not is_id:
        # Collapse runs of underscores introduced by the replacements
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2162
2163
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        # Nothing to do on non-Windows platforms unless forced
        return s

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    # Replace Windows-forbidden characters and trailing dots/spaces with '#'
    cleaned = [
        part if part in ('.', '..') else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)
        for part in parts]
    if drive_or_unc:
        cleaned.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        cleaned.insert(0, os.path.sep)
    return os.path.join(*cleaned)
2187
2188
def sanitize_url(url):
    """Fix protocol-relative URLs and a few known scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    return next(
        (re.sub(mistake, fixup, url)
         for mistake, fixup in COMMON_TYPOS if re.match(mistake, url)),
        url)
2205
2206
def extract_basic_auth(url):
    """Strip userinfo from *url*; return (clean_url, basic_auth_header_or_None)."""
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    netloc = (parts.hostname if parts.port is None
              else '%s:%d' % (parts.hostname, parts.port))
    clean_url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    return clean_url, 'Basic ' + token
2217
2218
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized URL, moving any userinfo in
    the URL into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # A positionally-passed headers dict takes precedence over kwargs
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2225
2226
def expand_path(s):
    """Expand environment variables and '~' in the given path string."""
    expanded = compat_expanduser(s)
    return os.path.expandvars(expanded)
2230
2231
def orderedSet(iterable):
    """Return the iterable's elements with duplicates removed, preserving
    first-seen order.

    Uses list membership (not a set) on purpose, so unhashable elements
    are supported.
    """
    unique = []
    for element in iterable:
        if element not in unique:
            unique.append(element)
    return unique
2239
2240
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return compat_chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2270
2271
def unescapeHTML(s):
    """Replace HTML entity references in *s* with the characters they denote."""
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2279
2280
def escapeHTML(text):
    """Escape the five HTML-special characters in *text*."""
    # Single-pass translate is equivalent to the replace chain (with '&'
    # handled first) since no replacement's characters are themselves mapped
    return text.translate({
        ord('&'): '&amp;',
        ord('<'): '&lt;',
        ord('>'): '&gt;',
        ord('"'): '&quot;',
        ord("'"): '&#39;',
    })
2290
2291
def process_communicate_or_kill(p, *args, **kwargs):
    """communicate() with process *p*, killing it on any failure (including
    KeyboardInterrupt) so no zombie process is left behind."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2299
2300
class Popen(subprocess.Popen):
    """subprocess.Popen that suppresses console windows on Windows and
    offers communicate_or_kill()."""
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)
2313
2314
def get_subprocess_encoding():
    """Return the encoding used when talking to subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding
2325
2326
def encodeFilename(s, for_subprocess=False):
    """Encode filename *s* for OS/subprocess use; a no-op on Python 3."""

    assert type(s) == compat_str

    # Python 3 has a Unicode API.
    # Windows 2000+ (detected via version >= 5; NT 4 is obsolete) also takes
    # Unicode directly unless the value is destined for a subprocess, and
    # Jython treats filenames as Unicode despite reporting as Python 2.x.
    if (sys.version_info >= (3, 0)
            or sys.platform.startswith('java')
            or (not for_subprocess
                and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2349
2350
def decodeFilename(b, for_subprocess=False):
    """Decode a filename produced by encodeFilename(); a no-op on Python 3."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2360
2361
def encodeArgument(s):
    """Encode a command-line argument for subprocess use."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2369
2370
def decodeArgument(b):
    """Decode a command-line argument (inverse of encodeArgument)."""
    return decodeFilename(b, for_subprocess=True)
2373
2374
def decodeOption(optval):
    """Decode an option value to text if it arrived as bytes."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2383
2384
# Simple duration record returned by timetuple_from_msec()
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into (hours, minutes, seconds, milliseconds)."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2393
2394
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as H:MM:SS, M:SS or S (depending on its
    magnitude), appending .mmm when *msec* is true."""
    t = timetuple_from_msec(secs * 1000)
    if t.hours:
        ret = '%d%s%02d%s%02d' % (t.hours, delim, t.minutes, delim, t.seconds)
    elif t.minutes:
        ret = '%d%s%02d' % (t.minutes, delim, t.seconds)
    else:
        ret = '%d' % t.seconds
    if msec:
        ret = '%s.%03d' % (ret, t.milliseconds)
    return ret
2404
2405
def _ssl_load_windows_store_certs(ssl_context, storename):
    """Load server-auth certificates from a Windows certificate store into
    *ssl_context*, skipping unreadable stores and bad certificates."""
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        with contextlib.suppress(ssl.SSLError):
            ssl_context.load_verify_locations(cadata=cert)
2419
2420
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose SSL context honours the
    'nocheckcertificate' option in *params* (disabling both hostname and
    certificate verification when set)."""
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2442
2443
def bug_reports_message(before=';'):
    """Return the standard 'please report this issue' blurb, joined after
    *before* (capitalized when it starts a new sentence)."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
           % update_cmd)

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    if before:
        return before + ' ' + msg
    return msg
2458
2459
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    msg = None

    def __init__(self, msg=None):
        if msg is None:
            # Fall back to the class-level message, then to the class name
            msg = self.msg if self.msg is not None else type(self).__name__
        self.msg = msg
        super().__init__(msg)
2470
2471
# Exception classes that indicate a network-level failure; ExtractorError
# treats errors raised while one of these is active as "expected"
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2476
2477
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # Errors raised while a network exception is being handled are
        # expected by definition
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        super().__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        """Render the stored traceback as a string, or None if absent."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2507
2508
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL."""

    def __init__(self, url):
        self.url = url
        super().__init__('Unsupported URL: %s' % url, expected=True)
2514
2515
class RegexNotFoundError(ExtractorError):
    """Raised when a regex pattern fails to match."""
2519
2520
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    Raised when a video is unavailable from the caller's geographic
    location due to restrictions imposed by the website.  Always an
    "expected" error.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True
        # Countries (if known) from which the video is accessible
        self.countries = countries
        super().__init__(msg, **kwargs)
2532
2533
class DownloadError(YoutubeDLError):
    """Download Error exception.

    Raised by FileDownloader objects when they are not configured to
    continue on errors; carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super().__init__(msg)
2546
2547
class EntryNotInPlaylist(YoutubeDLError):
    """Raised by YoutubeDL when a requested entry is not found in the
    playlist info_dict."""
    msg = 'Entry not found in info'
2555
2556
class SameFileError(YoutubeDLError):
    """Same File exception.

    Raised by FileDownloader objects when multiple files would have to be
    downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Fix: actually interpolate the offending filename into the
            # message (it was checked but a constant string was appended),
            # matching the pattern used by UnavailableVideoError
            self.msg += f': {filename}'
        super().__init__(self.msg)
2569
2570
class PostProcessingError(YoutubeDLError):
    """Raised by a PostProcessor's .run() method to indicate an error in
    the postprocessing task."""
2577
2578
class DownloadCancelled(YoutubeDLError):
    """Raised when the download queue should be interrupted."""
    msg = 'The download was cancelled'
2582
2583
class ExistingVideoReached(DownloadCancelled):
    """Raised when --break-on-existing is triggered."""
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
2587
2588
class RejectedVideoReached(DownloadCancelled):
    """Raised when --break-on-reject is triggered."""
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
2592
2593
class MaxDownloadsReached(DownloadCancelled):
    """Raised when the --max-downloads limit has been reached."""
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2597
2598
class ReExtractInfo(YoutubeDLError):
    """Raised when video info needs to be re-extracted."""

    def __init__(self, msg, expected=False):
        self.expected = expected
        super().__init__(msg)
2605
2606
class ThrottledDownload(ReExtractInfo):
    """Raised when the download speed falls below --throttled-rate."""
    msg = 'The download speed is below throttle limit'

    def __init__(self, msg):
        super().__init__(msg, expected=False)
2613
2614
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Raised when a video is requested in a format that is not available
    for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            # Append the underlying error to the base message
            self.msg = f'{self.msg}: {err}'
        super().__init__(self.msg)
2627
2628
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised by FileDownloader objects when a downloaded file is smaller
    than what the server announced, indicating the connection was
    probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both values are byte counts
        self.downloaded = downloaded
        self.expected = expected
        super().__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
2644
2645
class XAttrMetadataError(YoutubeDLError):
    """Error while reading/writing extended file attributes.

    Exposes `code` (OS errno, may be None), `msg`, and a coarse
    `reason` of NO_SPACE, VALUE_TOO_LONG or NOT_SUPPORTED.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code, self.msg = code, msg

        # Classify the failure from the errno and the message text
        if code in (errno.ENOSPC, errno.EDQUOT) or 'No space left' in msg or 'Disk quota exceeded' in msg:
            self.reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2660
2661
class XAttrUnavailableError(YoutubeDLError):
    """Raised when extended file attribute support is unavailable."""
2664
2665
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate *http_class*, honouring the handler's `source_address`
    parameter by binding outgoing sockets to that local address (and
    filtering DNS results to the matching address family).

    @param ydl_handler  handler whose `_params` dict is consulted
    @param http_class   connection class to instantiate
    @param is_https     wrap the socket with SSL on the python 2.6 path
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Choose the address family from the *source* address: dotted
            # notation implies IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # No candidate connected: re-raise the last error, if any
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2729
2730
def handle_youtubedl_headers(headers):
    """Apply internal pseudo-headers to a request header dict.

    If the marker header 'Youtubedl-no-compression' is present, return a
    new dict with both the marker and any Accept-Encoding header removed;
    otherwise return *headers* unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    result = {k: v for k, v in headers.items() if k.lower() != 'accept-encoding'}
    del result['Youtubedl-no-compression']
    return result
2739
2740
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        # Parameter dict; consulted by _create_http_connection() for
        # e.g. 'source_address'
        self._params = params

    def http_open(self, req):
        """Open a plain HTTP connection, optionally routed through the
        SOCKS proxy named in the internal 'Ytdl-socks-proxy' header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress a 'deflate' body, accepting both raw deflate
        streams and zlib-wrapped ones."""
        if not data:
            return data
        try:
            # First try as a raw stream without the zlib header
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Percent-encode the URL and add the standard headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decompress gzip/deflate bodies and re-escape
        non-ASCII redirect Location headers."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2864
2865
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* whose sockets are
    routed through the SOCKS proxy described by the URL *socks_proxy*.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # Credentials are percent-encoded in the proxy URL; empty/None pass through
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2907
2908
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS counterpart of YoutubeDLHandler; honours the internal
    'Ytdl-socks-proxy' header and an optional custom connection class."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            conn_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            conn_kwargs['check_hostname'] = self._check_hostname

        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            conn_class = make_socks_conn_class(conn_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **conn_kwargs)
2932
2933
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in a Netscape cookie file
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Named view over one tab-separated cookie file line
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so the stock parser accepts the line
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Malformed entries are skipped with a warning rather
                    # than aborting the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
3050
3051
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that wires the HTTPS hooks to their HTTP twins."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # NOTE: python 2 could choke on the next request if the previous
        # response carried non-ASCII Set-Cookie headers (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769); the
        # percent-encoding workaround for that is no longer applied here.
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
3074
3075
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Route every supported 3xx (including 308) through the 302 machinery
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # GET/HEAD may follow any supported 3xx; POST only 301/302/303
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3131
3132
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (timezone, date_str): *timezone* is a datetime.timedelta
    (zero when no explicit numeric offset was found) and *date_str* has
    the matched designator removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign') if m else None
    if not sign:
        # No numeric offset (either no match at all, or a bare 'Z')
        timezone = datetime.timedelta()
    else:
        direction = 1 if sign == '+' else -1
        timezone = datetime.timedelta(
            hours=direction * int(m.group('hours')),
            minutes=direction * int(m.group('minutes')))
    return timezone, date_str
3157
3158
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp for an ISO 8601 date, or None on failure.

    @param delimiter  separator between the date and time parts
    @param timezone   explicit UTC offset as a timedelta; extracted from
                      the string when None
    """
    if date_str is None:
        return None

    # Drop fractional seconds; '%S' cannot parse them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(date_str, f'%Y-%m-%d{delimiter}%H:%M:%S') - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        pass
3176
3177
def date_formats(day_first=True):
    """Return the date-format strings to try, ordered for day-first or
    month-first locales."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3180
3181
def unified_strdate(date_str, day_first=True):
    """Return the date as a 'YYYYMMDD' string, or None if unparsable.

    @param day_first  prefer day-first reading of ambiguous dates
    """
    if date_str is None:
        return None

    upload_date = None
    # Commas confuse the format strings below
    date_str = date_str.replace(',', ' ')
    # Strip AM/PM plus a trailing timezone word, then any numeric offset
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(date_str, expression)
        except ValueError:
            continue
        upload_date = parsed.strftime('%Y%m%d')
    if upload_date is None:
        # Fall back to the email-style date parser
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3208
3209
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for a free-form date string, or None.

    @param day_first  prefer day-first reading of ambiguous dates
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE: the PM marker must be detected before it is stripped below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to the email-style date parser
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3241
3242
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*
    when the URL carries no usable extension."""
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = guess.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3254
3255
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return the subtitle file name: the media filename with its
    extension replaced by '<lang>.<format>'."""
    new_ext = f'{sub_lang}.{sub_format}'
    return replace_extension(filename, new_ext, expected_real_ext)
3258
3259
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Recursively resolve the base date, then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # timedelta has no month/year support; use calendar arithmetic
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            # 'auto': round to the unit that was named in the string
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3300
3301
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a datetime.date from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3310
3311
def datetime_add_months(dt, months):
    """Shift *dt* by *months* (may be negative), clamping the day to the
    last day of the resulting month."""
    total = dt.month - 1 + months
    year = dt.year + total // 12
    month = total % 12 + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3319
3320
def datetime_round(dt, precision='day'):
    """Round *dt* to the nearest unit named by *precision*
    ('microsecond' is a no-op; otherwise 'second', 'minute', 'hour', 'day').
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    step = unit_seconds[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half up to the nearest multiple of the unit
    rounded = (timestamp + step / 2) // step * step
    return datetime.datetime.utcfromtimestamp(rounded)
3337
3338
def hyphenate_date(date_str):
    """Convert a 'YYYYMMDD' date into 'YYYY-MM-DD'; anything else is
    returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(match.groups()) if match else date_str
3347
3348
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3378
3379
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # On python 2 the platform string could be bytes
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3388
3389
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3396
3397
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map python file descriptors 1/2 to the Win32 std-handle IDs
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is not an interactive console (e.g. a pipe)
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 BMP characters per call; a non-BMP character
        # is written on its own as a 2-unit surrogate pair
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3471
3472
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (default: sys.stderr), coping with
    byte streams, Windows consoles, and text streams that expose an
    underlying binary buffer.

    @param encoding  overrides the stream's/preferred encoding if given
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Windows console API so non-ANSI characters survive
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream wrapping a binary buffer: encode ourselves so we can
        # pick the encoding and drop unencodable characters
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3493
3494
def bytes_to_intlist(bs):
    """Turn a bytes/bytearray (or py2 str) sequence into a list of byte
    values; an empty/None input yields an empty list."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return [ord(c) for c in bs]  # Python 2 str
3502
3503
def intlist_to_bytes(xs):
    """Inverse of bytes_to_intlist: pack a list of byte values (0-255)
    back into a bytes object."""
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
3508
3509
# Cross-platform file locking primitives (_lock_file/_unlock_file).
# Windows uses LockFileEx/UnlockFileEx via ctypes; POSIX uses fcntl.flock;
# platforms with neither get stubs that always raise.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # Matches the Win32 OVERLAPPED structure required by LockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering the largest lockable region of the file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the whole file; flag 0x2 is LOCKFILE_EXCLUSIVE_LOCK
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED pointer alive on the file object; UnlockFileEx
        # must be called with the same region description
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release the range previously locked by _lock_file
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3583
3584
class locked_file(object):
    """File wrapper holding an OS-level lock for the duration of a ``with``
    block: shared for reads ('r'), exclusive for writes ('a'/'w')."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Writers need exclusivity; concurrent readers may share the lock
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always close, even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3614
3615
def get_filesystem_encoding():
    """Return the filesystem encoding, falling back to UTF-8 when unknown."""
    enc = sys.getfilesystemencoding()
    if enc is None:
        return 'utf-8'
    return enc
3619
3620
def shell_quote(args):
    """Return a single shell-escaped command line built from *args*."""
    fs_encoding = get_filesystem_encoding()
    # Arguments may arrive as bytes (e.g. from encodeFilename); decode them
    return ' '.join(
        compat_shlex_quote(
            arg.decode(fs_encoding) if isinstance(arg, bytes) else arg)
        for arg in args)
3630
3631
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, smuggled)
3640
3641
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url.

    Returns (url, data), or (smug_url, default) when nothing is smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3649
3650
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00KiB'.

    Accepts ints, floats and numeric strings; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so that sub-byte values do not index suffixes[-1] and
        # absurdly large values do not raise IndexError
        exponent = min(max(int(math.log(bytes, 1024.0)), 0), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3663
3664
def lookup_unit_table(unit_table, s):
    """Parse a '<number> <unit>' prefix of *s* using *unit_table* (a dict of
    unit name -> multiplier); return the value in base units, or None."""
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if mobj is None:
        return None
    # ',' is accepted as a decimal separator
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3674
3675
def parse_filesize(s):
    """Parse a human-readable file size like '5.6 MiB' into an int byte
    count, or None when *s* is None or unparseable."""
    if s is None:
        return None

    # Both decimal (kB = 1000) and binary (KiB = 1024) units are accepted.
    # The lower-case and mixed-case variants are of course incorrect and
    # unofficial, but we support those too.
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    _PREFIXES = [
        ('k', 'kilo', 'kibi'),
        ('m', 'mega', 'mebi'),
        ('g', 'giga', 'gibi'),
        ('t', 'tera', 'tebi'),
        ('p', 'peta', 'pebi'),
        ('e', 'exa', 'exbi'),
        ('z', 'zetta', 'zebi'),
        ('y', 'yotta', 'yobi'),
    ]
    for exponent, (letter, dec_name, bin_name) in enumerate(_PREFIXES, start=1):
        decimal, binary = 1000 ** exponent, 1024 ** exponent
        _UNIT_TABLE.update({
            letter.upper() + 'iB': binary,   # e.g. 'KiB'
            letter.upper() + 'B': decimal,   # e.g. 'KB'
            letter + 'B': binary,            # e.g. 'kB' (treated as binary)
            letter.upper() + 'b': decimal,   # e.g. 'Kb'
            letter + 'b': decimal,           # e.g. 'kb'
            dec_name + 'bytes': decimal,     # e.g. 'kilobytes'
            bin_name + 'bytes': binary,      # e.g. 'kibibytes'
        })

    return lookup_unit_table(_UNIT_TABLE, s)
3745
3746
def parse_count(s):
    """Parse a view/like count such as '1.5M' into an int, or None."""
    if s is None:
        return None

    s = s.strip()

    # Plain number, possibly with thousands separators
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    return lookup_unit_table({
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }, s)
3766
3767
def parse_resolution(s):
    """Extract video dimensions from a string like '1920x1080', '720p' or
    '4k'. Returns a dict with 'width'/'height' keys (possibly only
    'height'), or an empty dict when nothing is recognized."""
    if s is None:
        return {}

    mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
3788
3789
def parse_bitrate(s):
    """Extract an integer bitrate in kbps from *s*, or None."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
3796
3797
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in month_names:
        return None
    return month_names.index(name) + 1
3807
3808
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
3817
3818
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, leaving existing entities and
    character references untouched."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;', xml_str)
3825
3826
def setproctitle(title):
    """Best-effort: set the process title via libc prctl (Linux only)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects a bytestring,
        # but since unicode_literals turns every string into a unicode
        # string, it fails.
        return
    encoded = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(encoded))
    buf.value = encoded
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3851
3852
def remove_start(s, start):
    """Strip the prefix *start* from *s* if present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3855
3856
def remove_end(s, end):
    """Strip the suffix *end* from *s* if present (None-safe)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3859
3860
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3868
3869
def get_domain(url):
    """Return the 'domain.tld' part of *url*, or None if not recognizable."""
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return mobj.group('domain') if mobj else None
3873
3874
def url_basename(url):
    """Return the last path component of *url* ('.../a/b?x' -> 'b')."""
    return compat_urlparse.urlparse(url).path.strip('/').split('/')[-1]
3878
3879
def base_url(url):
    """Return *url* truncated after the last '/' before any query/fragment."""
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3882
3883
def urljoin(base, path):
    """Join *path* onto *base* like urllib's urljoin, tolerating bytes input.

    Returns None when either part is unusable; returns *path* unchanged when
    it is already absolute (has a scheme) or protocol-relative.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Per RFC 3986, scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ).
    # The hyphen must be escaped; the unescaped class range '+-.' would
    # wrongly accept ',' in a scheme.
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+\-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3897
3898
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that issues HEAD instead of GET."""
    def get_method(self):
        return 'HEAD'
3902
3903
class PUTRequest(compat_urllib_request.Request):
    """A urllib Request that issues PUT instead of GET."""
    def get_method(self):
        return 'PUT'
3907
3908
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* (optionally attribute *get_attr* of it) to an int scaled
    by invscale/scale; return *default* on any failure."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
3921
3922
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
3925
3926
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and a leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3934
3935
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a float scaled by invscale/scale; *default* on failure."""
    try:
        return default if v is None else float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
3943
3944
def bool_or_none(v, default=None):
    """Return *v* only if it is a real bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3947
3948
def strip_or_none(v, default=None):
    """Return v.strip() when *v* is a string, else *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3951
3952
def url_or_none(url):
    """Return the stripped URL if it has a recognized (or relative)
    scheme — http(s), rtmp family, rtsp, mms, ftp(s) — else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3958
3959
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or a 'YYYYMMDD' string with *date_format*;
    return *default* on any failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3970
3971
def parse_duration(s):
    """Parse a duration string into a number of seconds, or None.

    Accepted forms include '1:23:45(.67)', '3h 11m 5s', ISO-8601-like
    'PT1H2M3S', and '2.5 hours' / '90 mins'.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated form: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Unit-suffixed / ISO-8601-like form; years, months and weeks are
        # matched but deliberately not captured (not converted to seconds)
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Fractional '2.5 hours' / '90 mins' form
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whichever components matched; ms still carries its leading '.'
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
4028
4029
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the file's real extension:
    'a.mp4' + 'temp' -> 'a.temp.mp4'.

    When expected_real_ext is given and does not match the actual
    extension, *ext* is appended instead: 'a.mkv' + 'temp' -> 'a.mkv.temp'."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
4036
4037
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file extension: 'a.mp4' + 'webm' -> 'a.webm'.

    When expected_real_ext is given and does not match, *ext* is appended
    to the full filename instead."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
4043
4044
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
    except OSError:
        return False
    else:
        return exe
4053
4054
def _get_exe_version_output(exe, args):
    """Run *exe* with *args*, returning its combined stdout/stderr text, or
    False when the executable cannot be launched."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = proc.communicate_or_kill()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return out
4068
4069
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program output using *version_re*;
    return *unrecognized* when no version can be found."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
4079
4080
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    out = _get_exe_version_output(exe, args)
    if not out:
        return False
    return detect_exe_version(out, version_re, unrecognized)
4087
4088
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        # Raised instead of the builtin so callers can distinguish
        # "out of range in this LazyList" from unrelated IndexErrors
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        # _cache is shared (not copied) with copies/reversals of this list
        self.__iterable = iter(iterable)
        self.__cache = [] if _cache is None else _cache
        self.__reversed = reverse

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # Replay what is cached, then pull (and cache) the rest lazily
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        self.__cache.extend(self.__iterable)
        # Discard the emptied iterable to make it pickle-able
        self.__iterable = []
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map a forward index to the equivalent negative index (None stays None)
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                # Translate the slice so it indexes the cache from the end
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Pull just enough items from the iterable to satisfy the request
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            # Probe only the first relevant element; avoids full evaluation
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def __reversed__(self):
        return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)

    def __copy__(self):
        return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)

    def __deepcopy__(self, memo):
        # FIXME: This is actually just a shallow copy
        id_ = id(self)
        memo[id_] = self.__copy__()
        return memo[id_]

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4183
4184
class PagedList:
    """Base class for lazily fetched, page-oriented lists of entries.

    Subclasses implement _getslice(start, end) as a generator of entries."""

    class IndexError(IndexError):
        # Distinguishable from IndexErrors raised inside page callbacks
        pass

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Fetch (and, when enabled, cache) the entries of page *pagenum*."""
        results = self._cache.get(pagenum)
        if results is None:
            results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = results
        return results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if not entries:
            raise self.IndexError()
        return entries[0]
4222
4223
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages one at a time, on demand, as the
    requested slice is consumed."""
    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested range within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4257
4258
class InAdvancePagedList(PagedList):
    """PagedList for sources where the total page count is known up front."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        # Caching is always enabled for this variant
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Entries to drop from the first page / total entries still wanted
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None  # only the first page needs trimming
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
4282
4283
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escapes (8 hex digits) embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda mobj: decode(mobj.group(0))[0],
        s)
4290
4291
def lowercase_escape(s):
    """Decode literal '\\uXXXX' escapes (4 hex digits) embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda mobj: decode(mobj.group(0))[0],
        s)
4298
4299
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2 quote() needs bytes input for non-ASCII text
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # The safe-set keeps RFC 3986 reserved/sub-delimiter punctuation intact
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4305
4306
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # The host goes through IDNA; every other component is percent-escaped
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment),
    ).geturl()
4317
4318
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4321
4322
def read_batch_urls(batch_fd):
    """Read a batch file object and return its URLs as a list, skipping
    blank lines, comment lines and byte-order marks. Closes *batch_fd*."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM, whether mis-decoded as latin-1 or decoded properly
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4340
4341
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4344
4345
def update_url_query(url, query):
    """Return *url* with the *query* dict merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parsed.query)
    merged.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(merged, True)))
4354
4355
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone a urllib request with updated url/data/headers/query while
    keeping its HTTP method and timeout."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4374
4375
def _multipart_encode_impl(data, boundary):
    """Encode *data* (dict of str/bytes keys and values) as
    multipart/form-data using *boundary*.

    Returns (encoded_bytes, content_type). Raises ValueError when the
    boundary occurs inside a field's content."""
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    # Collect parts and join once at the end; repeated bytes += is quadratic
    parts = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(b'--' + boundary_bytes + b'\r\n')
        parts.append(content)

    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
4396
4397
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # The boundary collided with the payload; retry with a fresh
            # random one unless the caller pinned it explicitly
            if has_specified_boundary:
                raise
            boundary = None
4426
4427
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """d.get with multi-key fallback: given a list/tuple of keys, return the
    first value that is not None (and, unless skip_false_values is False,
    not falsy); otherwise *default*."""
    if isinstance(key_or_keys, (list, tuple)):
        for key in key_or_keys:
            if key in d and d[key] is not None and (d[key] or not skip_false_values):
                return d[key]
        return default
    return d.get(key_or_keys, default)
4436
4437
def try_get(src, getter, expected_type=None):
    """Apply each getter in *getter* to *src*; return the first result that
    neither raises a lookup error nor fails the expected_type check."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4447
4448
def merge_dicts(*dicts):
    """Merge dicts left-to-right, ignoring None values; an earlier empty
    string may be replaced by a later non-empty string."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            upgrades_empty_str = (
                isinstance(v, compat_str) and v
                and isinstance(merged.get(k), compat_str)
                and not merged[k])
            if k not in merged or upgrades_empty_str:
                merged[k] = v
    return merged
4461
4462
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Decode *string* to text unless it already is; note the default
    *encoding* is evaluated once, at import time."""
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4465
4466
# US MPAA movie ratings mapped to approximate age limits (see parse_age_limit)
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to approximate age limits
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4484
4485
def parse_age_limit(s):
    """Normalize an age limit given as int, '18', '18+', an MPAA rating or a
    TV Parental Guidelines rating into an int, or None."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    # Accept 'TV14', 'TV-14' and 'TV_14' alike
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4501
4502
def strip_jsonp(code):
    """Strip a JSONP wrapper like 'cb({...});' down to the JSON payload."""
    JSONP_RE = r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$'''
    return re.sub(JSONP_RE, r'\g<callback_data>', code)
4511
4512
def js_to_json(code, vars={}):
    """Convert a JavaScript object literal / expression into valid JSON text.

    vars is a dict of var, val pairs to substitute for bare identifiers.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals, optionally used as object keys
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Normalize a single token matched by the big regex below
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Re-escape string contents for JSON
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integer object keys must become quoted strings in JSON
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        if v in vars:
            return vars[v]

        # Bare identifier: quote it
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4559
4560
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def score(qid):
        # Rank = position in the preference list; unknown ids rank lowest
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return score
4569
4570
# Output filename templates used when the user supplies none
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types; values look like default filename components
# for each type (None: no dedicated default) — NOTE(review): confirm at call sites
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
    '''


# printf-style conversion type characters accepted by STR_FORMAT_RE_TMPL
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4606
4607
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES if len(s) > length else s
4616
4617
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4620
4621
def is_outdated_version(version, limit, assume_new=True):
    """Whether *version* compares strictly older than *limit*.

    If either string is empty/unparsable, returns ``not assume_new``.
    """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        # Non-numeric component — cannot compare reliably
        return not assume_new
    return outdated
4629
4630
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Imported lazily to avoid a circular dependency with the update module
    from .update import is_non_updateable
    return not is_non_updateable()
4637
4638
def args_to_str(args):
    """Produce a shell-quoted, human-readable rendition of a subprocess command."""
    return ' '.join(map(compat_shlex_quote, args))
4642
4643
def error_to_compat_str(err):
    """Stringify an exception in a Python 2/3 compatible way."""
    err_str = str(err)
    if sys.version_info[0] < 3:
        # On python 2 the error byte string must be decoded with the locale's
        # preferred encoding rather than ascii
        return err_str.decode(preferredencoding())
    return err_str
4651
4652
def mimetype2ext(mt):
    """Map a MIME type (optionally carrying ';'-separated parameters) to a
    file extension, or None when *mt* is None."""
    if mt is None:
        return None

    # Drop parameters such as '; charset=utf-8' and surrounding whitespace
    mt = mt.partition(';')[0].strip()

    # Exact full-type matches take priority
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }
    if mt in FULL_MAP:
        return FULL_MAP[mt]

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }
    subtype = mt.rpartition('/')[2]
    if subtype.lower() in SUBTYPE_MAP:
        return SUBTYPE_MAP[subtype.lower()]

    # Fall back to the '+suffix' structured-syntax part, e.g. 'vnd.foo+json'
    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }
    suffix = subtype.partition('+')[2]
    if suffix in SUFFIX_MAP:
        return SUFFIX_MAP[suffix]

    return subtype.replace('+', '.')
4715
4716
def parse_codecs(codecs_str):
    """Parse an RFC 6381 codecs string into a dict with 'vcodec', 'acodec'
    and 'dynamic_range' keys (empty dict when nothing can be determined)."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    codec_list = [part.strip()
                  for part in codecs_str.strip().strip(',').split(',')
                  if part.strip()]
    vcodec = acodec = hdr = None
    for full_codec in codec_list:
        parts = full_codec.split('.')
        # Zeros in the fourcc are insignificant, e.g. 'vp09' == 'vp9'
        short = parts[0].replace('0', '')
        if short in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                     'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
            if not vcodec:
                # vp9/av1 keep at most the first 4 dotted profile components
                vcodec = '.'.join(parts[:4]) if short in ('vp9', 'av1') else full_codec
                if short in ('dvh1', 'dvhe'):
                    hdr = 'DV'
                elif short == 'av1' and len(parts) > 3 and parts[3] == '10':
                    hdr = 'HDR10'
                elif full_codec.replace('0', '').startswith('vp9.2'):
                    hdr = 'HDR10'
        elif short in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
    if len(codec_list) == 2:
        # Nothing recognized but exactly two entries: assume video, audio order
        return {
            'vcodec': codec_list[0],
            'acodec': codec_list[1],
        }
    return {}
4755
4756
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a URL response, preferring the
    Content-Disposition filename over the Content-Type."""
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        match = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if match:
            ext = determine_ext(match.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4769
4770
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI embedding *data* (bytes) as base64."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4773
4774
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer limit set, or the content is available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4783
4784
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Longer BOMs are listed before their prefixes (utf-32-le before utf-16-le)
    BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    # Strip a recognized BOM and decode accordingly; default to utf-8
    text = next(
        (first_bytes[len(bom):].decode(enc, 'replace')
         for bom, enc in BOMS if first_bytes.startswith(bom)),
        first_bytes.decode('utf-8', 'replace'))
    return re.match(r'^\s*<', text)
4803
4804
def determine_protocol(info_dict):
    """Infer the download protocol for *info_dict*, falling back to the
    sanitized URL's prefix, extension, and finally its scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4825
4826
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a \t will be right aligned """
    def width(string):
        # Visible width: terminal escape sequences and tabs occupy no columns
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        # Widest cell per column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        return [col for (take, col) in zip(filterArray, row) if take]

    if hide_empty:
        # Drop columns whose data cells are all empty (max width 0 is falsy)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        # Insert a separator row built from the delimiter character
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
        table[1][-1] = table[1][-1][:-extra_gap]  # Remove extra_gap from end of delimiter
    # NOTE(review): cells are rewritten in place below, so the caller's row
    # lists inside *data* are mutated by this padding pass
    for row in table:
        for pos, text in enumerate(map(str, row)):
            if '\t' in text:
                # Replace the tab with just enough padding to right-align the tail
                row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
    ret = '\n'.join(''.join(row).rstrip() for row in table)
    return ret
4858
4859
4860 def _match_one(filter_part, dct, incomplete):
4861 # TODO: Generalize code with YoutubeDL._build_format_filter
4862 STRING_OPERATORS = {
4863 '*=': operator.contains,
4864 '^=': lambda attr, value: attr.startswith(value),
4865 '$=': lambda attr, value: attr.endswith(value),
4866 '~=': lambda attr, value: re.search(value, attr),
4867 }
4868 COMPARISON_OPERATORS = {
4869 **STRING_OPERATORS,
4870 '<=': operator.le, # "<=" must be defined above "<"
4871 '<': operator.lt,
4872 '>=': operator.ge,
4873 '>': operator.gt,
4874 '=': operator.eq,
4875 }
4876
4877 operator_rex = re.compile(r'''(?x)\s*
4878 (?P<key>[a-z_]+)
4879 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4880 (?:
4881 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4882 (?P<strval>.+?)
4883 )
4884 \s*$
4885 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4886 m = operator_rex.search(filter_part)
4887 if m:
4888 m = m.groupdict()
4889 unnegated_op = COMPARISON_OPERATORS[m['op']]
4890 if m['negation']:
4891 op = lambda attr, value: not unnegated_op(attr, value)
4892 else:
4893 op = unnegated_op
4894 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4895 if m['quote']:
4896 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4897 actual_value = dct.get(m['key'])
4898 numeric_comparison = None
4899 if isinstance(actual_value, compat_numeric_types):
4900 # If the original field is a string and matching comparisonvalue is
4901 # a number we should respect the origin of the original field
4902 # and process comparison value as a string (see
4903 # https://github.com/ytdl-org/youtube-dl/issues/11082)
4904 try:
4905 numeric_comparison = int(comparison_value)
4906 except ValueError:
4907 numeric_comparison = parse_filesize(comparison_value)
4908 if numeric_comparison is None:
4909 numeric_comparison = parse_filesize(f'{comparison_value}B')
4910 if numeric_comparison is None:
4911 numeric_comparison = parse_duration(comparison_value)
4912 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4913 raise ValueError('Operator %s only supports string values!' % m['op'])
4914 if actual_value is None:
4915 return incomplete or m['none_inclusive']
4916 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
4917
4918 UNARY_OPERATORS = {
4919 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4920 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4921 }
4922 operator_rex = re.compile(r'''(?x)\s*
4923 (?P<op>%s)\s*(?P<key>[a-z_]+)
4924 \s*$
4925 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4926 m = operator_rex.search(filter_part)
4927 if m:
4928 op = UNARY_OPERATORS[m.group('op')]
4929 actual_value = dct.get(m.group('key'))
4930 if incomplete and actual_value is None:
4931 return True
4932 return op(actual_value)
4933
4934 raise ValueError('Invalid filter part %r' % filter_part)
4935
4936
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
    When incomplete, all conditions passes on missing fields
    """
    # Conditions are joined with unescaped '&'; a literal '\&' is unescaped
    # inside each part before evaluation
    parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(part.replace(r'\&', '&'), dct, incomplete)
        for part in parts)
4944
4945
def match_filter_func(filter_str):
    """Wrap *filter_str* into a callback usable as the 'match_filter' option:
    it returns None when the video passes, or a skip reason string."""
    def _match_func(info_dict, *args, **kwargs):
        if not match_str(filter_str, info_dict, *args, **kwargs):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4954
4955
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds; None when empty or
    unparsable."""
    if not time_expr:
        return None

    # Plain offset, e.g. '12.34s' or '12.34'
    match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if match:
        return float(match.group('time_offset'))

    # Clock time, e.g. '01:02:03.45' (some sources separate the fraction
    # with ':' instead of '.')
    match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if match:
        hours, minutes, seconds = match.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4967
4968
def srt_subtitles_timecode(seconds):
    # SRT timecodes use ',' as the millisecond separator: HH:MM:SS,mmm
    return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4971
4972
def ass_subtitles_timecode(seconds):
    """Format *seconds* as an ASS timecode (H:MM:SS.cc — centisecond precision)."""
    hours, minutes, secs, msec = timetuple_from_msec(seconds * 1000)
    return '%01d:%02d:%02d.%02d' % (hours, minutes, secs, msec / 10)
4976
4977
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Old TTAF/TTML namespaces are rewritten to the current ones below, so a
    # single set of namespaced XPaths can handle every variant
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML style properties that can be expressed with SRT markup tags
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> resolved style property dict
    default_style = {}  # style inherited from the body/div element

    class TTMLPElementParser(object):
        # XMLParser target that flattens one <p> element into SRT-ish markup.
        # NOTE(review): these are *class* attributes — the two lists are shared
        # across instances and only stay empty between paragraphs because every
        # append is matched by a pop for well-formed input; confirm before reuse.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Effective style: default < referenced style < inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close whatever tags this element opened, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and run it through the target parser above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces (byte-level replace, before parsing)
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat until every parent style referenced by
    # a child has been seen (styles may be declared in any order)
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced by body/div becomes the document-wide default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Fall back to begin + dur; skip cues with no determinable end
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
5140
5141
def cli_option(params, command_option, param):
    """Map an option value from *params* to a [flag, value] argv fragment.

    Returns [] when the key is absent or None. The value is always
    stringified so non-string values (including falsy ones such as 0, which
    previously slipped through the truthiness check un-converted) are safe to
    pass to subprocess.
    """
    param = params.get(param)
    return [command_option, str(param)] if param is not None else []
5147
5148
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean param as ['flag', 'value'] or, with *separator*,
    as a single ['flag<sep>value'] element; [] when unset."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
5157
5158
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [command_option] when the param equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5162
5163
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the first matching key group from an argument dict.

    *argdict* may be a legacy list/tuple, returned as-is when *use_compat*.
    *keys* is a list of key groups; the first group with any configured args
    wins, and its argument lists are flattened together.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict if use_compat else default
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        arg_list = [argdict.get(key.lower()) for key in variadic(key_list)]
        arg_list = [args for args in arg_list if args is not None]
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return default
5182
5183
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve configuration args for *exe* under *main_key*, trying the most
    specific key first, then ('main_key', 'exe'), then 'default'."""
    main_key = main_key.lower()
    exe = exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{k}' for k in (keys or [''])]
    # Generic fallbacks (and legacy compat handling) only apply when the bare
    # root key itself is among the candidates
    if root_key not in keys:
        use_compat = False
    else:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
5195
5196
class ISO639Utils(object):
    """Conversion between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the primary subtag is considered, e.g. 'en-US' maps like 'en'
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup over the map; implicitly returns None when unknown
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5400
5401
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166 two-letter country codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive on input; returns None for unknown codes
        return cls._country_map.get(code.upper())
5660
5661
5662 class GeoUtils(object):
5663 # Major IPv4 address blocks per country
5664 _country_ip_map = {
5665 'AD': '46.172.224.0/19',
5666 'AE': '94.200.0.0/13',
5667 'AF': '149.54.0.0/17',
5668 'AG': '209.59.64.0/18',
5669 'AI': '204.14.248.0/21',
5670 'AL': '46.99.0.0/16',
5671 'AM': '46.70.0.0/15',
5672 'AO': '105.168.0.0/13',
5673 'AP': '182.50.184.0/21',
5674 'AQ': '23.154.160.0/24',
5675 'AR': '181.0.0.0/12',
5676 'AS': '202.70.112.0/20',
5677 'AT': '77.116.0.0/14',
5678 'AU': '1.128.0.0/11',
5679 'AW': '181.41.0.0/18',
5680 'AX': '185.217.4.0/22',
5681 'AZ': '5.197.0.0/16',
5682 'BA': '31.176.128.0/17',
5683 'BB': '65.48.128.0/17',
5684 'BD': '114.130.0.0/16',
5685 'BE': '57.0.0.0/8',
5686 'BF': '102.178.0.0/15',
5687 'BG': '95.42.0.0/15',
5688 'BH': '37.131.0.0/17',
5689 'BI': '154.117.192.0/18',
5690 'BJ': '137.255.0.0/16',
5691 'BL': '185.212.72.0/23',
5692 'BM': '196.12.64.0/18',
5693 'BN': '156.31.0.0/16',
5694 'BO': '161.56.0.0/16',
5695 'BQ': '161.0.80.0/20',
5696 'BR': '191.128.0.0/12',
5697 'BS': '24.51.64.0/18',
5698 'BT': '119.2.96.0/19',
5699 'BW': '168.167.0.0/16',
5700 'BY': '178.120.0.0/13',
5701 'BZ': '179.42.192.0/18',
5702 'CA': '99.224.0.0/11',
5703 'CD': '41.243.0.0/16',
5704 'CF': '197.242.176.0/21',
5705 'CG': '160.113.0.0/16',
5706 'CH': '85.0.0.0/13',
5707 'CI': '102.136.0.0/14',
5708 'CK': '202.65.32.0/19',
5709 'CL': '152.172.0.0/14',
5710 'CM': '102.244.0.0/14',
5711 'CN': '36.128.0.0/10',
5712 'CO': '181.240.0.0/12',
5713 'CR': '201.192.0.0/12',
5714 'CU': '152.206.0.0/15',
5715 'CV': '165.90.96.0/19',
5716 'CW': '190.88.128.0/17',
5717 'CY': '31.153.0.0/16',
5718 'CZ': '88.100.0.0/14',
5719 'DE': '53.0.0.0/8',
5720 'DJ': '197.241.0.0/17',
5721 'DK': '87.48.0.0/12',
5722 'DM': '192.243.48.0/20',
5723 'DO': '152.166.0.0/15',
5724 'DZ': '41.96.0.0/12',
5725 'EC': '186.68.0.0/15',
5726 'EE': '90.190.0.0/15',
5727 'EG': '156.160.0.0/11',
5728 'ER': '196.200.96.0/20',
5729 'ES': '88.0.0.0/11',
5730 'ET': '196.188.0.0/14',
5731 'EU': '2.16.0.0/13',
5732 'FI': '91.152.0.0/13',
5733 'FJ': '144.120.0.0/16',
5734 'FK': '80.73.208.0/21',
5735 'FM': '119.252.112.0/20',
5736 'FO': '88.85.32.0/19',
5737 'FR': '90.0.0.0/9',
5738 'GA': '41.158.0.0/15',
5739 'GB': '25.0.0.0/8',
5740 'GD': '74.122.88.0/21',
5741 'GE': '31.146.0.0/16',
5742 'GF': '161.22.64.0/18',
5743 'GG': '62.68.160.0/19',
5744 'GH': '154.160.0.0/12',
5745 'GI': '95.164.0.0/16',
5746 'GL': '88.83.0.0/19',
5747 'GM': '160.182.0.0/15',
5748 'GN': '197.149.192.0/18',
5749 'GP': '104.250.0.0/19',
5750 'GQ': '105.235.224.0/20',
5751 'GR': '94.64.0.0/13',
5752 'GT': '168.234.0.0/16',
5753 'GU': '168.123.0.0/16',
5754 'GW': '197.214.80.0/20',
5755 'GY': '181.41.64.0/18',
5756 'HK': '113.252.0.0/14',
5757 'HN': '181.210.0.0/16',
5758 'HR': '93.136.0.0/13',
5759 'HT': '148.102.128.0/17',
5760 'HU': '84.0.0.0/14',
5761 'ID': '39.192.0.0/10',
5762 'IE': '87.32.0.0/12',
5763 'IL': '79.176.0.0/13',
5764 'IM': '5.62.80.0/20',
5765 'IN': '117.192.0.0/10',
5766 'IO': '203.83.48.0/21',
5767 'IQ': '37.236.0.0/14',
5768 'IR': '2.176.0.0/12',
5769 'IS': '82.221.0.0/16',
5770 'IT': '79.0.0.0/10',
5771 'JE': '87.244.64.0/18',
5772 'JM': '72.27.0.0/17',
5773 'JO': '176.29.0.0/16',
5774 'JP': '133.0.0.0/8',
5775 'KE': '105.48.0.0/12',
5776 'KG': '158.181.128.0/17',
5777 'KH': '36.37.128.0/17',
5778 'KI': '103.25.140.0/22',
5779 'KM': '197.255.224.0/20',
5780 'KN': '198.167.192.0/19',
5781 'KP': '175.45.176.0/22',
5782 'KR': '175.192.0.0/10',
5783 'KW': '37.36.0.0/14',
5784 'KY': '64.96.0.0/15',
5785 'KZ': '2.72.0.0/13',
5786 'LA': '115.84.64.0/18',
5787 'LB': '178.135.0.0/16',
5788 'LC': '24.92.144.0/20',
5789 'LI': '82.117.0.0/19',
5790 'LK': '112.134.0.0/15',
5791 'LR': '102.183.0.0/16',
5792 'LS': '129.232.0.0/17',
5793 'LT': '78.56.0.0/13',
5794 'LU': '188.42.0.0/16',
5795 'LV': '46.109.0.0/16',
5796 'LY': '41.252.0.0/14',
5797 'MA': '105.128.0.0/11',
5798 'MC': '88.209.64.0/18',
5799 'MD': '37.246.0.0/16',
5800 'ME': '178.175.0.0/17',
5801 'MF': '74.112.232.0/21',
5802 'MG': '154.126.0.0/17',
5803 'MH': '117.103.88.0/21',
5804 'MK': '77.28.0.0/15',
5805 'ML': '154.118.128.0/18',
5806 'MM': '37.111.0.0/17',
5807 'MN': '49.0.128.0/17',
5808 'MO': '60.246.0.0/16',
5809 'MP': '202.88.64.0/20',
5810 'MQ': '109.203.224.0/19',
5811 'MR': '41.188.64.0/18',
5812 'MS': '208.90.112.0/22',
5813 'MT': '46.11.0.0/16',
5814 'MU': '105.16.0.0/12',
5815 'MV': '27.114.128.0/18',
5816 'MW': '102.70.0.0/15',
5817 'MX': '187.192.0.0/11',
5818 'MY': '175.136.0.0/13',
5819 'MZ': '197.218.0.0/15',
5820 'NA': '41.182.0.0/16',
5821 'NC': '101.101.0.0/18',
5822 'NE': '197.214.0.0/18',
5823 'NF': '203.17.240.0/22',
5824 'NG': '105.112.0.0/12',
5825 'NI': '186.76.0.0/15',
5826 'NL': '145.96.0.0/11',
5827 'NO': '84.208.0.0/13',
5828 'NP': '36.252.0.0/15',
5829 'NR': '203.98.224.0/19',
5830 'NU': '49.156.48.0/22',
5831 'NZ': '49.224.0.0/14',
5832 'OM': '5.36.0.0/15',
5833 'PA': '186.72.0.0/15',
5834 'PE': '186.160.0.0/14',
5835 'PF': '123.50.64.0/18',
5836 'PG': '124.240.192.0/19',
5837 'PH': '49.144.0.0/13',
5838 'PK': '39.32.0.0/11',
5839 'PL': '83.0.0.0/11',
5840 'PM': '70.36.0.0/20',
5841 'PR': '66.50.0.0/16',
5842 'PS': '188.161.0.0/16',
5843 'PT': '85.240.0.0/13',
5844 'PW': '202.124.224.0/20',
5845 'PY': '181.120.0.0/14',
5846 'QA': '37.210.0.0/15',
5847 'RE': '102.35.0.0/16',
5848 'RO': '79.112.0.0/13',
5849 'RS': '93.86.0.0/15',
5850 'RU': '5.136.0.0/13',
5851 'RW': '41.186.0.0/16',
5852 'SA': '188.48.0.0/13',
5853 'SB': '202.1.160.0/19',
5854 'SC': '154.192.0.0/11',
5855 'SD': '102.120.0.0/13',
5856 'SE': '78.64.0.0/12',
5857 'SG': '8.128.0.0/10',
5858 'SI': '188.196.0.0/14',
5859 'SK': '78.98.0.0/15',
5860 'SL': '102.143.0.0/17',
5861 'SM': '89.186.32.0/19',
5862 'SN': '41.82.0.0/15',
5863 'SO': '154.115.192.0/18',
5864 'SR': '186.179.128.0/17',
5865 'SS': '105.235.208.0/21',
5866 'ST': '197.159.160.0/19',
5867 'SV': '168.243.0.0/16',
5868 'SX': '190.102.0.0/20',
5869 'SY': '5.0.0.0/16',
5870 'SZ': '41.84.224.0/19',
5871 'TC': '65.255.48.0/20',
5872 'TD': '154.68.128.0/19',
5873 'TG': '196.168.0.0/14',
5874 'TH': '171.96.0.0/13',
5875 'TJ': '85.9.128.0/18',
5876 'TK': '27.96.24.0/21',
5877 'TL': '180.189.160.0/20',
5878 'TM': '95.85.96.0/19',
5879 'TN': '197.0.0.0/11',
5880 'TO': '175.176.144.0/21',
5881 'TR': '78.160.0.0/11',
5882 'TT': '186.44.0.0/15',
5883 'TV': '202.2.96.0/19',
5884 'TW': '120.96.0.0/11',
5885 'TZ': '156.156.0.0/14',
5886 'UA': '37.52.0.0/14',
5887 'UG': '102.80.0.0/13',
5888 'US': '6.0.0.0/8',
5889 'UY': '167.56.0.0/13',
5890 'UZ': '84.54.64.0/18',
5891 'VA': '212.77.0.0/19',
5892 'VC': '207.191.240.0/21',
5893 'VE': '186.88.0.0/13',
5894 'VG': '66.81.192.0/20',
5895 'VI': '146.226.0.0/16',
5896 'VN': '14.160.0.0/11',
5897 'VU': '202.80.32.0/20',
5898 'WF': '117.20.32.0/21',
5899 'WS': '202.4.32.0/19',
5900 'YE': '134.35.0.0/16',
5901 'YT': '41.242.116.0/22',
5902 'ZA': '41.0.0.0/11',
5903 'ZM': '102.144.0.0/13',
5904 'ZW': '102.177.192.0/18',
5905 }
5906
5907 @classmethod
5908 def random_ipv4(cls, code_or_block):
5909 if len(code_or_block) == 2:
5910 block = cls._country_ip_map.get(code_or_block.upper())
5911 if not block:
5912 return None
5913 else:
5914 block = code_or_block
5915 addr, preflen = block.split('/')
5916 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5917 addr_max = addr_min | (0xffffffff >> int(preflen))
5918 return compat_str(socket.inet_ntoa(
5919 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5920
5921
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    # Proxy handler that lets each individual request override the proxy via
    # a 'Ytdl-request-proxy' header ('__noproxy__' disables proxying).

    def __init__(self, proxies=None):
        # Set default handlers
        # NB: the default arguments (proxy/type/meth) freeze the current
        # values per lambda, avoiding the late-binding closure pitfall
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # Per-request override takes precedence over the configured proxy;
        # the header is consumed so it is never sent over the wire
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # SOCKS proxies are signalled via a header and handled by the
            # http/https handlers, not by urllib's ProxyHandler
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5945
5946
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n <= 0:
        # Matches the historical behaviour: n == 0 (and negatives) yield b'\000'
        s = b'\000'
    else:
        # Minimal big-endian representation; replaces the old struct-based loop
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # Front-pad with zeros up to a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5979
5980
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer (big-endian).

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes replaces the old manual 32-bit-chunk accumulation loop
    return int.from_bytes(s, 'big')
5996
5997
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is interpreted as a little-endian integer, hence the reversal
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
6013
6014
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS#1 v1.5 encryption): the padding string PS must consist
    # of *nonzero* pseudo-random octets, since a zero octet terminates the
    # padding; randint(0, 254) could emit 0 and corrupt the block
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
6028
6029
def encode_base_n(num, n, table=None):
    """Render the non-negative integer `num` in base `n`, using `table`
    as the digit alphabet (defaults to 0-9a-zA-Z truncated to n digits)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]
    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
    if num == 0:
        return table[0]

    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
6046
6047
def decode_packed_codes(code):
    """Decode JavaScript obfuscated with the common "p.a.c.k.e.r" scheme."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-n token back to its symbol (the token itself if empty)
    symbol_table = {}
    for idx in range(count - 1, -1, -1):
        token = encode_base_n(idx, base)
        symbol_table[token] = symbols[idx] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
6064
6065
def caesar(s, alphabet, shift):
    """Shift every character of `s` that occurs in `alphabet` by `shift`
    positions (wrapping around); other characters pass through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
6073
6074
def rot47(s):
    """Apply the ROT47 cipher: rotate every printable ASCII character
    (codes 33..126) by 47 positions; leave all other characters as-is."""
    return ''.join(
        chr(33 + (ord(c) - 33 + 47) % 94) if 33 <= ord(c) <= 126 else c
        for c in s)
6077
6078
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=VALUE,KEY="VAL,UE",...') into a dict."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        # Quoted values may contain commas; strip the surrounding quotes
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
6086
6087
def urshift(val, n):
    """Unsigned 32-bit right shift, like JavaScript's `val >>> n`."""
    if val >= 0:
        return val >> n
    # Reinterpret a negative value as its unsigned 32-bit equivalent
    return (val + 0x100000000) >> n
6090
6091
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    # Reference: https://www.w3.org/TR/PNG/
    # Decodes PNG bytes into (width, height, pixels) where `pixels` is a list
    # of rows and each row is a flat list of channel bytes (3 per pixel).
    # Only scanline filter types 0-4 are handled and a layout of 3 bytes per
    # pixel is assumed throughout — presumably RGB input only; TODO confirm
    # callers never pass other color types.
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # All PNG integers are big-endian; pick the format by field width
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (verified by the signature check above)
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split across several IDAT chunks; concatenate them
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per scanline, assuming 3 bytes (channels) per pixel
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-decoded channel byte by its flat index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Each scanline is prefixed by one filter-type byte (PNG spec 4.5.4)
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Filter neighbours: `left` is the same channel of the previous
            # pixel (3 bytes back), `up` the same byte one scanline above;
            # both default to 0 at the image edges
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Choose the predictor closest to p (ties favour a, then b)
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6197
6198
def write_xattr(path, key, value):
    # Write the extended attribute `key` = `value` (bytes) on file `path`.
    # Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate Data
    # Streams on Windows, and the setfattr/xattr command-line tools.
    # Raises XAttrUnavailableError when no backend is available and
    # XAttrMetadataError when the chosen backend fails.
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as text, not bytes
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6281
6282
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict of string-valued fields under the given keys."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    picked = first + datetime.timedelta(days=random.randint(0, (last - first).days))
    return {
        year_field: str(picked.year),
        month_field: str(picked.month),
        day_field: str(picked.day),
    }
6293
6294
# Templates for internet shortcut files, which are plain text files.

# Windows .url shortcut (INI-style)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut: an XML property list containing the URL
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org .desktop entry of Type=Link (Linux desktops)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()

# Maps link-file format names ('url', 'desktop', 'webloc') to their templates
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
6326
6327
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """
    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` values below list the characters that must NOT be
    # percent-encoded; everything else except letters, digits and '_.-'
    # gets UTF-8 percent-encoding, while already-encoded sequences stay put.
    # Source for the `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # Punycode for Unicode hostnames — the 'idna' encoding produces ASCII
    net_location += iri_parts.hostname.encode('idna').decode('utf-8')
    # NOTE(review): port 80 is dropped regardless of scheme — looks wrong
    # for e.g. https URLs; confirm whether any caller depends on this
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse((
        iri_parts.scheme,
        net_location,
        compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
        # Legacy URL "params" component; `safe` mirrors the path component
        compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
        # The query and fragment additionally keep '?' and '{}' unescaped
        compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
        compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6370
6371
def to_high_limit_path(path):
    """On Windows, prefix the absolute path with the long-path marker to work
    around the MAX_PATH limitation; elsewhere return the path unchanged.
    The maximum length of individual path segments may still be limited."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
6378
6379
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch obj[field] (or obj itself when field is None), optionally map it
    through `func`, and render it with `template`; any value found in
    `ignore` — before or after `func` — yields `default` instead."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
    return template % val if val not in ignore else default
6388
6389
def clean_podcast_url(url):
    # Strip known podcast analytics/tracking redirect prefixes from the URL,
    # leaving the direct media URL behind.
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6405
6406
# Lowercase hexadecimal digits, used for generating random hex identifiers
_HEX_TABLE = '0123456789abcdef'
6408
6409
def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string."""
    # 'x' is any hex digit; 'y' must encode the variant, i.e. one of
    # 8, 9, a, b (RFC 4122 §4.4) — previously it was drawn from all 16
    # hex digits, producing occasionally non-compliant UUIDs
    return re.sub(
        r'[xy]',
        lambda m: '%x' % (random.randint(0, 15) if m.group(0) == 'x' else random.randint(8, 11)),
        'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6412
6413
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not exist.

    Returns True on success (or when nothing had to be created); on failure
    reports via `to_screen` (if callable) and returns False.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Fixed: `callable(to_screen) is not None` was always True, so a
        # failure with to_screen=None crashed with TypeError calling None
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6424
6425
def get_executable_path():
    """Return the directory the running program is based in: the PyInstaller
    bundle directory, the directory holding the zipapp, or the project root
    when running from source."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    else:
        # Two levels up when running from a ZIP, one level when from source
        running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
        base = os.path.join(os.path.dirname(__file__), '../..' if running_from_zip else '..')
    return os.path.abspath(base)
6435
6436
def load_plugins(name, suffix, namespace):
    """Load classes whose names end in `suffix` from
    ytdlp_plugins/<name>/__init__.py (located next to the executable),
    inject them into `namespace`, and return the newly added ones."""
    classes = {}
    try:
        plugins_spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        plugins = importlib.util.module_from_spec(plugins_spec)
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)
        for attr in dir(plugins):
            # Skip names already taken and names without the wanted suffix
            if attr in namespace or not attr.endswith(suffix):
                continue
            classes[attr] = namespace[attr] = getattr(plugins, attr)
    except FileNotFoundError:
        # No plugin package present — perfectly fine
        pass
    return classes
6455
6456
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings or "...".
                            When a fuction is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal) tracks how many branching levels ("...", tuples,
        # predicate keys) were expanded, so the caller can flatten the result
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if obj is None:
                return None
            if isinstance(key, (list, tuple)):
                # Branch into every alternative sub-key, then treat the
                # collected results like a "..." expansion
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Expand over all values of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Filter keys/indices through the given predicate
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Case-insensitive lookup falls back to a linear scan
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings into ints/slices
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A full slice ("::") is equivalent to "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a "keep or discard" callable
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Flatten the nested lists produced by branching keys and
                # drop leaves that did not match (None)
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6553
6554
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated wrapper around traverse_obj(); kept for backward
    compatibility only — do not use in new code."""
    return traverse_obj(
        dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
6559
6560
def variadic(x, allowed_types=(str, bytes)):
    """Return `x` unchanged if it is a non-atomic iterable; otherwise wrap it
    in a 1-tuple. Strings/bytes count as atomic by default."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6563
6564
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create an HS256-signed token in JWS Compact Serialization.

    NB: uses standard (not URL-safe) base64 *with* padding, so the output is
    not strictly spec-compliant; kept as-is for site compatibility.
    `headers`, if given, is merged over the default {'alg', 'typ'} header.
    """
    # None sentinel instead of a mutable {} default argument
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
    # The signature covers "<header>.<payload>" exactly as serialized above
    h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6582
6583
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode the payload of a JWS compact token WITHOUT verifying its signature."""
    _header_b64, payload_b64, _signature_b64 = jwt.split('.')
    return json.loads(base64.urlsafe_b64decode(payload_b64))
6589
6590
def supports_terminal_sequences(stream):
    """Best-effort check whether `stream` can render ANSI escape sequences."""
    if compat_os_name == 'nt':
        # Native VT processing needs Windows 10 build 10586 or newer
        vt_capable = get_windows_version() >= (10, 0, 10586)
    else:
        vt_capable = bool(os.getenv('TERM'))
    if not vt_capable:
        return False
    try:
        return stream.isatty()
    except BaseException:
        # Be conservative about exotic stream objects
        return False
6601
6602
# Matches ANSI SGR escape sequences, e.g. '\033[31m' (colors/styles)
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    # Strip ANSI SGR escape sequences (colors, bold, ...) from `string`
    return _terminal_sequences_re.sub('', string)
6608
6609
def number_of_digits(number):
    """Length of the decimal rendering of `number` (a '-' sign counts too)."""
    rendered = '%d' % number
    return len(rendered)
6612
6613
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the truthy values with `delim`; when `from_dict` is given, the
    positional arguments are treated as keys into it first."""
    if from_dict is not None:
        values = map(from_dict.get, values)
    return delim.join(str(val) for val in values if val)