# Source: yt-dlp project, file yt_dlp/utils.py (jfr.im git mirror snapshot)
# Snapshot commit: "[utils] Fix error when copying `LazyList`"
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import hashlib
20 import hmac
21 import importlib.util
22 import io
23 import itertools
24 import json
25 import locale
26 import math
27 import operator
28 import os
29 import platform
30 import random
31 import re
32 import socket
33 import ssl
34 import subprocess
35 import sys
36 import tempfile
37 import time
38 import traceback
39 import xml.etree.ElementTree
40 import zlib
41
42 from .compat import (
43 compat_HTMLParseError,
44 compat_HTMLParser,
45 compat_HTTPError,
46 compat_basestring,
47 compat_chr,
48 compat_cookiejar,
49 compat_ctypes_WINFUNCTYPE,
50 compat_etree_fromstring,
51 compat_expanduser,
52 compat_html_entities,
53 compat_html_entities_html5,
54 compat_http_client,
55 compat_integer_types,
56 compat_numeric_types,
57 compat_kwargs,
58 compat_os_name,
59 compat_parse_qs,
60 compat_shlex_quote,
61 compat_str,
62 compat_struct_pack,
63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
66 compat_urllib_parse_urlencode,
67 compat_urllib_parse_urlparse,
68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
74 compat_xpath,
75 )
76
77 from .socks import (
78 ProxyType,
79 sockssocket,
80 )
81
82
def register_socks_protocols():
    """Teach ``urlsplit()`` to parse SOCKS proxy URLs with a netloc part.

    In Python < 2.6.5, urlsplit() suffers from https://bugs.python.org/issue7904:
    URLs whose scheme is missing from ``urlparse.uses_netloc`` are not handled
    correctly, so each SOCKS scheme is appended to that list (at most once).
    """
    netloc_schemes = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in netloc_schemes:
            netloc_schemes.append(proto)
90
91
# The type of a compiled regular expression pattern; `re` does not expose
# this type under a clean public name on all supported Python versions,
# so it is derived from a throwaway compiled pattern here.
compiled_regex_type = type(re.compile(''))
94
95
def random_user_agent():
    """Return a realistic Chrome-on-Windows User-Agent string.

    A Chrome version number is picked uniformly at random (via
    ``random.choice``) from a fixed list of real Chrome release versions
    and substituted into the UA template below.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Hard-coded list of genuine Chrome version strings (stable/beta/dev
    # builds, roughly newest first); kept verbatim so generated UAs match
    # versions that actually shipped.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    # `%` substitution of the chosen version into the template yields the
    # final UA string
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
# Default HTTP headers sent with every request.  The User-Agent is picked
# once per process by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative user agents for sites that require a specific browser.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1691
1692
# Sentinel used to distinguish "no default supplied" from an explicit None.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, used when parsing localized dates.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized throughout the code base.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to an ASCII transliteration)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# strptime() formats tried (in order) when parsing free-form date strings.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Additional formats for locales that write the day before the month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional formats for locales that write the month before the day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches P.A.C.K.E.R.-style packed JavaScript code.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the JSON body of an application/ld+json <script> tag.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1791
1792
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    encoding = 'UTF-8'  # safe fallback if the locale reports something unusable
    try:
        candidate = locale.getpreferredencoding()
        'TEST'.encode(candidate)  # make sure the reported codec actually works
        encoding = candidate
    except Exception:
        pass
    return encoding
1806
1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temporary file next to the target so the final os.rename
    # stays on the same filesystem (and is therefore atomic on POSIX).
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Probe the current umask (os.umask both sets and returns it, so
            # set it back immediately) and give the temp file normal
            # permissions instead of NamedTemporaryFile's restrictive 0600.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file before propagating the error.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # `key` is interpolated into the XPath expression below, so only
        # allow a safe character set.
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    # Python 2.6 ElementTree does not support attribute predicates in
    # XPath expressions, so emulate them by scanning all matches.
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1882
1883 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1884 # the namespace parameter
1885
1886
def xpath_with_ns(path, ns_map):
    """Expand 'ns:tag' components of an XPath using the given namespace map."""
    def expand(component):
        parts = component.split(':')
        if len(parts) == 1:
            return parts[0]
        ns, tag = parts
        return '{%s}%s' % (ns_map[ns], tag)

    return '/'.join(expand(c) for c in path.split('/'))
1897
1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string or an iterable of
    candidate strings).  Falls back to `default` if given, raises when
    `fatal`, otherwise returns None."""
    if isinstance(xpath, (str, compat_str)):
        found = node.find(compat_xpath(xpath))
    else:
        for candidate in xpath:
            found = node.find(compat_xpath(candidate))
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s'
                             % (xpath if name is None else name))
    return None
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s'
                             % (xpath if name is None else name))
    return None
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matched by xpath[@key]."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s'
                             % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1948
1949
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper over the generic attribute matcher.
    return get_element_by_attribute('id', id, html)
1953
1954
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1959
1960
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag matching attribute=value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1964
1965
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class name as a whole word inside the class attribute value.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    results = []
    for match in re.finditer(pattern, html):
        content = match.group('content')

        # Drop a stray surrounding quote pair, if any.
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1996
1997
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.attrs = {}  # filled in by handle_starttag

    def handle_starttag(self, tag, attrs):
        # Keep the attribute mapping of the (last seen) start tag.
        self.attrs = dict(attrs)
2007
2008
class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []    # attribute dicts of the top-level <li> elements
        self._level = 0    # current tag nesting depth

    def handle_starttag(self, tag, attrs):
        # Only record <li> tags that are not nested inside another element.
        if self._level == 0 and tag == 'li':
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1
2024
2025
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    # Older Python may throw HTMLParseError in case of malformed HTML;
    # treat that as "no attributes found".
    with contextlib.suppress(compat_HTMLParseError):
        parser.feed(html_element)
        parser.close()
    return parser.attrs
2050
2051
def parse_list(webpage):
    """Given a string for an series of HTML <li> elements,
    return a dictionary of their attributes"""
    # Collects one attribute dict per top-level <li> element.
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items
2059
2060
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # Turn <br> and paragraph boundaries into newlines (real newlines in the
    # markup are just whitespace to HTML, so flatten them first).
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html.replace('\n', ' '))
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip all remaining tags, then decode entities.
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2076
2077
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means stdout; on Windows switch it to binary mode so
            # newline bytes pass through untranslated.
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error cannot be fixed by renaming; re-raise at once.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2108
2109
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a recognizable RFC 2822 date
        return None
    return email.utils.mktime_tz(parsed)
2117
2118
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # NOTE: the order of these checks is significant; earlier rules take
        # precedence over the generic restricted-mode rules at the bottom.
        if restricted and char in ACCENT_CHARS:
            # Transliterate accented characters to ASCII in restricted mode
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return ' '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            # '?' and control characters are dropped entirely
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            # Characters forbidden in Windows filenames
            return '_'
        if restricted and (char in '!&\'(\
)[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            # No non-ASCII characters in restricted mode
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps: turn e.g. 12:34:56 into 12_34_56 before the
    # per-character rules would turn ':' into ' -'
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        # Collapse runs of '_' introduced by the substitutions above
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2162
2163
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    # Always sanitizes on Windows; elsewhere only when `force` is set
    # (in which case drive/UNC prefix handling is skipped).
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            # Python < 2.7 splitdrive() does not recognize UNC paths
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters invalid in Windows path components, and any
    # trailing dot/space, with '#'; keep '.' and '..' untouched.
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve an absolute path's leading separator when forcing
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2187
2188
def sanitize_url(url):
    """Normalize a URL: add a scheme to protocol-relative URLs and repair
    a few frequently seen scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://')):
        fixed, count = re.subn(mistake, fixup, url)
        if count:
            return fixed
    return url
2205
2206
def extract_basic_auth(url):
    """Strip userinfo (user:pass@) from `url`; return the clean URL and a
    'Basic ...' Authorization header value (or None if no userinfo)."""
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    netloc = (parts.hostname if parts.port is None
              else '%s:%d' % (parts.hostname, parts.port))
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8'))
    return url, 'Basic ' + token.decode('utf-8')
2217
2218
def sanitized_Request(url, *args, **kwargs):
    # Build a urllib Request with the URL sanitized/escaped; credentials
    # embedded in the URL (user:pass@host) are moved into a Basic auth header.
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # headers may have been passed positionally (2nd vararg, matching
        # Request(url, data, headers)) or as a keyword argument
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2225
2226
def expand_path(s):
    """Expand shell variables and ~"""
    expanded = compat_expanduser(s)          # ~ and ~user
    return os.path.expandvars(expanded)      # $VAR / %VAR%
2230
2231
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # Deliberately uses list membership (==) instead of a set so that
    # unhashable elements are supported; first occurrence order is kept.
    unique = []
    for item in iterable:
        if item not in unique:
            unique.append(item)
    return unique
2239
2240
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: decimal (#123) or hexadecimal (#x7B)
    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base, numstr = 16, '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2270
2271
def unescapeHTML(s):
    """Replace HTML entities in `s` with their characters; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2279
2280
def escapeHTML(text):
    """Escape &, <, >, " and ' for safe embedding into HTML."""
    # '&' must be replaced first so the entities introduced below survive.
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'),
                         ('"', '&quot;'), ("'", '&#39;')):
        text = text.replace(char, entity)
    return text
2290
2291
def process_communicate_or_kill(p, *args, **kwargs):
    # Like Popen.communicate(), but never leaves the child running: on any
    # exception the process is killed and reaped before re-raising.
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2299
2300
class Popen(subprocess.Popen):
    """subprocess.Popen wrapper that suppresses the console window on
    Windows and adds communicate_or_kill()."""

    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None  # no special startup handling needed elsewhere

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)
2313
2314
def get_subprocess_encoding():
    """Return the encoding used when talking to subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2325
2326
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # --- Python 2 only below this point ---

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5 and not for_subprocess:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2349
2350
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; a no-op on Python 3
    or for values that are not bytes."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2360
2361
def encodeArgument(s):
    """Encode a command-line argument for subprocess use."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, for_subprocess=True)
2369
2370
def decodeArgument(b):
    """Inverse of encodeArgument()."""
    return decodeFilename(b, for_subprocess=True)
2373
2374
def decodeOption(optval):
    """Decode a command-line option value to text if it arrived as bytes."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2383
2384
# Simple named container for a split-out duration.
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into (hours, minutes, seconds, milliseconds)."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2393
2394
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]MM<delim>SS (or bare
    seconds), optionally appending milliseconds as '.mmm'."""
    hrs, mins, whole_secs, ms = timetuple_from_msec(secs * 1000)
    if hrs:
        ret = '%d%s%02d%s%02d' % (hrs, delim, mins, delim, whole_secs)
    elif mins:
        ret = '%d%s%02d' % (mins, delim, whole_secs)
    else:
        ret = '%d' % whole_secs
    if msec:
        ret = '%s.%03d' % (ret, ms)
    return ret
2404
2405
def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        store_entries = ssl.enum_certificates(storename)
    except PermissionError:
        return
    for cert, encoding, trust in store_entries:
        # Only X.509 certificates trusted for server authentication
        if encoding != 'x509_asn':
            continue
        if trust is not True and ssl.Purpose.SERVER_AUTH.oid not in trust:
            continue
        try:
            ssl_context.load_verify_locations(cadata=cert)
        except ssl.SSLError:
            pass
2419
2420
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler whose SSL context honours the
    'nocheckcertificate' parameter."""
    verify = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = verify
    context.verify_mode = ssl.CERT_REQUIRED if verify else ssl.CERT_NONE
    if verify:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2442
2443
def bug_reports_message(before=';'):
    """Return a canned "please report this issue" blurb, joined after
    `before` (capitalized when it starts a new sentence)."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
           % update_cmd)

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence, so capitalize the message
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
2458
2459
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    # Default message; subclasses may override this class attribute.
    msg = None

    def __init__(self, msg=None):
        # Priority: explicit argument > class-level msg > class name.
        if msg is not None:
            self.msg = msg
        elif self.msg is None:
            self.msg = type(self).__name__
        super().__init__(self.msg)
2470
2471
# Exception types treated as (expected) network failures throughout the
# code base.  Built as a list, then frozen to a tuple for `except` clauses.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2476
2477
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # Errors raised while handling a known network exception are always
        # treated as expected (no bug-report blurb appended).
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie  # name of the extractor that raised the error
        self.exc_info = sys.exc_info()  # preserve original exception

        # Final message layout: "[ie] video_id: msg (caused by ...)<blurb>"
        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        # Render the stored traceback, or None if none was given.
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2507
2508
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL."""

    def __init__(self, url):
        super().__init__('Unsupported URL: %s' % url, expected=True)
        self.url = url
2514
2515
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2519
2520
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # Geo-restriction is never a bug, so it is always "expected"
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        self.countries = countries
2532
2533
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg)
        self.exc_info = exc_info
2546
2547
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    # Default message (see YoutubeDLError.__init__)
    msg = 'Entry not found in info'
2555
2556
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Include the conflicting filename in the message (mirrors
            # UnavailableVideoError); the f-string must interpolate the
            # argument, not a literal placeholder.
            self.msg += f': {filename}'
        super().__init__(self.msg)
2569
2570
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg
2581
2582
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Default message (see YoutubeDLError.__init__)
    msg = 'The download was cancelled'
2586
2587
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Default message (see YoutubeDLError.__init__)
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
2591
2592
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    # Default message (see YoutubeDLError.__init__)
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
2596
2597
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Default message (see YoutubeDLError.__init__)
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2601
2602
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    # Default message (see YoutubeDLError.__init__)
    msg = 'The download speed is below throttle limit'
2606
2607
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            # Append the underlying cause to the default message
            self.msg = '%s: %s' % (self.msg, err)
        super().__init__(self.msg)
2620
2621
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both counts are in bytes and kept for the caller to inspect
        self.downloaded = downloaded
        self.expected = expected
        super().__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
2637
2638
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended-attribute (xattr) metadata fails.

    `reason` classifies the failure so callers can decide how to react:
    'NO_SPACE' (disk full / quota), 'VALUE_TOO_LONG' (attribute value
    exceeds the filesystem limit) or 'NOT_SUPPORTED' (anything else).
    """

    def __init__(self, code=None, msg='Unknown error'):
        # Py3-style super(); behavior identical to the old two-argument form
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2653
2654
class XAttrUnavailableError(YoutubeDLError):
    """Raised when extended-attribute (xattr) support is unavailable."""
2657
2658
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an `http_class` connection applying the handler's options.

    Honours ydl_handler._params['source_address'] by binding outgoing
    sockets to that address; `is_https` is only consulted by the
    Python 2.6 fallback path to wrap the socket in TLS.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A '.' in the source address implies IPv4, otherwise IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # Re-raise the last connect error; if getaddrinfo gave us
            # nothing at all, report that instead
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2722
2723
def handle_youtubedl_headers(headers):
    """Process the internal 'Youtubedl-no-compression' pseudo-header.

    When the marker is present, return a copy of `headers` without any
    'Accept-Encoding' header (matched case-insensitively) and without
    the marker itself; otherwise return `headers` unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    return {
        key: value for key, value in headers.items()
        if key.lower() != 'accept-encoding' and key != 'Youtubedl-no-compression'
    }
2732
2733
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # `params` is the YoutubeDL options dict; only 'source_address'
        # is read here (via _create_http_connection)
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        # Route through a SOCKS-capable connection class when the request
        # carries the internal 'Ytdl-socks-proxy' pseudo-header
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress both raw-deflate and zlib-wrapped payloads."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decompress the response body and sanitize
        redirect Location headers."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry, trimming up to 1023 trailing bytes until it parses
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2857
2858
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from `base_class` that tunnels through
    the SOCKS proxy described by the `socks_proxy` URL.

    Recognized URL schemes: socks5, socks/socks4, socks4a.
    NOTE(review): any other scheme leaves `socks_type` unassigned, which
    raises UnboundLocalError when `proxy_args` is built below — callers
    are expected to pass only recognized schemes.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # Credentials may be percent-encoded in the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            # Establish the TCP connection through the SOCKS proxy
            # instead of connecting directly
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the proxied socket in TLS afterwards
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2900
2901
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS counterpart of YoutubeDLHandler.

    Allows overriding the connection class (`https_conn_class`) and
    honours the internal 'Ytdl-socks-proxy' pseudo-header.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        # Forward the handler's SSL context / hostname-check settings
        # when the running Python version provides them
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2925
2926
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix curl/Mozilla use to mark HttpOnly cookies in cookies.txt
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookies.txt entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate a single cookies.txt line, stripping the HttpOnly
            # prefix; raises LoadError on malformed entries so they can
            # be skipped with a warning instead of aborting the load
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
3043
3044
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that applies cookie handling to HTTPS too."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # The workaround below is kept for reference but disabled:
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
3067
3068
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Route 301/303/307/308 through the stock 302 handler so 308 works
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3124
3125
def extract_timezone(date_str):
    """Split a trailing UTC-offset designator off `date_str`.

    Returns a tuple (offset, remainder): `offset` is a datetime.timedelta
    (zero for 'Z', for no recognizable designator, or for a bare match
    without offset digits) and `remainder` is the string with the
    designator removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if m is None:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        # Matched the bare 'Z' alternative: UTC
        return datetime.timedelta(), date_str
    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3150
3151
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given ISO 8601-like date string.

    `delimiter` separates the date and time parts; when `timezone` is
    None, the UTC offset is extracted from the string itself.
    Returns None for None input or an unparseable string.
    """
    if date_str is None:
        return None

    # Fractional seconds are not supported by strptime's pattern below
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
3169
3170
def date_formats(day_first=True):
    """Return the known date-format strings, ordered by day-first preference."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3173
3174
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    result = None
    # Replace commas
    normalized = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    normalized = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', normalized)
    _, normalized = extract_timezone(normalized)

    # NB: intentionally no break — the last matching format wins
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(normalized, fmt).strftime('%Y%m%d')
        except ValueError:
            pass

    if result is None:
        # Fall back to the RFC 2822 parser
        parsed = email.utils.parsedate_tz(normalized)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if result is not None:
        return compat_str(result)
3201
3202
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a fuzzy date string.

    `day_first` controls whether ambiguous numeric dates are tried as
    day-first formats.  Returns None when nothing matches.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker shifts the parsed hour by 12
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to the RFC 2822 parser
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3234
3235
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from `url`.

    Returns the text after the final '.' (query string ignored) when it
    is purely alphanumeric; otherwise falls back to known extensions for
    URLs like http://example.com/foo/bar.mp4/?download, and finally to
    `default_ext`.
    """
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3247
3248
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return `filename` with its extension replaced by '<lang>.<format>'."""
    new_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, new_ext, expected_real_ext)
3251
3252
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Relative form, e.g. 'today-2weeks': recursively resolve the base,
    # then apply the signed offset
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have no fixed length; use calendar arithmetic
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    # Absolute form: parse with `format`
    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3293
3294
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3303
3304
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by a number of calendar months.

    The day is clamped to the last valid day of the target month
    (e.g. Jan 31 + 1 month -> Feb 28/29).
    """
    zero_based = dt.month - 1 + months
    year = dt.year + zero_based // 12
    month = zero_based % 12 + 1
    # Clamp, since the target month may be shorter than dt's month
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3312
3313
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    ('microsecond' leaves it untouched; 'second'/'minute'/'hour'/'day'
    round half-up on the UTC timestamp).
    """
    if precision == 'microsecond':
        return dt

    step = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3330
3331
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    any other input is returned unchanged."""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if m is None:
        return date_str
    return '-'.join(m.groups())
3340
3341
class DateRange(object):
    """A closed interval of dates; membership means start <= date <= end."""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3371
3372
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Normalize to text in case the platform string comes back as bytes
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3381
3382
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3389
3390
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map stdout/stderr file descriptors to the Win32 std-handle IDs
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on real console handles; redirected
        # output (files, pipes) must use the normal write path
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual
        # Plane (needs a surrogate pair), or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3464
3465
def write_string(s, out=None, encoding=None):
    """Write text `s` to stream `out` (default: sys.stderr) and flush,
    papering over platform and stream-encoding differences.

    On Windows consoles, tries the WriteConsoleW path first; for binary
    streams (or py2's lying sys.stderr) encodes with `encoding` or the
    preferred encoding, ignoring unencodable characters.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        # so we control the error handling
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3486
3487
def bytes_to_intlist(bs):
    """Convert a byte string (or py2-style str) to a list of int values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3 bytes index directly to ints
        return list(bs)
    # Character sequence (py2 str / py3 str): go through ord()
    return [ord(c) for c in bs]
3495
3496
def intlist_to_bytes(xs):
    """Pack a sequence of ints (0-255) into a byte string."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
3501
3502
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: Windows (LockFileEx/UnlockFileEx via ctypes), POSIX (fcntl.flock)
# or a stub that raises IOError where neither is available.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Matches the Win32 OVERLAPPED structure passed to LockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE, # hFile
        ctypes.wintypes.DWORD, # dwFlags
        ctypes.wintypes.DWORD, # dwReserved
        ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED) # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE, # hFile
        ctypes.wintypes.DWORD, # dwReserved
        ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED) # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the maximum possible byte range of the file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the pointer alive on the file object for the matching unlock
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, lack fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3576
3577
class locked_file(object):
    """File wrapper that holds an OS-level lock for the life of a `with` block.

    A shared lock is taken for mode 'r', an exclusive lock for 'a'/'w'.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Locking failed: do not leak the open file handle
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3607
3608
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to UTF-8 when unknown."""
    enc = sys.getfilesystemencoding()
    return 'utf-8' if enc is None else enc
3612
3613
def shell_quote(args):
    """Build a shell-safe command line from str/bytes arguments."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
3623
3624
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL so nothing is lost
    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
3633
3634
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url().

    Returns (url, data); when nothing was smuggled, (smug_url, default).
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3642
3643
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'.

    Accepts ints, floats and numeric strings; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so absurdly large values do not index past the suffix table
        exponent = min(int(math.log(bytes, 1024.0)), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3656
3657
def lookup_unit_table(unit_table, s):
    """Parse '<number> <unit>' at the start of `s` using the multipliers
    in `unit_table`; return the scaled int, or None when nothing matches."""
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if match is None:
        return None
    # A comma is treated as a decimal separator (e.g. '1,5 MiB')
    number = float(match.group('num').replace(',', '.'))
    return int(number * unit_table[match.group('unit')])
3667
3668
def parse_filesize(s):
    """Parse a human-readable file size like '5.5 MiB' into a byte count."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3738
3739
def parse_count(s):
    """Parse a human-readable count like '1.5M' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Plain number, possibly with digit separators
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, s)
3759
3760
def parse_resolution(s):
    """Extract width/height from a resolution-like string.

    Understands '1920x1080', '720p'/'1080i' and '4k'/'8k' notations;
    returns {} when nothing matches.
    """
    if s is None:
        return {}

    m = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if m:
        return {'width': int(m.group('w')), 'height': int(m.group('h'))}

    m = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # '4k' ~ 2160 lines, '8k' ~ 4320 lines
        return {'height': int(m.group(1)) * 540}

    return {}
3781
3782
def parse_bitrate(s):
    """Return the bitrate in kbps parsed from a string like '128 kbps'."""
    if not isinstance(s, compat_str):
        return
    m = re.search(r'\b(\d+)\s*kbps', s)
    return int(m.group(1)) if m else None
3789
3790
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    # Fall back to English when the requested language is unknown
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    for number, month in enumerate(month_names, start=1):
        if month == name:
            return number
    return None
3800
3801
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3810
3811
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # The lookahead leaves already-valid entities and character refs intact
    bare_amp = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_amp, '&amp;', xml_str)
3818
3819
def setproctitle(title):
    """Best-effort: set the process name via glibc prctl (no-op elsewhere)."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3844
3845
def remove_start(s, start):
    """Strip `start` from the beginning of `s` when present (None-safe)."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3848
3849
def remove_end(s, end):
    """Strip `end` from the end of `s` when present (None-safe)."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
3852
3853
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3861
3862
def get_domain(url):
    """Return the 'domain.tld' part of `url` (scheme and www. stripped), or None."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not m:
        return None
    return m.group('domain')
3866
3867
def url_basename(url):
    """Return the last path component of `url` ('' for an empty path)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').rpartition('/')[2]
3871
3872
def base_url(url):
    """Return the URL up to and including the last '/' before any query/fragment."""
    # NOTE: raises AttributeError when no such prefix exists
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3875
3876
def urljoin(base, path):
    """Join `base` and `path` into an absolute URL; None on unusable input."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Already absolute (or protocol-relative): return unchanged
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3890
3891
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that always issues HTTP HEAD."""
    def get_method(self):
        return 'HEAD'
3895
3896
class PUTRequest(compat_urllib_request.Request):
    """Request subclass that always issues HTTP PUT."""
    def get_method(self):
        return 'PUT'
3900
3901
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce `v` (optionally via attribute `get_attr`) to an int scaled by
    invscale/scale; return `default` when conversion is impossible."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
3914
3915
def str_or_none(v, default=None):
    """Stringify `v`, returning `default` when it is None."""
    if v is None:
        return default
    return compat_str(v)
3918
3919
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop digit separators and a stray '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3927
3928
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce `v` to a float scaled by invscale/scale; `default` on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
3936
3937
def bool_or_none(v, default=None):
    """Pass `v` through when it is a real bool; otherwise return `default`."""
    if isinstance(v, bool):
        return v
    return default
3940
3941
def strip_or_none(v, default=None):
    """Return `v` stripped of surrounding whitespace; `default` for non-strings."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3944
3945
def url_or_none(url):
    """Return `url` stripped when it looks like a supported URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3951
3952
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string with `date_format`,
    returning `default` when the input cannot be interpreted."""
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        # AttributeError covers datetime_object still being None
        return default
3963
3964
def parse_duration(s):
    """Parse a duration string into seconds (float), or None.

    Accepts '[[[DD:]HH:]MM:]SS[.ms]', ISO 8601-ish 'P...T...' forms and
    free text like '2 hours' / '10 mins'.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated form first: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Last resort: 'X hours' / 'Y mins' free text
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
4021
4022
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the real extension: 'a.mp4' -> 'a.temp.mp4'.

    When `expected_real_ext` is given and the actual extension differs,
    `ext` is appended instead: 'a.mp4' -> 'a.mp4.temp'.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
4029
4030
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file's extension with `ext`.

    When `expected_real_ext` is given and the actual extension differs,
    `ext` is appended to the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
4036
4037
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        # Run it once; OSError means the binary could not be found/started
        Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
    except OSError:
        return False
    return exe
4046
4047
def _get_exe_version_output(exe, args):
    """Run `exe` with `args` and return its combined stdout/stderr as text,
    or False when the executable cannot be run."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return out
4061
4062
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program output using `version_re`,
    falling back to a generic 'version X' pattern; `unrecognized` when absent."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
4072
4073
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    out = _get_exe_version_output(exe, args)
    if not out:
        return False
    return detect_exe_version(out, version_re, unrecognized)
4080
4081
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        # _cache may be shared with another LazyList (see __copy__/__reversed__)
        self.__iterable = iter(iterable)
        self.__cache = [] if _cache is None else _cache
        self.__reversed = reverse

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Pull everything left in the iterable into the cache
        self.__cache.extend(self.__iterable)
        # Discard the emptied iterable to make it pickle-able
        self.__iterable = []
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map an index to its mirrored position from the end (None stays None)
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Only consume as many items as the requested index/slice needs
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def __reversed__(self):
        # Shares the cache and the not-yet-consumed iterable with self
        return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)

    def __copy__(self):
        # Shallow copy sharing cache and iterable with self
        return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)

    def __deepcopy__(self, memo):
        # FIXME: This is actually just a shallow copy
        id_ = id(self)
        memo[id_] = self.__copy__()
        return memo[id_]

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4176
4177
class PagedList:
    """Base class for lazily fetched, page-oriented sequences of entries."""

    class IndexError(IndexError):
        pass

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Return page `pagenum` as a list, consulting the cache first."""
        try:
            return self._cache[pagenum]
        except KeyError:
            pass
        results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = results
        return results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if not entries:
            raise self.IndexError()
        return entries[0]
4215
4216
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages one after another as a slice is consumed."""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested range within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4250
4251
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known in advance."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        # Caching is always enabled for this variant
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Entries to drop from the first page / total entries still wanted
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
4275
4276
def uppercase_escape(s):
    """Decode \\UXXXXXXXX (8 hex digits) escape sequences embedded in `s`."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
4283
4284
def lowercase_escape(s):
    """Decode \\uXXXX (4 hex digits) escape sequences embedded in `s`."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
4291
4292
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # The safe-set keeps RFC 3986 reserved/unreserved punctuation intact
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4298
4299
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # The host is IDNA-encoded rather than percent-escaped
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
4310
4311
def parse_qs(url):
    """Return the query string of `url` parsed into a dict of value lists."""
    return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4314
4315
def read_batch_urls(batch_fd):
    """Read URLs from an open batch file, skipping comments and blank lines."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4333
4334
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4337
4338
def update_url_query(url, query):
    """Return `url` with the pairs in `query` merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(params, True)))
4347
4348
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Rebuild `req` with updated url/data/headers/query, keeping its HTTP method."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    # Preserve non-standard methods by picking the matching Request subclass
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4367
4368
def _multipart_encode_impl(data, boundary):
    """Encode `data` as multipart/form-data with the given boundary.

    Returns (body_bytes, content_type); raises ValueError when the boundary
    occurs inside the payload.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    for k, v in data.items():
        out += b'--' + boundary.encode('ascii') + b'\r\n'
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary.encode('ascii') in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary.encode('ascii') + b'--\r\n'

    return out, content_type
4389
4390
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # The boundary collided with the payload; retry with a fresh one
            # unless the caller insisted on this exact boundary
            if has_specified_boundary:
                raise
            boundary = None
4419
4420
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable value in `d` for one key or a list of keys.

    None values (and, when skip_false_values is set, all falsy values) are
    skipped; `default` is returned when nothing matches.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4429
4430
def try_get(src, getter, expected_type=None):
    """Apply each getter in turn to `src`; return the first result that does
    not raise and (when expected_type is given) has the expected type."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4440
4441
def merge_dicts(*dicts):
    """Merge dicts left-to-right: earlier values win, except that an empty
    string may later be replaced by a non-empty string. None values are skipped."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k not in merged:
                merged[k] = v
            elif (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]):
                merged[k] = v
    return merged
4454
4455
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    # Decode `string` to text when it is not already; NOTE: the default
    # encoding is evaluated once, at import time
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4458
4459
# MPAA-style US content ratings mapped to minimum viewer ages
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to minimum viewer ages
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4477
4478
def parse_age_limit(s):
    """Normalize an age limit (int, 'N+', US rating or TV guideline) to an int."""
    # type() rather than isinstance() deliberately excludes bools
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4494
4495
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, leaving only the JSON payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4504
4505
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal to parseable JSON.

    Handles comments, single quotes, unquoted keys, hex/octal integers,
    trailing commas and `undefined`/`void 0`.
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite a single matched token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to double-quoted JSON form
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # A token ending in ':' is an object key
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4552
4553
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Rank equals the position in quality_ids; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4562
4563
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Output template types mapped to the default filename infix they use
# (None: no fixed infix)
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])? # unused in python
        {1} # conversion type
    )
'''


STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4599
4600
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
4609
4610
def version_tuple(v):
    """Split a version string like '2021.1.1-3' into a tuple of ints."""
    return tuple(int(part) for part in re.split(r'[-.]', v))
4613
4614
def is_outdated_version(version, limit, assume_new=True):
    """Return whether `version` is older than `limit`; unparsable or missing
    versions yield `not assume_new`."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
4622
4623
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Imported lazily to avoid a circular import at module load time
    from .update import is_non_updateable

    updateable = not is_non_updateable()
    return updateable
4630
4631
def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(map(compat_shlex_quote, args))
4635
4636
def error_to_compat_str(err):
    """Return the error message of an exception as a text string."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 an error byte string must be decoded with the proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4644
4645
def mimetype2ext(mt):
    """Map a MIME type (optionally carrying ';'-parameters) to a file
    extension; returns None for None input."""
    if mt is None:
        return None

    # Discard any parameters such as "; charset=utf-8" and trim whitespace
    mt = mt.partition(';')[0].strip()

    # Exact full-type matches take precedence
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }
    if mt in FULL_MAP:
        return FULL_MAP[mt]

    # Next, match on the subtype alone (case-insensitive)
    subtype = mt.rpartition('/')[2]
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }
    sub_ext = SUBTYPE_MAP.get(subtype.lower())
    if sub_ext is not None:
        return sub_ext

    # Then try the "+suffix" part of structured syntax types
    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }
    suffix = subtype.partition('+')[2]
    suffix_ext = SUFFIX_MAP.get(suffix)
    if suffix_ext is not None:
        return suffix_ext

    # Fall back to the subtype itself, with '+' normalized to '.'
    return subtype.replace('+', '.')
4708
4709
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into vcodec/acodec/dynamic_range."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = [c for c in map(str.strip, codecs_str.strip().strip(',').split(',')) if c]
    vcodec = acodec = hdr = None
    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                    'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
                    'dtsc', 'dtse', 'dtsh', 'dtsl')
    for full_codec in split_codecs:
        parts = full_codec.split('.')
        # Strip zeros so e.g. "av01"/"vp09" normalize to "av1"/"vp9"
        codec = parts[0].replace('0', '')
        if codec in VIDEO_CODECS:
            if vcodec is None:
                # vp9/av1 carry profile info in the first 4 dotted parts
                vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
            # Detect the dynamic range from the codec string
            if codec in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
                hdr = 'HDR10'
            elif full_codec.replace('0', '').startswith('vp9.2'):
                hdr = 'HDR10'
        elif codec in AUDIO_CODECS:
            if acodec is None:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
    # Nothing recognized: assume "video, audio" when exactly two are given
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4748
4749
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a url handle's response headers."""
    headers = url_handle.headers

    # Prefer an explicit filename from Content-Disposition, if present
    cd = headers.get('Content-Disposition')
    if cd:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    # Otherwise derive it from the Content-Type
    return mimetype2ext(headers.get('Content-Type'))
4762
4763
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 'data:' URI from raw bytes and a MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4766
4767
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer age limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4776
4777
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Byte-order marks and their encodings; longer marks must come first
    # so that e.g. UTF-32-LE is not mistaken for UTF-16-LE
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break

    text = first_bytes[offset:].decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
4796
4797
def determine_protocol(info_dict):
    """Return the download protocol, inferring it from the URL when the
    info dict does not carry one explicitly."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    # Streaming schemes are recognized by URL prefix
    for proto in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(proto):
            return proto

    # Manifest formats are recognized by file extension
    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4818
4819
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a \t will be right aligned """
    def width(string):
        # Display width: ignore terminal escape sequences and the
        # right-alignment tab marker
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        # Widest cell of each column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only cells whose corresponding filter entry is truthy
        return [col for (take, col) in zip(filterArray, row) if take]

    if hide_empty:
        # Drop columns whose data cells are all empty (max width 0)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        # Insert a delimiter row (delim repeated to the column width)
        # between the header and the data
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
        table[1][-1] = table[1][-1][:-extra_gap]  # Remove extra_gap from end of delimiter
    # NOTE: rows are padded in place; '\t' is replaced with the padding
    # needed to right-align the text that follows it
    for row in table:
        for pos, text in enumerate(map(str, row)):
            if '\t' in text:
                row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
    ret = '\n'.join(''.join(row).rstrip() for row in table)
    return ret
4851
4852
4853 def _match_one(filter_part, dct, incomplete):
4854 # TODO: Generalize code with YoutubeDL._build_format_filter
4855 STRING_OPERATORS = {
4856 '*=': operator.contains,
4857 '^=': lambda attr, value: attr.startswith(value),
4858 '$=': lambda attr, value: attr.endswith(value),
4859 '~=': lambda attr, value: re.search(value, attr),
4860 }
4861 COMPARISON_OPERATORS = {
4862 **STRING_OPERATORS,
4863 '<=': operator.le, # "<=" must be defined above "<"
4864 '<': operator.lt,
4865 '>=': operator.ge,
4866 '>': operator.gt,
4867 '=': operator.eq,
4868 }
4869
4870 operator_rex = re.compile(r'''(?x)\s*
4871 (?P<key>[a-z_]+)
4872 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4873 (?:
4874 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4875 (?P<strval>.+?)
4876 )
4877 \s*$
4878 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4879 m = operator_rex.search(filter_part)
4880 if m:
4881 m = m.groupdict()
4882 unnegated_op = COMPARISON_OPERATORS[m['op']]
4883 if m['negation']:
4884 op = lambda attr, value: not unnegated_op(attr, value)
4885 else:
4886 op = unnegated_op
4887 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4888 if m['quote']:
4889 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4890 actual_value = dct.get(m['key'])
4891 numeric_comparison = None
4892 if isinstance(actual_value, compat_numeric_types):
4893 # If the original field is a string and matching comparisonvalue is
4894 # a number we should respect the origin of the original field
4895 # and process comparison value as a string (see
4896 # https://github.com/ytdl-org/youtube-dl/issues/11082)
4897 try:
4898 numeric_comparison = int(comparison_value)
4899 except ValueError:
4900 numeric_comparison = parse_filesize(comparison_value)
4901 if numeric_comparison is None:
4902 numeric_comparison = parse_filesize(f'{comparison_value}B')
4903 if numeric_comparison is None:
4904 numeric_comparison = parse_duration(comparison_value)
4905 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4906 raise ValueError('Operator %s only supports string values!' % m['op'])
4907 if actual_value is None:
4908 return incomplete or m['none_inclusive']
4909 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
4910
4911 UNARY_OPERATORS = {
4912 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4913 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4914 }
4915 operator_rex = re.compile(r'''(?x)\s*
4916 (?P<op>%s)\s*(?P<key>[a-z_]+)
4917 \s*$
4918 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4919 m = operator_rex.search(filter_part)
4920 if m:
4921 op = UNARY_OPERATORS[m.group('op')]
4922 actual_value = dct.get(m.group('key'))
4923 if incomplete and actual_value is None:
4924 return True
4925 return op(actual_value)
4926
4927 raise ValueError('Invalid filter part %r' % filter_part)
4928
4929
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
    When incomplete, all conditions passes on missing fields
    """
    # Clauses are '&'-separated; a literal '&' can be escaped as '\&'
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct, incomplete):
            return False
    return True
4937
4938
def match_filter_func(filter_str):
    """Build a --match-filter callback from its string representation.

    The returned function yields None when the video passes the filter,
    or a human-readable skip reason otherwise."""
    def _match_func(info_dict, *args, **kwargs):
        if match_str(filter_str, info_dict, *args, **kwargs):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4947
4948
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset, optionally with an 's' suffix (e.g. "12.5s")
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock time "HH:MM:SS[.f]" (a ':' before the fraction is tolerated)
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes = int(mobj.group(1)), int(mobj.group(2))
        seconds = float(mobj.group(3).replace(':', '.'))
        return 3600 * hours + 60 * minutes + seconds
    return None
4960
4961
def srt_subtitles_timecode(seconds):
    """Format seconds as an SRT timecode (HH:MM:SS,mmm)."""
    timecode = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timecode
4964
4965
def ass_subtitles_timecode(seconds):
    """Format seconds as an ASS timecode (H:MM:SS.cc — centiseconds)."""
    tc = timetuple_from_msec(seconds * 1000)
    # ASS uses centiseconds, hence milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*tc[:-1], tc.milliseconds / 10)
4969
4970
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Some providers still use pre-standard TTML namespaces; rewrite them
    # to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* style attributes that can be expressed in SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that serializes one <p> subtree into
        # SRT-compatible markup (<b>/<i>/<u>/<font>)

        def __init__(self):
            # Fix: these used to be mutable *class* attributes, so the
            # open-element and applied-style stacks were shared between
            # instances; a parse aborted midway could then leak stack
            # state into every later parse_node() call
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an ancestor
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and feed it through the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; loop again whenever a style's parent is
    # defined later in the document than the style referencing it
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on <body> or <div> becomes the document default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
5133
5134
def cli_option(params, command_option, param):
    """Return [command_option, str(value)] when params[param] is set,
    else []."""
    value = params.get(param)
    if value:
        # Only truthy values are coerced to a string (preserves legacy
        # behavior for falsy-but-not-None values such as 0 or '')
        value = compat_str(value)
    return [] if value is None else [command_option, value]
5140
5141
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Map a boolean params[param] to CLI arguments; None yields []."""
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        # Single fused argument, e.g. '--opt=true'
        return ['%s%s%s' % (command_option, separator, rendered)]
    return [command_option, rendered]
5150
5151
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value,
    else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5155
5156
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Return the first matching argument list from argdict for the given
    key groups, flattened; `default` when nothing matches."""
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_list in keys:
        # Each entry of `keys` may be a single key or a group of keys;
        # all present groups members are concatenated
        found = [argdict.get(key.lower()) for key in variadic(key_list)]
        found = [args for args in found if args is not None]
        if found:
            return [arg for args in found for arg in args]
    return default
5175
5176
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve the per-executable argument lists for `exe` under `main_key`
    and delegate the lookup to cli_configuration_args."""
    main_key = main_key.lower()
    exe = exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{k}' for k in (keys or [''])]
    # The bare root key is present exactly when '' was among the given keys
    if root_key in keys:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    else:
        use_compat = False
    return cli_configuration_args(argdict, keys, default, use_compat)
5188
5189
class ISO639Utils(object):
    """Conversion between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Maps ISO 639-1 to ISO 639-2/T; a few deprecated 639-1 codes map to
    # the same 639-2 code as their replacements (iw/he, in/id, ji/yi)
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T.

        Only the first two characters are considered, so region-qualified
        codes such as 'en-US' are tolerated. Returns None if unknown."""
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1.

        Reverse lookup over _lang_map; implicitly returns None when no
        entry matches."""
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5393
5394
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name

        Lookup is case-insensitive; returns None for unknown codes."""
        return cls._country_map.get(code.upper())
5653
5654
5655 class GeoUtils(object):
5656 # Major IPv4 address blocks per country
5657 _country_ip_map = {
5658 'AD': '46.172.224.0/19',
5659 'AE': '94.200.0.0/13',
5660 'AF': '149.54.0.0/17',
5661 'AG': '209.59.64.0/18',
5662 'AI': '204.14.248.0/21',
5663 'AL': '46.99.0.0/16',
5664 'AM': '46.70.0.0/15',
5665 'AO': '105.168.0.0/13',
5666 'AP': '182.50.184.0/21',
5667 'AQ': '23.154.160.0/24',
5668 'AR': '181.0.0.0/12',
5669 'AS': '202.70.112.0/20',
5670 'AT': '77.116.0.0/14',
5671 'AU': '1.128.0.0/11',
5672 'AW': '181.41.0.0/18',
5673 'AX': '185.217.4.0/22',
5674 'AZ': '5.197.0.0/16',
5675 'BA': '31.176.128.0/17',
5676 'BB': '65.48.128.0/17',
5677 'BD': '114.130.0.0/16',
5678 'BE': '57.0.0.0/8',
5679 'BF': '102.178.0.0/15',
5680 'BG': '95.42.0.0/15',
5681 'BH': '37.131.0.0/17',
5682 'BI': '154.117.192.0/18',
5683 'BJ': '137.255.0.0/16',
5684 'BL': '185.212.72.0/23',
5685 'BM': '196.12.64.0/18',
5686 'BN': '156.31.0.0/16',
5687 'BO': '161.56.0.0/16',
5688 'BQ': '161.0.80.0/20',
5689 'BR': '191.128.0.0/12',
5690 'BS': '24.51.64.0/18',
5691 'BT': '119.2.96.0/19',
5692 'BW': '168.167.0.0/16',
5693 'BY': '178.120.0.0/13',
5694 'BZ': '179.42.192.0/18',
5695 'CA': '99.224.0.0/11',
5696 'CD': '41.243.0.0/16',
5697 'CF': '197.242.176.0/21',
5698 'CG': '160.113.0.0/16',
5699 'CH': '85.0.0.0/13',
5700 'CI': '102.136.0.0/14',
5701 'CK': '202.65.32.0/19',
5702 'CL': '152.172.0.0/14',
5703 'CM': '102.244.0.0/14',
5704 'CN': '36.128.0.0/10',
5705 'CO': '181.240.0.0/12',
5706 'CR': '201.192.0.0/12',
5707 'CU': '152.206.0.0/15',
5708 'CV': '165.90.96.0/19',
5709 'CW': '190.88.128.0/17',
5710 'CY': '31.153.0.0/16',
5711 'CZ': '88.100.0.0/14',
5712 'DE': '53.0.0.0/8',
5713 'DJ': '197.241.0.0/17',
5714 'DK': '87.48.0.0/12',
5715 'DM': '192.243.48.0/20',
5716 'DO': '152.166.0.0/15',
5717 'DZ': '41.96.0.0/12',
5718 'EC': '186.68.0.0/15',
5719 'EE': '90.190.0.0/15',
5720 'EG': '156.160.0.0/11',
5721 'ER': '196.200.96.0/20',
5722 'ES': '88.0.0.0/11',
5723 'ET': '196.188.0.0/14',
5724 'EU': '2.16.0.0/13',
5725 'FI': '91.152.0.0/13',
5726 'FJ': '144.120.0.0/16',
5727 'FK': '80.73.208.0/21',
5728 'FM': '119.252.112.0/20',
5729 'FO': '88.85.32.0/19',
5730 'FR': '90.0.0.0/9',
5731 'GA': '41.158.0.0/15',
5732 'GB': '25.0.0.0/8',
5733 'GD': '74.122.88.0/21',
5734 'GE': '31.146.0.0/16',
5735 'GF': '161.22.64.0/18',
5736 'GG': '62.68.160.0/19',
5737 'GH': '154.160.0.0/12',
5738 'GI': '95.164.0.0/16',
5739 'GL': '88.83.0.0/19',
5740 'GM': '160.182.0.0/15',
5741 'GN': '197.149.192.0/18',
5742 'GP': '104.250.0.0/19',
5743 'GQ': '105.235.224.0/20',
5744 'GR': '94.64.0.0/13',
5745 'GT': '168.234.0.0/16',
5746 'GU': '168.123.0.0/16',
5747 'GW': '197.214.80.0/20',
5748 'GY': '181.41.64.0/18',
5749 'HK': '113.252.0.0/14',
5750 'HN': '181.210.0.0/16',
5751 'HR': '93.136.0.0/13',
5752 'HT': '148.102.128.0/17',
5753 'HU': '84.0.0.0/14',
5754 'ID': '39.192.0.0/10',
5755 'IE': '87.32.0.0/12',
5756 'IL': '79.176.0.0/13',
5757 'IM': '5.62.80.0/20',
5758 'IN': '117.192.0.0/10',
5759 'IO': '203.83.48.0/21',
5760 'IQ': '37.236.0.0/14',
5761 'IR': '2.176.0.0/12',
5762 'IS': '82.221.0.0/16',
5763 'IT': '79.0.0.0/10',
5764 'JE': '87.244.64.0/18',
5765 'JM': '72.27.0.0/17',
5766 'JO': '176.29.0.0/16',
5767 'JP': '133.0.0.0/8',
5768 'KE': '105.48.0.0/12',
5769 'KG': '158.181.128.0/17',
5770 'KH': '36.37.128.0/17',
5771 'KI': '103.25.140.0/22',
5772 'KM': '197.255.224.0/20',
5773 'KN': '198.167.192.0/19',
5774 'KP': '175.45.176.0/22',
5775 'KR': '175.192.0.0/10',
5776 'KW': '37.36.0.0/14',
5777 'KY': '64.96.0.0/15',
5778 'KZ': '2.72.0.0/13',
5779 'LA': '115.84.64.0/18',
5780 'LB': '178.135.0.0/16',
5781 'LC': '24.92.144.0/20',
5782 'LI': '82.117.0.0/19',
5783 'LK': '112.134.0.0/15',
5784 'LR': '102.183.0.0/16',
5785 'LS': '129.232.0.0/17',
5786 'LT': '78.56.0.0/13',
5787 'LU': '188.42.0.0/16',
5788 'LV': '46.109.0.0/16',
5789 'LY': '41.252.0.0/14',
5790 'MA': '105.128.0.0/11',
5791 'MC': '88.209.64.0/18',
5792 'MD': '37.246.0.0/16',
5793 'ME': '178.175.0.0/17',
5794 'MF': '74.112.232.0/21',
5795 'MG': '154.126.0.0/17',
5796 'MH': '117.103.88.0/21',
5797 'MK': '77.28.0.0/15',
5798 'ML': '154.118.128.0/18',
5799 'MM': '37.111.0.0/17',
5800 'MN': '49.0.128.0/17',
5801 'MO': '60.246.0.0/16',
5802 'MP': '202.88.64.0/20',
5803 'MQ': '109.203.224.0/19',
5804 'MR': '41.188.64.0/18',
5805 'MS': '208.90.112.0/22',
5806 'MT': '46.11.0.0/16',
5807 'MU': '105.16.0.0/12',
5808 'MV': '27.114.128.0/18',
5809 'MW': '102.70.0.0/15',
5810 'MX': '187.192.0.0/11',
5811 'MY': '175.136.0.0/13',
5812 'MZ': '197.218.0.0/15',
5813 'NA': '41.182.0.0/16',
5814 'NC': '101.101.0.0/18',
5815 'NE': '197.214.0.0/18',
5816 'NF': '203.17.240.0/22',
5817 'NG': '105.112.0.0/12',
5818 'NI': '186.76.0.0/15',
5819 'NL': '145.96.0.0/11',
5820 'NO': '84.208.0.0/13',
5821 'NP': '36.252.0.0/15',
5822 'NR': '203.98.224.0/19',
5823 'NU': '49.156.48.0/22',
5824 'NZ': '49.224.0.0/14',
5825 'OM': '5.36.0.0/15',
5826 'PA': '186.72.0.0/15',
5827 'PE': '186.160.0.0/14',
5828 'PF': '123.50.64.0/18',
5829 'PG': '124.240.192.0/19',
5830 'PH': '49.144.0.0/13',
5831 'PK': '39.32.0.0/11',
5832 'PL': '83.0.0.0/11',
5833 'PM': '70.36.0.0/20',
5834 'PR': '66.50.0.0/16',
5835 'PS': '188.161.0.0/16',
5836 'PT': '85.240.0.0/13',
5837 'PW': '202.124.224.0/20',
5838 'PY': '181.120.0.0/14',
5839 'QA': '37.210.0.0/15',
5840 'RE': '102.35.0.0/16',
5841 'RO': '79.112.0.0/13',
5842 'RS': '93.86.0.0/15',
5843 'RU': '5.136.0.0/13',
5844 'RW': '41.186.0.0/16',
5845 'SA': '188.48.0.0/13',
5846 'SB': '202.1.160.0/19',
5847 'SC': '154.192.0.0/11',
5848 'SD': '102.120.0.0/13',
5849 'SE': '78.64.0.0/12',
5850 'SG': '8.128.0.0/10',
5851 'SI': '188.196.0.0/14',
5852 'SK': '78.98.0.0/15',
5853 'SL': '102.143.0.0/17',
5854 'SM': '89.186.32.0/19',
5855 'SN': '41.82.0.0/15',
5856 'SO': '154.115.192.0/18',
5857 'SR': '186.179.128.0/17',
5858 'SS': '105.235.208.0/21',
5859 'ST': '197.159.160.0/19',
5860 'SV': '168.243.0.0/16',
5861 'SX': '190.102.0.0/20',
5862 'SY': '5.0.0.0/16',
5863 'SZ': '41.84.224.0/19',
5864 'TC': '65.255.48.0/20',
5865 'TD': '154.68.128.0/19',
5866 'TG': '196.168.0.0/14',
5867 'TH': '171.96.0.0/13',
5868 'TJ': '85.9.128.0/18',
5869 'TK': '27.96.24.0/21',
5870 'TL': '180.189.160.0/20',
5871 'TM': '95.85.96.0/19',
5872 'TN': '197.0.0.0/11',
5873 'TO': '175.176.144.0/21',
5874 'TR': '78.160.0.0/11',
5875 'TT': '186.44.0.0/15',
5876 'TV': '202.2.96.0/19',
5877 'TW': '120.96.0.0/11',
5878 'TZ': '156.156.0.0/14',
5879 'UA': '37.52.0.0/14',
5880 'UG': '102.80.0.0/13',
5881 'US': '6.0.0.0/8',
5882 'UY': '167.56.0.0/13',
5883 'UZ': '84.54.64.0/18',
5884 'VA': '212.77.0.0/19',
5885 'VC': '207.191.240.0/21',
5886 'VE': '186.88.0.0/13',
5887 'VG': '66.81.192.0/20',
5888 'VI': '146.226.0.0/16',
5889 'VN': '14.160.0.0/11',
5890 'VU': '202.80.32.0/20',
5891 'WF': '117.20.32.0/21',
5892 'WS': '202.4.32.0/19',
5893 'YE': '134.35.0.0/16',
5894 'YT': '41.242.116.0/22',
5895 'ZA': '41.0.0.0/11',
5896 'ZM': '102.144.0.0/13',
5897 'ZW': '102.177.192.0/18',
5898 }
5899
5900 @classmethod
5901 def random_ipv4(cls, code_or_block):
5902 if len(code_or_block) == 2:
5903 block = cls._country_ip_map.get(code_or_block.upper())
5904 if not block:
5905 return None
5906 else:
5907 block = code_or_block
5908 addr, preflen = block.split('/')
5909 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5910 addr_max = addr_min | (0xffffffff >> int(preflen))
5911 return compat_str(socket.inet_ntoa(
5912 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5913
5914
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant allowing each request to pick its own proxy
    through a 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Register default http/https openers that funnel into proxy_open.
        # The lambda defaults bind the current scheme and the bound method
        # eagerly, avoiding the late-binding-closure pitfall inside the loop.
        for scheme in ('http', 'https'):
            setattr(
                self, '%s_open' % scheme,
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # proxying explicitly disabled for this request
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # SOCKS proxying is implemented by yt-dlp's own http/https
            # handlers (they wrap the socket); just tag the request and
            # let the default machinery proceed.
            req.add_header('Ytdl-socks-proxy', proxy)
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5938
5939
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n > 0:
        # int.to_bytes yields the minimal big-endian representation directly,
        # replacing the former pack-32-bit-words-then-strip-zeros loop
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        # n == 0 (and, as before, any non-positive n) encodes as one zero byte
        s = b'\000'
    # Front-pad with zero bytes up to a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5972
5973
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes replaces the manual 4-byte-chunk accumulation loop;
    # an empty string yields 0, exactly as before
    return int.from_bytes(s, 'big')
5989
5990
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is read little-endian: reverse the bytes, then interpret
    # their hex representation as one big integer
    payload = int(binascii.hexlify(data[::-1]), 16)
    return format(pow(payload, exponent, modulus), 'x')
6006
6007
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS#1 v1.5, EME-PKCS1-v1_5) requires the padding string PS
    # to consist of *nonzero* octets; the previous randint(0, 254) could emit
    # a 0, which a decrypter would treat as the end-of-padding delimiter and
    # so truncate/shift the recovered message
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
6021
6022
def encode_base_n(num, n, table=None):
    """Render non-negative integer `num` in base `n`, using `table` as the
    digit alphabet (defaults to 0-9a-zA-Z truncated to n symbols)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if not num:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
6039
6040
def decode_packed_codes(code):
    """De-obfuscate JavaScript packed with the common p,a,c,k,e,d packer:
    rebuild the symbol table and substitute every word token back."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in range(count - 1, -1, -1):
        encoded = encode_base_n(index, base)
        # An empty replacement means the token stands for itself
        symbol_table[encoded] = symbols[index] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
6057
6058
def caesar(s, alphabet, shift):
    """Shift every character of `s` that occurs in `alphabet` by `shift`
    positions (wrapping around); other characters pass through unchanged."""
    if not shift:
        return s
    size = len(alphabet)

    def rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(rotate, s))
6066
6067
def rot47(s):
    """Apply the ROT47 substitution cipher: rotate the printable ASCII
    range (! through ~) by 47 positions."""
    return caesar(
        s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
6070
6071
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY2="quoted"') into a dict,
    stripping surrounding double quotes from quoted values."""
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)}
6079
6080
def urshift(val, n):
    """Unsigned 32-bit right shift, like JavaScript's `>>>` operator."""
    if val < 0:
        val += 0x100000000
    return val >> n
6083
6084
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG image into (width, height, pixels).

    `pixels` is a list of rows; each row is a flat list of channel bytes,
    3 bytes per pixel. Assumes 8-bit 3-channel (RGB) data without
    interlacing -- NOTE(review): not validated against IHDR; confirm
    callers only feed such PNGs.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Helpers to read big-endian unsigned ints of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, payload, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (checked above); width/height are its first 8 bytes
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split across several IDAT chunks; concatenate them
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 channel bytes per pixel; every scanline is prefixed by 1 filter byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed channel byte by flat index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the same channel of the previous pixel (3 bytes back),
            # which only exists from the second pixel (x >= 3) onwards
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (PNG spec, "Filtering")
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # Diagonal neighbor (up-left) exists only past the first
                # pixel of a row and past the first row
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6190
6191
def write_xattr(path, key, value):
    """Set extended attribute `key` to the bytes `value` on the file at `path`.

    Tries, in order: the pyxattr/xattr Python modules; NTFS Alternate Data
    Streams on Windows; the setfattr/xattr command-line tools elsewhere.
    Raises XAttrMetadataError when setting fails and XAttrUnavailableError
    when no usable implementation is available.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools receive the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6274
6275
def random_birthday(year_field, month_field, day_field):
    """Map the given field names to the year/month/day (as strings) of a
    uniformly random date between 1950-01-01 and 1995-12-31."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(days=random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6286
6287
# Templates for internet shortcut files, which are plain text files.
# Windows .url shortcut (INI-style)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut (an XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Freedesktop .desktop entry of Type=Link (Linux desktops)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()

# Shortcut type name -> template; all templates are %-formatted with a
# 'url' key (the 'desktop' one additionally needs 'filename')
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
6319
6320
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): the port is dropped whenever it equals 80, even for
    # schemes like https whose default port is 443 -- confirm this is intended.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6363
6364
6365 def to_high_limit_path(path):
6366 if sys.platform in ['win32', 'cygwin']:
6367 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6368 return r'\\?\ '.rstrip() + os.path.abspath(path)
6369
6370 return path
6371
6372
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj (or obj[field]) with template, returning `default` when the
    value -- before or after applying `func` -- is one of `ignore`."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
    # func may have mapped the value into the ignore set
    return default if val in ignore else template % val
6381
6382
def clean_podcast_url(url):
    # Strip known podcast analytics/tracking redirect prefixes from the URL
    # (the links inside the pattern document each tracking service)
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6398
6399
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random string shaped like a UUID version 4."""
    pick_hex = lambda match: _HEX_TABLE[random.randint(0, 15)]
    return re.sub(r'[xy]', pick_hex, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6405
6406
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not exist.

    Returns True on success (or nothing to do), False on failure; on
    failure the message is reported through `to_screen` when callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Previously `if callable(to_screen) is not None:` -- callable()
        # returns a bool, so that test was always True and to_screen=None
        # would itself raise TypeError here
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6417
6418
def get_executable_path():
    """Return the absolute directory yt-dlp runs from, accounting for
    PyInstaller bundles and zip-based execution."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):
        # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):
        # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6428
6429
def load_plugins(name, suffix, namespace):
    """Load attributes ending in `suffix` from ytdlp_plugins/<name>/__init__.py
    into `namespace`, returning the newly added ones as a dict.
    Missing plugin files are silently ignored."""
    classes = {}
    try:
        plugins_spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        plugins = importlib.util.module_from_spec(plugins_spec)
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)
        # Renamed loop variable (was `name`, shadowing the parameter)
        for attr in dir(plugins):
            if attr in namespace or not attr.endswith(suffix):
                continue
            classes[attr] = namespace[attr] = getattr(plugins, attr)
    except FileNotFoundError:
        pass
    return classes
6448
6449
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings or "...".
                            When a function is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    # Resolve a single path against obj. Branching keys (tuples, ..., and
    # callables) produce nested lists; `depth` (nonlocal, reset per path in
    # the driver loop below) records how many nesting levels were introduced
    # so the results can be flattened afterwards.
    def _traverse_obj(obj, path, _current_depth=0):
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if obj is None:
                return None
            if isinstance(key, (list, tuple)):
                # A tuple of keys: traverse each alternative, then treat the
                # collected results like a "..." expansion
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Expand to all values of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Filter keys/indices through the predicate
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Plain dict lookup; fall back to case-insensitive scan
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings to int indices or slices
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # '::' is equivalent to expanding everything
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Flatten one nesting level per branching step, dropping Nones
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6546
6547
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated wrapper around traverse_obj, kept for backward
    compatibility. Do not use."""
    return traverse_obj(
        dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
6552
6553
def variadic(x, allowed_types=(str, bytes)):
    """Return x unchanged if it is a non-atomic iterable; otherwise wrap it
    in a 1-tuple. Types in `allowed_types` count as atomic even though
    they are iterable."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6556
6557
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers={}):
    """Build an HS256-signed token in JWS Compact Serialization (bytes).

    NOTE(review): segments are encoded with plain base64 (padded, +/)
    rather than the padding-free base64url the RFCs specify -- confirm
    the consumers accept this.
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    segments = [
        base64.b64encode(json.dumps(header_data).encode('utf-8')),
        base64.b64encode(json.dumps(payload_data).encode('utf-8')),
    ]
    signing_input = b'.'.join(segments)
    signature = hmac.new(key.encode('utf-8'), signing_input, hashlib.sha256).digest()
    segments.append(base64.b64encode(signature))
    return b'.'.join(segments)
6575
6576
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Return the decoded payload dict of an HS256 JWT.
    The signature is NOT verified."""
    _header_b64, payload_b64, _signature_b64 = jwt.split('.')
    return json.loads(base64.urlsafe_b64decode(payload_b64))
6582
6583
def supports_terminal_sequences(stream):
    """Best-effort guess whether `stream` is a terminal that understands
    ANSI escape sequences."""
    # Windows 10 build 10586 is the first with native VT sequence support
    if compat_os_name == 'nt' and get_windows_version() < (10, 0, 10586):
        return False
    if compat_os_name != 'nt' and not os.getenv('TERM'):
        return False
    try:
        return stream.isatty()
    except BaseException:
        return False
6594
6595
# Matches ANSI SGR (color/formatting) escape sequences, e.g. '\033[0;31m'
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    # Strip every escape sequence matched above from `string`
    return _terminal_sequences_re.sub('', string)
6601
6602
def number_of_digits(number):
    """Count the characters of `number` rendered with '%d' (so a leading
    minus sign counts, and floats are truncated to their integer part)."""
    rendered = '%d' % number
    return len(rendered)
6605
6606
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the truthy values (stringified) with `delim`; when `from_dict`
    is given, the values are first looked up as its keys."""
    if from_dict is not None:
        values = map(from_dict.get, values)
    return delim.join(str(value) for value in values if value)