]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
[minicurses] Add more colors
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import hashlib
20 import hmac
21 import importlib.util
22 import io
23 import itertools
24 import json
25 import locale
26 import math
27 import operator
28 import os
29 import platform
30 import random
31 import re
32 import socket
33 import ssl
34 import subprocess
35 import sys
36 import tempfile
37 import time
38 import traceback
39 import xml.etree.ElementTree
40 import zlib
41
42 from .compat import (
43 compat_HTMLParseError,
44 compat_HTMLParser,
45 compat_HTTPError,
46 compat_basestring,
47 compat_chr,
48 compat_cookiejar,
49 compat_ctypes_WINFUNCTYPE,
50 compat_etree_fromstring,
51 compat_expanduser,
52 compat_html_entities,
53 compat_html_entities_html5,
54 compat_http_client,
55 compat_integer_types,
56 compat_numeric_types,
57 compat_kwargs,
58 compat_os_name,
59 compat_parse_qs,
60 compat_shlex_quote,
61 compat_str,
62 compat_struct_pack,
63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
66 compat_urllib_parse_urlencode,
67 compat_urllib_parse_urlparse,
68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
74 compat_xpath,
75 )
76
77 from .socks import (
78 ProxyType,
79 sockssocket,
80 )
81
82
def register_socks_protocols():
    """Teach urlparse to treat SOCKS URLs as having a network location.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not handled correctly, so the SOCKS schemes
    are appended here (idempotently) before any proxy URL is parsed.
    """
    known_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme not in known_schemes:
            known_schemes.append(socks_scheme)
90
91
# Type of a compiled regular expression object. There is no public name
# for it on all supported Python versions (re.Pattern only exists from
# Python 3.7), so derive it from an actual compiled pattern instead.
compiled_regex_type = type(re.compile(''))
94
95
def random_user_agent():
    """Return a random, plausible Chrome-on-Windows User-Agent string.

    The UA is built from a fixed Chrome/Windows template with the Chrome
    version chosen uniformly at random from a sample of real release
    version strings.

    Returns:
        str: A User-Agent header value of the form
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
        (KHTML, like Gecko) Chrome/<version> Safari/537.36'.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # A representative sample of genuine Chrome release versions (one or
    # more per major release 68-76). The previous ~1,600-entry exhaustive
    # table added nothing callers could observe beyond "a plausible Chrome
    # version" while dominating the size of this module, so it was reduced
    # to this curated subset of the same real version strings.
    _CHROME_VERSIONS = (
        '76.0.3780.3',
        '76.0.3770.15',
        '76.0.3729.129',
        '75.0.3770.15',
        '75.0.3770.10',
        '75.0.3766.2',
        '74.0.3729.129',
        '74.0.3729.128',
        '74.0.3729.108',
        '74.0.3718.9',
        '73.0.3683.121',
        '73.0.3683.103',
        '73.0.3683.75',
        '72.0.3626.122',
        '72.0.3626.121',
        '72.0.3626.96',
        '71.0.3578.141',
        '71.0.3578.137',
        '71.0.3578.98',
        '70.0.3538.124',
        '70.0.3538.110',
        '70.0.3538.77',
        '69.0.3497.128',
        '69.0.3497.100',
        '68.0.3440.134',
        '68.0.3440.106',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
# Default HTTP headers sent with every request; the User-Agent is
# randomized once per run to look like a regular Chrome browser
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings for extractors that need a specific browser
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel distinguishing "no default supplied" from "default is None"
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Localized month names, keyed by language code, used when parsing dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized when guessing formats from URLs/filenames
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to an ASCII transliteration)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# strptime formats tried, in order, when parsing free-form textual dates;
# day-first and month-first variants are appended below
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Additional formats for locales that write the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional formats for locales that write the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-obfuscated JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks for metadata extraction
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1791
1792
def preferredencoding():
    """Return the encoding the system prefers, falling back to UTF-8.

    Uses locale.getpreferredencoding(), but verifies that the reported
    codec actually works before trusting it.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Smoke-test the codec; a misconfigured locale can report a bogus name
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1806
1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Write to a temporary file in the target's directory so the final
    # rename stays on the same filesystem (atomic on POSIX)
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with mode 0600; widen to the
            # umask-adjusted default so the result looks like a normal file
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # On any failure, clean up the temporary file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # Attribute names are restricted so they can be embedded in the
        # XPath expression without any escaping
        assert re.match(r'^[a-zA-Z_-]+$', key)
        predicate = '[@%s]' % key if val is None else "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # ElementTree before 2.7 lacks attribute predicates, so filter
        # the candidate elements manually
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1882
1883 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1884 # the namespace parameter
1885
1886
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' form using ns_map."""
    expanded = []
    for step in path.split('/'):
        parts = step.split(':')
        if len(parts) == 1:
            # No namespace prefix on this step
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1897
1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string, or an iterable of
    fallback xpaths tried in order); honor `default`/`fatal` on no match."""
    def _search(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        found = _search(xpath)
    else:
        for xp in xpath:
            found = _search(xp)
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text content of the first element matching `xpath`, with
    the same default/fatal semantics as xpath_element()."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        # Either not found (and not fatal), or the default was returned
        return elem
    if elem.text is not None:
        return elem.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matching xpath[@key], with the
    same default/fatal semantics as xpath_element()."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1948
1949
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    # First match of get_elements_by_attribute(), or None if there is none
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # class attributes are whitespace-separated lists, so match the class
    # name as a whole word anywhere inside the attribute value
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    if escape_value:
        value = re.escape(value)

    results = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')
        # Strip a stray quote pair left around the content by lax markup
        if content.startswith(('"', "'")):
            content = content[1:-1]
        results.append(unescapeHTML(content))

    return results
1996
1997
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Callback from the base parser; attrs is a list of (name, value)
        # pairs with entity references already decoded
        self.attrs = dict(attrs)
2007
2008
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # return whatever attributes were gathered before the failure
        pass
    return parser.attrs
2033
2034
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Turn <br> and paragraph boundaries into newlines, everything else
    # into spaces
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip remaining tags, then decode entities
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2050
2051
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Keep '\n' from being translated to '\r\n' when piping
                # binary data through stdout
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error cannot be fixed by renaming; give up at once
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2082
2083
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parsable RFC 2822 date
        return None
    return email.utils.mktime_tz(parsed)
2091
2092
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '\n' and not restricted:
            return ' '
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char.isspace() or char in '!&\'()[]{}$;`^,#'):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Replace ':' inside timestamps (e.g. 12:34:56) before the per-char pass
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in s)
    if not is_id:
        # Collapse runs of underscores and trim decorations
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2136
2137
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    # On non-Windows platforms the path is returned untouched unless
    # force is set, in which case Windows-style sanitization is applied
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters illegal in Windows path components, and trailing
    # spaces/dots, with '#' ('.' and '..' components are kept as-is)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Keep a forcibly-sanitized absolute path absolute
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2161
2162
def sanitize_url(url):
    """Fix common URL problems: missing scheme and frequently-seen typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        fixed, count = re.subn(mistake, fixup, url)
        if count:
            return fixed
    return url
2179
2180
def extract_basic_auth(url):
    """Strip userinfo from `url`; return (clean_url, basic_auth_header_or_None)."""
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Rebuild the netloc without the credentials
    netloc = parts.hostname
    if parts.port is not None:
        netloc = '%s:%d' % (netloc, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    return url, 'Basic ' + token
2191
2192
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized/escaped URL, moving any
    inline basic-auth credentials into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # headers may be passed positionally (second arg) or by keyword
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2199
2200
def expand_path(s):
    """Expand shell variables and ~"""
    # Note: ~ is expanded first, so a value containing '$VAR' still works
    return os.path.expandvars(compat_expanduser(s))
2204
2205
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable, preserving order.

    Returns a list containing the first occurrence of each element.
    Hashable elements are tracked in a set so the common case is O(n);
    unhashable elements (e.g. dicts) fall back to the previous linear
    scan, keeping behavior identical for them.
    """
    seen = set()
    res = []
    for el in iterable:
        try:
            if el in seen:
                continue
            seen.add(el)
        except TypeError:
            # Unhashable element - fall back to scanning the result list
            if el in res:
                continue
        res.append(el)
    return res
2213
2214
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            # Codepoint out of range, see
            # https://github.com/ytdl-org/youtube-dl/issues/7518
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2244
2245
def unescapeHTML(s):
    """Replace all HTML entities in `s` with their decoded characters."""
    if s is None:
        return None
    assert type(s) == compat_str

    # Delegate each '&name;' / '&#num;' token to _htmlentity_transform
    return re.sub(r'&([^&;]+;)', lambda mobj: _htmlentity_transform(mobj.group(1)), s)
2253
2254
def escapeHTML(text):
    """Escape &, <, >, " and ' for safe embedding in HTML markup."""
    # A single-pass translate is equivalent to sequential replaces with
    # '&' handled first, since replacement text is never re-scanned
    return text.translate(str.maketrans({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    }))
2264
2265
def process_communicate_or_kill(p, *args, **kwargs):
    """Like p.communicate(), but kill the subprocess on any exception
    (including KeyboardInterrupt) so it never outlives the caller."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2273
2274
class Popen(subprocess.Popen):
    # subprocess.Popen that never pops up a console window on Windows
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        # communicate(), but kill the process if interrupted (e.g. Ctrl+C)
        return process_communicate_or_kill(self, *args, **kwargs)
2287
2288
def get_subprocess_encoding():
    """Return the encoding used when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # getfilesystemencoding() may return None (e.g. Jython)
    return sys.getfilesystemencoding() or 'utf-8'
2299
2300
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """
    assert type(s) == compat_str

    if sys.version_info >= (3, 0):
        # Python 3 has a Unicode API
        return s

    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5 and not for_subprocess:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s

    if sys.platform.startswith('java'):
        # Jython assumes filenames are Unicode strings though reported as
        # Python 2.x compatible
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2323
2324
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): decode byte filenames on Python 2,
    pass everything through unchanged on Python 3."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2334
2335
def encodeArgument(s):
    # Encode a command-line argument for passing to a subprocess
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    # Inverse of encodeArgument()
    return decodeFilename(b, True)
2347
2348
def decodeOption(optval):
    """Decode a command-line option value to text, if it is not already."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())
    assert isinstance(optval, compat_str)
    return optval
2357
2358
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into an (hours, minutes, seconds,
    milliseconds) named tuple."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2367
2368
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [[H<delim>]M<delim>]S, optionally
    with a trailing .mmm milliseconds part."""
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    if msec:
        ret = '%s.%03d' % (ret, time.milliseconds)
    return ret
2378
2379
def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    try:
        der_certs = [
            cert for cert, encoding, trust in ssl.enum_certificates(storename)
            if encoding == 'x509_asn' and (trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        # Store not readable - silently skip it
        return
    for der_cert in der_certs:
        try:
            ssl_context.load_verify_locations(cadata=der_cert)
        except ssl.SSLError:
            # Skip certificates the SSL library rejects
            pass
2393
2394
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose SSLContext honors the
    `nocheckcertificate` option in params."""
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2416
2417
def bug_reports_message(before=';'):
    """Return the standard "please report this issue" blurb, appended to
    `before` with appropriate capitalization."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = (
        'please report this issue on https://github.com/yt-dlp/yt-dlp .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
    ) % update_cmd

    before = before.rstrip()
    # Capitalize when starting a new sentence
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    if not before:
        return msg
    return before + ' ' + msg
2432
2433
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    pass


# Exception types that indicate a (possibly transient) network problem
# rather than a bug; used to mark ExtractorErrors as "expected"
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2443
2444
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # Errors raised while a network exception is being handled are
        # never considered bugs
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie  # the extractor (or its name) the error originated from
        self.exc_info = sys.exc_info()  # preserve original exception

        # Displayed message: "[ie] video_id: msg (caused by ...)" plus the
        # bug-report blurb for unexpected errors
        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        # Render the stored traceback as a string, or None if none was given
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2474
2475
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        self.countries = countries  # country codes the video is available in
2499
2500
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2531
2532
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
2543
2544
class ExistingVideoReached(YoutubeDLError):
    """ --break-on-existing triggered: an already-downloaded video was encountered. """
    pass


class RejectedVideoReached(YoutubeDLError):
    """ --break-on-reject triggered: a video matching the reject filters was encountered. """
    pass


class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    pass


class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2563
2564
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2589
2590
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes (xattrs) fails."""
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code  # OS error number, if available
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    """Raised when xattr support is missing on this system or installation."""
    pass
2609
2610
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection, patching its address-binding logic so
    the `source_address` option is honored."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the source address format
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2674
2675
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, return a new dict without the marker and
    without any 'Accept-Encoding' header (case-insensitive); otherwise
    return `headers` unchanged (same object).
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2684
2685
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        # yt-dlp params dict; _create_http_connection reads 'source_address' from it
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # Internal header used to route this request through a SOCKS proxy
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        # Raw-deflate first; retry as zlib-wrapped stream on failure
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2809
2810
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of `base_class` that tunnels through a SOCKS proxy.

    `socks_proxy` is a URL such as 'socks5://user:pass@host:port'; the
    supported schemes are socks5, socks4a, socks4 and socks (alias of
    socks4).

    Raises ValueError for an unsupported scheme.  Previously an unknown
    scheme fell through the elif chain and crashed later with an
    UnboundLocalError on `socks_type`, which was much harder to diagnose.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # Credentials may be percent-encoded in the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            # Open the raw TCP connection through the SOCKS proxy
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the proxied socket with TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2852
2853
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting SOCKS proxies and source-address binding."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class
        extra_kwargs = {}

        # Propagate the SSL context / hostname checking set up by urllib
        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        # Internal header used to route this request through a SOCKS proxy
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **extra_kwargs)
2877
2878
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in Netscape cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie line
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly prefix so the base parser accepts the entry
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries instead of refusing the whole file
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2995
2996
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies the HTTP hooks to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
3019
3020
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Treat 301/303/307/308 like 302: all funnel into the base 302 handler
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3076
3077
def extract_timezone(date_str):
    """Split a trailing UTC-offset designator off `date_str`.

    Returns (timezone, date_str): `timezone` is a datetime.timedelta (zero
    when there is no offset or only a bare 'Z'), and `date_str` has the
    matched designator stripped from its end.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
            (?P<sign>\+|-)                                       # +/-
            (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})           # hh[:]mm
            $)
        ''', date_str)
    timezone = datetime.timedelta()
    if m:
        date_str = date_str[:-len(m.group('tz'))]
        if m.group('sign'):
            direction = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=direction * int(m.group('hours')),
                minutes=direction * int(m.group('minutes')))
    return timezone, date_str
3102
3103
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp for an ISO-8601-ish date string, or None.

    Fractional seconds are discarded.  When `timezone` is None the offset
    is extracted from the string itself; otherwise the given timedelta is
    subtracted before converting to a timestamp.
    """
    if date_str is None:
        return None

    # Drop fractional seconds - strptime's %S does not accept them here
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
3121
3122
def date_formats(day_first=True):
    """Return the strptime format list matching day-first/month-first order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3125
3126
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NOTE(review): the loop does not break on the first successful parse,
    # so the LAST matching format wins - presumably deliberate; confirm
    # against the ordering of DATE_FORMATS_* before "optimizing"
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fallback: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3153
3154
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # 12-hour clock: shift times marked PM by 12 hours after parsing
    # NOTE(review): '12:xx PM' also gains 12 hours here - confirm intended
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fallback: RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3186
3187
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from a URL, falling back to `default_ext`."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3199
3200
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the output filename for a subtitle track (e.g. 'video.en.vtt')."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
3203
3204
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Recursively resolve the base (e.g. 'now', 'today' or a nested offset)
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have variable length; use calendar arithmetic
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            # 'auto': round to the unit that was spelled out in date_str
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3245
3246
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    resolved = datetime_from_str(date_str, precision='microsecond', format=format)
    return resolved.date()
3255
3256
def datetime_add_months(dt, months):
    """Shift `dt` by `months` (may be negative), clamping the day of month.

    E.g. Jan 31 + 1 month yields the last day of February.
    """
    total_months = dt.month - 1 + months
    year = dt.year + total_months // 12
    month = total_months % 12 + 1
    # Clamp to the length of the target month (handles 29/30/31-day overflow)
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3264
3265
def datetime_round(dt, precision='day'):
    """Round `dt`'s time to `precision` (microsecond|second|minute|hour|day).

    'microsecond' returns `dt` untouched; otherwise the result is a naive
    UTC datetime rounded half-up to the nearest unit boundary.  Note that
    sub-second detail is discarded before rounding (timetuple() carries
    whole seconds only).
    """
    if precision == 'microsecond':
        return dt

    step = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3282
3283
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything that is not exactly eight digits is passed through unchanged
    return '-'.join(match.groups()) if match else date_str
3292
3293
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing endpoints default to the widest representable interval
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3323
3324
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Python 2 may hand back bytes; normalize to text before returning
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3333
3334
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3341
3342
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C-runtime file descriptors (stdout=1, stderr=2) to the
    # Win32 standard-handle constants passed to GetStdHandle
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # Redirected/piped handles are not consoles; let the caller fall
        # back to ordinary stream writes in that case
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write BMP runs in chunks of up to 1024 characters
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3416
3417
def write_string(s, out=None, encoding=None):
    """Write text `s` to `out` (default: sys.stderr), handling encoding quirks.

    On win32 consoles this delegates to _windows_write_string; otherwise the
    write path is picked from the stream's mode/buffer attributes.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode explicitly
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3438
3439
def bytes_to_intlist(bs):
    """Convert a bytes/bytearray (or py2 str) buffer into a list of ints."""
    if not bs:
        return []
    if isinstance(bs[0], int):
        # Python 3: indexing bytes already yields integers
        return list(bs)
    # Python 2: indexing a str yields 1-char strings
    return [ord(ch) for ch in bs]
3447
3448
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values back into a bytes object."""
    # An empty sequence cannot be fed to struct.pack's '%dB' format
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
3453
3454
# Cross-platform file locking: define _lock_file/_unlock_file for the
# current platform (Win32 LockFileEx, POSIX fcntl, or a stub that raises)
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED struct required by LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the whole file: maximum low/high byte-count halves
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 selects an exclusive (write) lock, 0x0 a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3528
3529
class locked_file(object):
    """File wrapper holding an advisory lock for the duration of a ``with``
    block: shared for 'r', exclusive for 'w'/'a'."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Readers take a shared lock; writers/appenders an exclusive one
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3559
3560
def get_filesystem_encoding():
    """Return the name of the filesystem encoding, falling back to UTF-8."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3564
3565
def shell_quote(args):
    """Quote each argument so the list can be shown as a safe shell command."""
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(a)) for a in args)
3575
3576
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    payload = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, payload)
3585
3586
def unsmuggle_url(smug_url, default=None):
    """Extract data stored by smuggle_url(); returns (url, data)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3594
3595
def format_bytes(bytes):
    """Return a human-readable size string (e.g. '1.50KiB') for a byte count.

    *bytes* may be an int, float or numeric string; None yields 'N/A'.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp the exponent so that huge values (>= 1024**9) or fractional
        # values (0 < bytes < 1, where the log is negative) cannot index
        # outside the suffix table
        exponent = min(max(int(math.log(bytes, 1024.0)), 0), 8)
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3608
3609
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number> <unit>' from *s* using *unit_table*.

    Returns the amount converted to base units as an int, or None when *s*
    does not start with a recognized quantity.
    """
    units_re = '|'.join(map(re.escape, unit_table))
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if mobj is None:
        return None
    # Treat ',' as a decimal separator, like '1,5GiB'
    amount = float(mobj.group('num').replace(',', '.'))
    return int(amount * unit_table[mobj.group('unit')])
3619
3620
def parse_filesize(s):
    """Parse a file size like '5 MiB' or '1,5GB' into a number of bytes."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too.  The table is generated: for each SI prefix
    # 'X' at power i, 'XiB' and 'xB' are binary (1024**i) while 'XB', 'Xb'
    # and 'xb' are decimal (1000**i), plus the spelled-out unit names.
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    _DECIMAL_NAMES = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'yotta')
    _BINARY_NAMES = ('kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi', 'zebi', 'yobi')
    for exp, (dec_name, bin_name) in enumerate(zip(_DECIMAL_NAMES, _BINARY_NAMES), start=1):
        letter = dec_name[0]
        _UNIT_TABLE.update({
            letter.upper() + 'iB': 1024 ** exp,
            letter.upper() + 'B': 1000 ** exp,
            letter + 'B': 1024 ** exp,
            letter.upper() + 'b': 1000 ** exp,
            letter + 'b': 1000 ** exp,
            dec_name + 'bytes': 1000 ** exp,
            bin_name + 'bytes': 1024 ** exp,
        })

    return lookup_unit_table(_UNIT_TABLE, s)
3690
3691
def parse_count(s):
    """Parse a view/like count such as '1.2M' or '12,345' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Plain number (possibly with thousands separators)
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    return lookup_unit_table({
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }, s)
3711
3712
def parse_resolution(s):
    """Extract {'width', 'height'} from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    # Height-only notations: '1080p'/'1080i', then '4k'/'8k' (x540 lines)
    for pattern, scale in (
            (r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', 1),
            (r'\b([48])[kK]\b', 540)):
        mobj = re.search(pattern, s)
        if mobj:
            return {'height': int(mobj.group(1)) * scale}

    return {}
3733
3734
def parse_bitrate(s):
    """Return the integer value from a string like '128 kbps', else None."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
3741
3742
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    if name not in month_names:
        return None
    return month_names.index(name) + 1
3752
3753
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3762
3763
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Only bare ampersands are escaped; existing entities are left intact
    bare_amp = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_amp, '&amp;', xml_str)
3770
3771
def setproctitle(title):
    """Set the process name shown by tools like ps, via libc prctl()."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    encoded = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(encoded))
    buf.value = encoded
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3796
3797
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present (None-safe)."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3800
3801
def remove_end(s, end):
    """Strip *end* from the end of *s* when present (None-safe)."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3804
3805
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3813
3814
def get_domain(url):
    """Return the 'domain.tld' part of *url* (www. stripped), or None."""
    mobj = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return None if mobj is None else mobj.group('domain')
3818
3819
def url_basename(url):
    """Return the last path component of *url* (query/fragment excluded)."""
    parsed_path = compat_urlparse.urlparse(url).path
    return parsed_path.strip('/').split('/')[-1]
3823
3824
def base_url(url):
    """Return *url* truncated after the last '/' that precedes any query or
    fragment. Raises AttributeError when no such prefix matches (as the
    original did)."""
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3827
3828
def urljoin(base, path):
    """Join *base* and *path* defensively.

    Returns None unless *path* is a non-empty string and *base* is an
    http(s) or protocol-relative URL; an absolute *path* is returned as-is.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    # Already absolute (with scheme or protocol-relative)?
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3842
3843
class HEADRequest(compat_urllib_request.Request):
    # Request subclass that always issues an HTTP HEAD request
    def get_method(self):
        return 'HEAD'
3847
3848
class PUTRequest(compat_urllib_request.Request):
    # Request subclass that always issues an HTTP PUT request
    def get_method(self):
        return 'PUT'
3852
3853
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to an int scaled by invscale/scale, or return *default*.

    When *get_attr* is given, the attribute of that name is read from *v*
    first. Empty strings are treated like None.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3866
3867
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
3870
3871
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and plus signs before conversion
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3879
3880
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a float scaled by invscale/scale, or return *default*."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3888
3889
def bool_or_none(v, default=None):
    """Return *v* only if it is an actual bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3892
3893
def strip_or_none(v, default=None):
    """Return v.strip() for strings, else *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3896
3897
def url_or_none(url):
    """Return the stripped URL if it uses a supported scheme, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3903
3904
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or a 'YYYYMMDD' string; *default* on failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3915
3916
def parse_duration(s):
    """Parse a duration ('1:02:03.5', ISO-8601 'PT1H2M3S', '3 min') to seconds."""
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days = hours = mins = secs = ms = None
    # Colon-separated form: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # ISO-8601-ish / free-text form
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Fractional hours/minutes, e.g. '2.5 hours'
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if not m:
                return None
            hours, mins = m.groups()

    duration = 0
    for value, multiplier in (
            (secs, 1), (mins, 60), (hours, 60 * 60), (days, 24 * 60 * 60), (ms, 1)):
        if value:
            duration += float(value) * multiplier
    return duration
3973
3974
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension ('a.mp4' -> 'a.ext.mp4').

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended instead ('a.unknown' -> 'a.unknown.ext').
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3981
3982
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's extension with *ext*.

    When *expected_real_ext* is given and the actual extension differs,
    *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3988
3989
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
    except OSError:
        # Raised when the executable cannot be found/started
        return False
    return exe
3998
3999
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    # Delegate the actual parsing of the version string
    return detect_exe_version(out, version_re, unrecognized)
4016
4017
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version number from an executable's *output*.

    Returns *unrecognized* when no version string can be found.
    """
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    if mobj:
        return mobj.group(1)
    return unrecognized
4027
4028
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        pass

    def __init__(self, iterable):
        # Source iterator; consumed items are moved into __cache
        self.__iterable = iter(iterable)
        self.__cache = []
        # Logical direction flag; reversal is lazy (no data is copied)
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Drain the iterable into the cache and return the cache itself
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map a forward index to the equivalent negative (from-the-end) index
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        # Negative or open-ended access requires knowing the full length
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Otherwise only pull as many items as the request needs
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            # Touch only one element so the iterable is not fully consumed
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4113
4114
class PagedList:
    """Base class for lazily fetched, page-oriented result lists."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Fetch page *pagenum*, using the per-page cache when enabled."""
        page_results = self._cache.get(pagenum)
        if not page_results:
            page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if not entries:
            return None
        return entries[0]
4144
4145
class OnDemandPagedList(PagedList):
    # Fetches pages one after another until the requested slice is covered
    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested slice within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4179
4180
class InAdvancePagedList(PagedList):
    # Paged list for which the total number of pages is known up front
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading items of the first page that fall before `start`
        skip_elems = start - start_page * self._pagesize
        # Remaining number of items to yield, or None for "all"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
4204
4205
def uppercase_escape(s):
    """Decode any \\UXXXXXXXX (8 hex digit) escapes embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda mobj: decode(mobj.group(0))[0],
        s)
4212
4213
def lowercase_escape(s):
    """Decode any \\uXXXX (4 hex digit) escapes embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda mobj: decode(mobj.group(0))[0],
        s)
4220
4221
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        # Python 2 quote() expects a byte string
        s = s.encode('utf-8')
    # Leave RFC 3986 reserved and sub-delimiter characters unescaped
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4227
4228
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    escaped = {
        field: escape_rfc3986(getattr(parsed, field))
        for field in ('path', 'params', 'query', 'fragment')
    }
    # Hostnames are IDNA-encoded rather than percent-escaped
    escaped['netloc'] = parsed.netloc.encode('idna').decode('ascii')
    return parsed._replace(**escaped).geturl()
4239
4240
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    query = compat_urllib_parse_urlparse(url).query
    return compat_parse_qs(query)
4243
4244
def read_batch_urls(batch_fd):
    """Read URLs from a batch-file object, skipping BOMs, blanks and comments."""
    def _sanitize(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can safely be stripped out when following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(_sanitize, fd) if url]
4262
4263
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4266
4267
def update_url_query(url, query):
    """Return *url* with the parameters in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(params, True)))
4276
4277
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its URL, body, headers and query."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    # Preserve the HTTP method of the original request
    req_type = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4296
4297
def _multipart_encode_impl(data, boundary):
    """Encode *data* (a dict of str/bytes) as multipart/form-data using *boundary*."""
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = (b'Content-Disposition: form-data; name="' + name
                + b'"\r\n\r\n' + value + b'\r\n')
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += b'--' + boundary_bytes + b'\r\n' + part

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
4318
4319
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    # Retry with fresh random boundaries until one does not collide with
    # the payload; a caller-supplied boundary is never replaced.
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None
4348
4349
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable key of *key_or_keys* in dict *d*.

    None values are always skipped; falsy values are skipped too unless
    *skip_false_values* is disabled.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key in d and d[key] is not None:
            value = d[key]
            if value or not skip_false_values:
                return value
    return default
4358
4359
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to *src* and return the first result that neither
    raises nor fails the *expected_type* check."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4369
4370
def merge_dicts(*dicts):
    """Merge dicts left to right; the first non-None value for a key wins,
    except that an empty string may be replaced by a later non-empty one."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k not in merged:
                merged[k] = v
            elif (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]):
                merged[k] = v
    return merged
4383
4384
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as text, decoding byte strings with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4387
4388
# US MPAA-style content ratings mapped to an age value
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV parental guideline ratings mapped to an age value
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4406
4407
def parse_age_limit(s):
    """Normalize an age limit ('18', '18+', 'PG-13', 'TV-MA' or an int 0-21)
    to an integer age, or None."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4423
4424
def strip_jsonp(code):
    """Strip a JSONP wrapper, leaving only the JSON callback payload."""
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
4433
4434
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal into valid JSON text."""
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer keys/values, possibly followed by ':'
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token (string, literal, comment, number, ...)
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Re-escape quoted strings into JSON-compatible form
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            # Substitute known variable names with their provided values
            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4481
4482
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4491
4492
# Default output filename templates (regular downloads and per-chapter splits)
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Per-type filename suffix; None presumably means the main template's
# extension is used as-is — TODO confirm against the consumers of this table
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Template regex: {0} is the key pattern, {1} the allowed conversion types
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
    '''


# Conversion type characters accepted by %-style formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4527
4528
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    suffix = '...'
    if len(s) <= length:
        return s
    return s[:length - len(suffix)] + suffix
4537
4538
def version_tuple(v):
    """Split a version string like '2021.10.9-1' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4541
4542
def is_outdated_version(version, limit, assume_new=True):
    """True when *version* is older than *limit*; when the version is missing
    or unparsable, the answer defaults to ``not assume_new``."""
    outdated = not assume_new
    if version:
        try:
            outdated = version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return outdated
4550
4551
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Imported lazily (presumably to avoid a circular import — TODO confirm)
    from .update import is_non_updateable

    return not is_non_updateable()
4558
4559
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4563
4564
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    err_str = str(err)
    if sys.version_info[0] < 3:
        # On python 2 error byte string must be decoded with proper
        # encoding rather than ascii
        err_str = err_str.decode(preferredencoding())
    return err_str
4572
4573
def mimetype2ext(mt):
    """Map a MIME type (optionally with parameters) to a file extension.

    Lookup order: exact full type, then the subtype, then the '+suffix' of
    the subtype; finally falls back to the subtype with '+' replaced by '.'.
    """
    if mt is None:
        return None

    # Discard any parameters such as '; charset=utf-8'
    mt, _, params = mt.partition(';')
    mt = mt.strip()

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }

    ext = FULL_MAP.get(mt)
    if ext is not None:
        return ext

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }

    _, _, subtype = mt.rpartition('/')
    ext = SUBTYPE_MAP.get(subtype.lower())
    if ext is not None:
        return ext

    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }

    # e.g. 'vnd.foo+json' -> suffix 'json'
    _, _, suffix = subtype.partition('+')
    ext = SUFFIX_MAP.get(suffix)
    if ext is not None:
        return ext

    return subtype.replace('+', '.')
4636
4637
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into vcodec/acodec/dynamic_range."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec, hdr = None, None, None
    for full_codec in split_codecs:
        # The first dotted component identifies the codec family
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'):
            if not vcodec:
                vcodec = full_codec
                if codec in ('dvh1', 'dvhe'):
                    hdr = 'DV'
                elif codec == 'vp9' and vcodec.startswith('vp9.2'):
                    hdr = 'HDR10'
                elif codec == 'av01':
                    parts = full_codec.split('.')
                    if len(parts) > 3 and parts[3] == '10':
                        hdr = 'HDR10'
                    vcodec = '.'.join(parts[:4])
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: with exactly two entries, assume video + audio
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
    return {}
4677
4678
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for an open URL handle.

    Prefers the filename from the Content-Disposition header, falling back
    to mapping the Content-Type MIME type.
    """
    headers = url_handle.headers

    content_disposition = headers.get('Content-Disposition')
    if content_disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', content_disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4691
4692
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 data: URI embedding *data* (bytes) as base64."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4695
4696
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer age limit configured, or content has no restriction:
    # nothing to block in either case
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4705
4706
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Longer BOMs (UTF-32) must be tested before their UTF-16 prefixes
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    # Truthy (a match object) when the text starts with a tag
    return re.match(r'^\s*<', decoded)
4725
4726
def determine_protocol(info_dict):
    """Infer the download protocol for an info dict.

    An explicit 'protocol' field wins; otherwise guess from the URL prefix,
    then the file extension, then the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    # rtmpe/rtmpt etc. all collapse to 'rtmp'; same idea for mms/rtsp
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4747
4748
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """
    def visible_width(s):
        # Display width, ignoring terminal escape sequences
        return len(remove_terminal_sequences(s))

    def column_widths(rows):
        return [max(visible_width(str(cell)) for cell in column) for column in zip(*rows)]

    def keep_columns(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # Drop columns whose data cells are all empty (width 0)
        mask = column_widths(data)
        header_row = keep_columns(header_row, mask)
        data = [keep_columns(row, mask) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    extraGap += 1
    if delim:
        separator = [delim * (w + extraGap) for w in widths]
        table = [header_row] + [separator] + data
        # No padding after the last column when a delimiter row is present
        widths[-1] = 0
    # Pad every cell in place to its column width plus the gap
    for row in table:
        for idx, text in enumerate(map(str, row)):
            row[idx] = text + (' ' * (widths[idx] - visible_width(text) + extraGap))
    return '\n'.join(''.join(row) for row in table)
4776
4777
4778 def _match_one(filter_part, dct, incomplete):
4779 # TODO: Generalize code with YoutubeDL._build_format_filter
4780 STRING_OPERATORS = {
4781 '*=': operator.contains,
4782 '^=': lambda attr, value: attr.startswith(value),
4783 '$=': lambda attr, value: attr.endswith(value),
4784 '~=': lambda attr, value: re.search(value, attr),
4785 }
4786 COMPARISON_OPERATORS = {
4787 **STRING_OPERATORS,
4788 '<=': operator.le, # "<=" must be defined above "<"
4789 '<': operator.lt,
4790 '>=': operator.ge,
4791 '>': operator.gt,
4792 '=': operator.eq,
4793 }
4794
4795 operator_rex = re.compile(r'''(?x)\s*
4796 (?P<key>[a-z_]+)
4797 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4798 (?:
4799 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4800 (?P<strval>.+?)
4801 )
4802 \s*$
4803 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4804 m = operator_rex.search(filter_part)
4805 if m:
4806 m = m.groupdict()
4807 unnegated_op = COMPARISON_OPERATORS[m['op']]
4808 if m['negation']:
4809 op = lambda attr, value: not unnegated_op(attr, value)
4810 else:
4811 op = unnegated_op
4812 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4813 if m['quote']:
4814 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4815 actual_value = dct.get(m['key'])
4816 numeric_comparison = None
4817 if isinstance(actual_value, compat_numeric_types):
4818 # If the original field is a string and matching comparisonvalue is
4819 # a number we should respect the origin of the original field
4820 # and process comparison value as a string (see
4821 # https://github.com/ytdl-org/youtube-dl/issues/11082)
4822 try:
4823 numeric_comparison = int(comparison_value)
4824 except ValueError:
4825 numeric_comparison = parse_filesize(comparison_value)
4826 if numeric_comparison is None:
4827 numeric_comparison = parse_filesize(f'{comparison_value}B')
4828 if numeric_comparison is None:
4829 numeric_comparison = parse_duration(comparison_value)
4830 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4831 raise ValueError('Operator %s only supports string values!' % m['op'])
4832 if actual_value is None:
4833 return incomplete or m['none_inclusive']
4834 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
4835
4836 UNARY_OPERATORS = {
4837 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4838 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4839 }
4840 operator_rex = re.compile(r'''(?x)\s*
4841 (?P<op>%s)\s*(?P<key>[a-z_]+)
4842 \s*$
4843 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4844 m = operator_rex.search(filter_part)
4845 if m:
4846 op = UNARY_OPERATORS[m.group('op')]
4847 actual_value = dct.get(m.group('key'))
4848 if incomplete and actual_value is None:
4849 return True
4850 return op(actual_value)
4851
4852 raise ValueError('Invalid filter part %r' % filter_part)
4853
4854
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
    When incomplete, all conditions passes on missing fields
    """
    # Conditions are separated by unescaped '&'; escaped '\&' is literal
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct, incomplete):
            return False
    return True
4862
4863
def match_filter_func(filter_str):
    """Build a --match-filter callback: returns None when the video passes
    the filter, or a skip-reason message otherwise."""
    def _match_func(info_dict, *args, **kwargs):
        if not match_str(filter_str, info_dict, *args, **kwargs):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4872
4873
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None.

    Accepts plain seconds ('12.5' / '12.5s') and clock format
    'H:MM:SS[.frac]' (a ':' before the fraction is treated as '.').
    """
    if not time_expr:
        return

    seconds_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if seconds_match:
        return float(seconds_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes = int(clock_match.group(1)), int(clock_match.group(2))
        secs = float(clock_match.group(3).replace(':', '.'))
        return 3600 * hours + 60 * minutes + secs
4885
4886
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode (HH:MM:SS,mmm)."""
    hours, minutes, secs, msec = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, msec)
4889
4890
def ass_subtitles_timecode(seconds):
    """Format a number of seconds as an ASS timecode (H:MM:SS.cc)."""
    t = timetuple_from_msec(seconds * 1000)
    # ASS uses centiseconds, hence milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (t[0], t[1], t[2], t.milliseconds / 10)
4894
4895
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitle data to SRT, preserving supported styling
    as <font>/<b>/<i>/<u> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Old TTML namespace URIs that are rewritten to the current ones below
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that are translated into SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Shortcut for building namespace-qualified element/attribute names
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved property dict; filled in by the loop further down
    styles = {}
    # style inherited by all paragraphs (from body/div), filled in below
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders one <p> subtree into SRT-style text
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Effective style: default, then referenced style, then
                # inline tts:* attributes (highest priority)
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect on the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close this element's tags in reverse order of opening
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and feed it through the rendering target
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespace URIs before parsing
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat until every parent reference that
    # can be resolved has been (handles forward references)
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    # Parent not resolved yet; try again on the next pass
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on body/div becomes the default style for all paragraphs
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Fall back to begin + dur when no explicit end is given
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
5058
5059
def cli_option(params, command_option, param):
    """Return [command_option, value] for the given params key, or [] if unset.

    @param params          options dict
    @param command_option  CLI flag to emit (e.g. '--proxy')
    @param param           key to look up in params
    """
    param = params.get(param)
    # Fix: the previous `if param:` guard skipped string conversion for
    # falsy-but-set values (0, ''), so non-string values could leak into
    # the command line. Convert whenever the value is present.
    return [command_option, compat_str(param)] if param is not None else []
5065
5066
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option as CLI arguments.

    Returns [] when the key is unset; otherwise either
    ['--opt', 'true'/'false'] or ['--opt<sep>true'/'false'] with a separator.
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return ['%s%s%s' % (command_option, separator, rendered)]
    return [command_option, rendered]
5075
5076
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5080
5081
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the argument list matching the first key (or key group) found.

    A plain list/tuple argdict is returned as-is for backward compatibility
    (when use_compat); otherwise argdict maps lowercase keys to arg lists.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_list in keys:
        # Collect the arg lists of every matching key in this group
        matches = [args for args in (
            argdict.get(key.lower()) for key in variadic(key_list)) if args is not None]
        if matches:
            # Flatten: first matching key group wins
            return [arg for args in matches for arg in args]
    return default
5100
5101
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Expand (main_key, exe) into lookup keys and delegate to cli_configuration_args."""
    main_key = main_key.lower()
    exe = exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    expanded = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key not in expanded:
        # Only suffixed variants requested: no fallbacks, no compat list
        use_compat = False
    else:
        if main_key != exe:
            expanded.append((main_key, exe))
        expanded.append('default')
    return cli_configuration_args(argdict, expanded, default, use_compat)
5113
5114
class ISO639Utils(object):
    """Static helpers for converting between ISO 639-1 (two-letter) and
    ISO 639-2/T (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are used, so inputs like 'en-US'
        # are tolerated; returns None when unknown
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup over the map; implicitly returns None when unknown
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5318
5319
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes
        return cls._country_map.get(code.upper())
5578
5579
class GeoUtils(object):
    """Helpers for picking a random IPv4 address inside a country's
    major address block (used for geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a str) from the given two-letter
        country code's mapped block, or from an explicit CIDR block.
        Returns None for an unknown country code."""
        if len(code_or_block) == 2:
            # Two characters: treat as a country code
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            # Otherwise assume an explicit "a.b.c.d/prefixlen" block
            block = code_or_block
        addr, preflen = block.split('/')
        # Lowest address of the block as a 32-bit big-endian integer
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Highest address: set all host bits to 1
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5838
5839
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that allows every request to override the proxy through
    the 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Install default http/https openers; the lambda defaults capture the
        # current scheme and bound method at definition time (late-binding fix)
        for scheme in ('http', 'https'):
            setattr(
                self, '%s_open' % scheme,
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            # A per-request proxy takes precedence over the handler-level one
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # The actual wrapping of the socket with SOCKS is done by
            # yt-dlp's http/https handlers
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5863
5864
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n <= 0:
        # Preserve the historical behavior of the chunking loop: any
        # non-positive input encodes as a single zero byte (before padding)
        s = b'\000'
    else:
        # int.to_bytes yields the minimal big-endian encoding directly,
        # replacing the manual 32-bit chunking and leading-zero stripping
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # Pad the front so the total length is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5897
5898
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes accepts arbitrary lengths, so the manual front-padding
    # to a multiple of 4 bytes and the 32-bit accumulation loop are redundant
    return int.from_bytes(s, 'big')
5914
5915
5916 def ohdave_rsa_encrypt(data, exponent, modulus):
5917 '''
5918 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5919
5920 Input:
5921 data: data to encrypt, bytes-like object
5922 exponent, modulus: parameter e and N of RSA algorithm, both integer
5923 Output: hex string of encrypted data
5924
5925 Limitation: supports one block encryption only
5926 '''
5927
5928 payload = int(binascii.hexlify(data[::-1]), 16)
5929 encrypted = pow(payload, exponent, modulus)
5930 return '%x' % encrypted
5931
5932
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    # 11 = 2 marker bytes + at least 8 random padding bytes + 1 zero separator
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    pad_len = length - len(data) - 3
    padding = [random.randint(0, 254) for _ in range(pad_len)]
    # Layout: 0x00, 0x02 (block type), random padding, 0x00 separator, data
    return [0, 2] + padding + [0] + data
5946
5947
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer num in base n, using the characters of
    `table` as digits (defaults to 0-9, a-z, A-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse once at the end
    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
5964
5965
def decode_packed_codes(code):
    """Decode 'p.a.c.k.e.r.'-style obfuscated JavaScript: every base-N word
    token in the packed code is substituted with its symbol-table entry."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for idx in range(count):
        token = encode_base_n(idx, base)
        # An empty symbol means the token stands for itself
        symbol_table[token] = symbols[idx] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5982
5983
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher to s: each character found in `alphabet` is
    replaced by the character `shift` positions further along it (wrapping
    around); characters not in the alphabet pass through unchanged."""
    if not shift:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
5991
5992
def rot47(s):
    """Apply the ROT47 substitution cipher: rotate every printable ASCII
    character (codes 33-126) by 47 positions. ROT47 is its own inverse;
    other characters (spaces, newlines, non-ASCII) are left untouched."""
    return caesar(
        s,
        r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''',
        47)
5995
5996
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list (comma-separated KEY=value pairs, where
    values may be double-quoted) into a dict; quotes are stripped."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val[0] == '"' else val
    return info
6004
6005
def urshift(val, n):
    """Logical (unsigned) right shift, treating a negative val as its 32-bit
    two's-complement representation — the JavaScript '>>>' operator."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
6008
6009
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into (width, height, pixels).

    pixels is a list of rows; each row is a flat list of channel byte values
    (3 per pixel), i.e. each row has width * 3 entries.
    NOTE(review): only a 3-bytes-per-pixel layout and scanline filter types
    0-4 are handled; other color types / bit depths / interlacing are not
    supported — confirm callers only feed such images.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Unpack a big-endian unsigned integer of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk; width and height are its first two 4-byte fields
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Concatenate all IDAT chunks into the complete compressed image stream
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel; each decompressed scanline is prefixed by 1 filter byte
    stride = width * 3
    pixels = []

    # Look up an already-decoded channel byte by its flat (unprefixed) index
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Filter neighbors: 'left' is the same channel of the previous
            # pixel (3 bytes back), 'up' the same channel one row above
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use the neighbor closest to the Paeth predictor p
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6115
6116
def write_xattr(path, key, value):
    """Set the extended attribute `key` to bytes `value` on the file at `path`.

    Backends are tried in order: the pyxattr/xattr Python modules, NTFS
    Alternate Data Streams on Windows, then the setfattr/xattr CLI tools.
    Raises XAttrUnavailableError when no usable backend exists and
    XAttrMetadataError when the chosen backend fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools receive the value as a (UTF-8) string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6199
6200
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict keyed by the given field names, values as strings."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    picked = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(picked.year),
        month_field: str(picked.month),
        day_field: str(picked.day),
    }
6211
6212
# Templates for internet shortcut files, which are plain text files.

# Windows .url shortcut format
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut format (an XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org .desktop entry format (Linux desktop shortcut)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6238
6239
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): this drops an explicit port 80 for every scheme, not just
    # http where 80 is the default — confirm that is intended.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6282
6283
def to_high_limit_path(path):
    """Work around the MAX_PATH limitation on Windows by prefixing the
    absolute path with \\\\?\\. The maximum allowed length for individual
    path segments may still be quite limited. No-op on other platforms."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
6290
6291
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch obj[field] (or obj itself when field is None), optionally map it
    through func, and render it with template. A value found in `ignore` —
    either before or after applying func — yields `default` instead."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
        # func may map the value into the ignore set; re-check it
        if val in ignore:
            return default
    return template % val
6300
6301
def clean_podcast_url(url):
    """Strip known podcast download-tracking/analytics redirect prefixes
    (Chartable, Blubrry, Podtrac, Acast, Podcorn, Podsights) from url."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6317
6318
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random UUID-v4-shaped string. Only the version nibble is
    fixed ('4'); the 'y' position is drawn from all 16 hex digits rather
    than being restricted to the RFC 4122 variant values."""
    pick_hex = lambda _: _HEX_TABLE[random.randint(0, 15)]
    return re.sub(r'[xy]', pick_hex, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6324
6325
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not exist.

    @param path       target file path whose directory should exist
    @param to_screen  optional callable used to report a failure message
    @returns          True on success (or nothing to do), False on error
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # BUG FIX: was `if callable(to_screen) is not None:` which is always
        # True (callable() returns a bool), so to_screen=None crashed with
        # TypeError instead of returning False
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6336
6337
def get_executable_path():
    """Return the absolute base directory of the running program: the
    executable's directory under PyInstaller, or the project root when
    running from a zip bundle or from source."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        candidate = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        candidate = os.path.join(os.path.dirname(__file__), '../..')
    else:
        candidate = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(candidate)
6347
6348
def load_plugins(name, suffix, namespace):
    """Load classes whose names end with `suffix` from
    ytdlp_plugins/<name>/__init__.py next to the executable, registering them
    in `namespace`. Returns the dict of newly loaded classes (empty when the
    plugin file does not exist)."""
    classes = {}
    try:
        plugins_spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        plugins = importlib.util.module_from_spec(plugins_spec)
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)
        # Note: `attr` (not `name`) to avoid shadowing the parameter
        for attr in dir(plugins):
            if attr in namespace or not attr.endswith(suffix):
                continue
            klass = getattr(plugins, attr)
            classes[attr] = namespace[attr] = klass
    except FileNotFoundError:
        pass
    return classes
6367
6368
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings or "...".
                            When a function is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal) records how many branching levels ('...', tuples,
        # functions) were entered, so the caller knows how often to flatten
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # A tuple of keys: traverse each alternative, then treat the
                # collected results like a '...' expansion
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch into every value of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Filter keys/indices by predicate, then branch into the matches
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Case-insensitive lookup falls back to a linear scan
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings to int indices or slices
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # Full slice behaves like '...'
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branched result: flatten the nesting introduced by each
                # branching level (keeping one list), then drop None entries
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6465
6466
def traverse_dict(dictn, keys, casesense=True):
    ''' Deprecated thin wrapper around traverse_obj(). For backward compatibility. Do not use '''
    return traverse_obj(
        dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
6471
6472
def variadic(x, allowed_types=(str, bytes)):
    """Return x unchanged if it is an iterable (excluding the 'atomic'
    allowed_types such as str/bytes); otherwise wrap it in a 1-tuple."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6475
6476
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create a JWS Compact Serialization token signed with HMAC-SHA256,
    returned as bytes b'<header>.<payload>.<signature>'.

    @param payload_data  JSON-serializable claims object
    @param key           shared HMAC secret (str)
    @param headers       optional dict of extra/overriding JOSE header fields

    NOTE: segments use plain base64 (b64encode), not the base64url alphabet
    that RFC 7515 specifies — kept as-is for backward compatibility.
    """
    # `headers=None` instead of a mutable `{}` default (anti-pattern);
    # `if headers:` below handles both None and empty dict identically
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
    h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6494
6495
def supports_terminal_sequences(stream):
    """Best-effort check whether ANSI terminal sequences can be used on stream.

    On Windows, requires version (10, 0, 10586) or newer; elsewhere, a TERM
    environment variable must be set. Finally the stream must report being
    a TTY; any failure there is treated as "no".
    """
    if compat_os_name == 'nt':
        if get_windows_version() < (10, 0, 10586):
            return False
    elif not os.getenv('TERM'):
        return False
    try:
        return stream.isatty()
    except BaseException:
        # Streams without a working isatty() are treated as non-terminals
        return False
6506
6507
# Matches ANSI SGR escape sequences: ESC '[' ... 'm'
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Strip ANSI color/style escape sequences from string."""
    return _terminal_sequences_re.sub('', string)
6513
6514
def number_of_digits(number):
    """Length of the base-10 integer representation of number, including a
    leading '-' for negatives ('%d' truncates floats toward zero)."""
    return len('%d' % number)