# yt_dlp/utils.py — yt-dlp utility functions
# Snapshot from the yt-dlp git repository
# (commit subject: "[ExtractAudio] Rescale --audio-quality correctly")
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import hashlib
20 import hmac
21 import importlib.util
22 import io
23 import itertools
24 import json
25 import locale
26 import math
27 import operator
28 import os
29 import platform
30 import random
31 import re
32 import socket
33 import ssl
34 import subprocess
35 import sys
36 import tempfile
37 import time
38 import traceback
39 import xml.etree.ElementTree
40 import zlib
41
42 from .compat import (
43 compat_HTMLParseError,
44 compat_HTMLParser,
45 compat_HTTPError,
46 compat_basestring,
47 compat_chr,
48 compat_cookiejar,
49 compat_ctypes_WINFUNCTYPE,
50 compat_etree_fromstring,
51 compat_expanduser,
52 compat_html_entities,
53 compat_html_entities_html5,
54 compat_http_client,
55 compat_integer_types,
56 compat_numeric_types,
57 compat_kwargs,
58 compat_os_name,
59 compat_parse_qs,
60 compat_shlex_quote,
61 compat_str,
62 compat_struct_pack,
63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
66 compat_urllib_parse_urlencode,
67 compat_urllib_parse_urlparse,
68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
74 compat_xpath,
75 )
76
77 from .socks import (
78 ProxyType,
79 sockssocket,
80 )
81
82
def register_socks_protocols():
    """Teach urlparse that SOCKS URL schemes carry a network location.

    Works around https://bugs.python.org/issue7904 (urlsplit() in old
    Pythons mishandles URLs whose scheme is not listed in
    ``urlparse.uses_netloc``): any SOCKS scheme not already registered
    is appended to that list.
    """
    registered = compat_urlparse.uses_netloc
    # Preserve registration order; skip schemes that are already present.
    missing = [scheme for scheme in ('socks', 'socks4', 'socks4a', 'socks5')
               if scheme not in registered]
    registered.extend(missing)
90
91
# This is not clearly defined otherwise
# Type of a compiled regular expression object, obtained indirectly so it
# works on every supported interpreter (used for isinstance() checks).
compiled_regex_type = type(re.compile(''))
94
95
def random_user_agent():
    """Return a Chrome-on-Windows User-Agent string with a randomly
    chosen Chrome version.

    The platform/WebKit parts of the UA are fixed (Windows NT 10.0,
    Win64/x64, AppleWebKit 537.36); only the ``Chrome/<version>`` token
    varies, picked uniformly from the hard-coded list below.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Hard-coded snapshot of real Chrome build numbers (68.x – 76.x era).
    # Kept as a tuple literal so the candidate pool is fixed and auditable.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    # Uniform pick; substituted into the %s slot of the template.
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
# Default headers sent with every HTTP request; the User-Agent is
# randomized once per process by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1686
1687
# Named alternative User-Agent strings that callers can select explicitly
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1691
1692
# Sentinel used to distinguish "no default supplied" from an explicit
# default value of None (see the xpath_* helpers below)
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Full month names keyed by language code, for parsing localized dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1705
# Media file extensions recognized by the extractors/format logic
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1720
# Maps accented characters to ASCII replacements; needed for sanitizing
# filenames in restricted mode (see sanitize_filename)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# Candidate strptime() formats tried when parsing free-form date strings
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Extra formats for locales that write the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Extra formats for locales that write the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the trailing argument list of "packed" (obfuscated) JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks; the JSON payload is
# captured in the named group 'json_ld'
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1791
1792
def preferredencoding():
    """Return the best guess for the system's preferred text encoding.

    Uses locale.getpreferredencoding() when it names a usable codec,
    falling back to UTF-8 otherwise.
    """
    encoding = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        'TEST'.encode(candidate)  # reject names that are not valid codecs
    except Exception:
        pass
    else:
        encoding = candidate
    return encoding
1806
1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Write to a temporary file in the *target* directory so the final
    # os.rename() below is a same-filesystem (atomic on POSIX) replace
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with restrictive permissions;
            # widen to the default (0666 honoring the process umask)
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Clean up the temporary file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # The attribute name is interpolated into the XPath expression, so
        # restrict it to simple names to avoid building an invalid query
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Fallback: filter matching elements manually, since older
        # ElementTree versions cannot evaluate attribute predicates
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
1886
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into '{uri}tag' form using ns_map."""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1897
1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string, or an iterable of
    candidate strings tried in order).

    When nothing matches: return `default` if one was supplied, raise
    ExtractorError if `fatal`, otherwise return None.
    """
    def _search(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        found = _search(xpath)
    else:
        for candidate in xpath:
            found = _search(candidate)
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matching xpath[@key],
    honoring `default`/`fatal` like the other xpath helpers."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % (
            '%s[@%s]' % (xpath, key) if name is None else name))
    return None
1948
1949
def get_element_by_id(id, html):
    """Return the inner content of the tag whose id attribute equals `id`."""
    return get_element_by_attribute('id', id, html)
1953
1954
def get_element_by_class(class_name, html):
    """Return the inner content of the first tag carrying the given class, or None."""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1959
1960
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner content of the first tag with attribute=value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1964
1965
def get_elements_by_class(class_name, html):
    """Return the inner contents of all tags carrying the given class, as a list."""
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner contents of every tag whose `attribute` matches `value`."""

    if escape_value:
        value = re.escape(value)

    results = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = m.group('content')

        # Strip surrounding quotes if the captured content is quoted
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1996
1997
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser that records the attributes of elements fed to it;
    intended to be fed a single element (see extract_attributes)."""

    def __init__(self):
        self.attrs = {}
        super().__init__()

    def handle_starttag(self, tag, attrs):
        # Each start tag overwrites the previous attributes
        self.attrs = dict(attrs)
2007
2008
def extract_attributes(html_element):
    """Parse a string containing a single HTML element and return its
    attributes as a dict.

    Attribute names are lower-cased, entities are decoded, valueless
    attributes map to None and empty values map to ''. For example,
    '<el a="foo" B="bar" empty= noval entity="&amp;">' yields
    {'a': 'foo', 'b': 'bar', 'empty': '', 'noval': None, 'entity': '&'}.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Pythons raise on malformed HTML; keep whatever was gathered
        pass
    return parser.attrs
2033
2034
def clean_html(html):
    """Clean an HTML snippet into readable plain text (None passes through)."""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = html.replace('\n', ' ')
    for pattern, replacement in (
            # <br> and paragraph boundaries become newlines
            (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
            (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
            # drop all remaining tags
            ('<.*?>', ''),
    ):
        html = re.sub(pattern, replacement, html)
    # Decode entities last, once all markup is gone
    return unescapeHTML(html).strip()
2050
2051
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout in binary mode so written bytes are not mangled
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            # Permission errors will not be fixed by renaming; give up early
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2082
2083
def timeconvert(timestr):
    """Convert an RFC 2822 date string into a Unix timestamp (None on failure)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2091
2092
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitize a string so it can be used as (part of) a filename.

    @param restricted  Use a stricter, ASCII-only subset of allowed characters
    @param is_id       The value is an identifier: skip the cosmetic cleanup
                       (underscore collapsing etc.) so it survives intact
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if not restricted and char == '\n':
            return ' '
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Keep timestamps like 12:34:56 readable: use '_' rather than ' -'
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if not is_id:
        # Collapse runs of underscores produced by the substitutions above
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2136
2137
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows.

    On other platforms the path is returned unchanged, unless `force` is
    set, in which case Windows-style sanitization is applied anyway.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters Windows forbids in path components, plus trailing
    # spaces/dots; '.' and '..' components are kept as-is
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve absolute paths when force-sanitizing on non-Windows
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2161
2162
def sanitize_url(url):
    """Normalize a URL: give protocol-relative URLs an http: scheme and fix
    a few common scheme typos seen in the wild."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://'),
    ):
        fixed, count = re.subn(mistake, fixup, url)
        if count:
            return fixed
    return url
2179
2180
def extract_basic_auth(url):
    """Strip inline userinfo from `url`.

    Returns (clean_url, authorization_header_value) where the header value
    is None when the URL carried no credentials.
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    netloc = (parts.hostname if parts.port is None
              else '%s:%d' % (parts.hostname, parts.port))
    clean_url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    return clean_url, 'Basic ' + token
2191
2192
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after sanitizing/escaping the URL and moving
    any inline basic-auth credentials into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2199
2200
def expand_path(s):
    """Expand '~' and environment variables in a path string."""
    user_expanded = compat_expanduser(s)
    return os.path.expandvars(user_expanded)
2204
2205
def orderedSet(iterable):
    """Return a list of the items of `iterable` with duplicates removed,
    preserving first-seen order (works for unhashable items too)."""
    result = []
    for item in iterable:
        if item in result:
            continue
        result.append(item)
    return result
2213
2214
def _htmlentity_transform(entity_with_semicolon):
    """Transform one HTML entity (including its trailing ';') into a character."""
    entity = entity_with_semicolon[:-1]

    # Known named (non-numeric) HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        digits = numeric.group(1)
        if digits.startswith('x'):
            base = 16
            digits = '0%s' % digits
        else:
            base = 10
        # int()/chr() may still fail for out-of-range codepoints
        # (see https://github.com/ytdl-org/youtube-dl/issues/7518)
        try:
            return compat_chr(int(digits, base))
        except ValueError:
            pass

    # Unknown entity: keep its literal representation
    return '&%s;' % entity
2244
2245
def unescapeHTML(s):
    """Replace all HTML entities in `s` with their characters.

    Returns None when given None (convenient for optional fields).
    """
    if s is None:
        return None
    # isinstance (rather than an exact type() comparison) is the idiomatic
    # check and also accepts str subclasses, which behave identically here
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2253
2254
def escapeHTML(text):
    """Escape &, <, >, double and single quotes for safe HTML embedding."""
    replacements = (
        ('&', '&amp;'),  # must run first so later entities are not re-escaped
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
2264
2265
def process_communicate_or_kill(p, *args, **kwargs):
    """communicate() with process `p`; on any exception (including
    KeyboardInterrupt) kill the process, reap it and re-raise."""
    try:
        stdout_stderr = p.communicate(*args, **kwargs)
    except BaseException:
        p.kill()
        p.wait()
        raise
    return stdout_stderr
2273
2274
class Popen(subprocess.Popen):
    """subprocess.Popen that hides the console window on Windows and adds
    a communicate_or_kill() helper."""

    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None  # no special startup handling elsewhere

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        """communicate(), killing the process on any exception."""
        return process_communicate_or_kill(self, *args, **kwargs)
2287
2288
def get_subprocess_encoding():
    """Return the encoding to use when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'
2299
2300
def encodeFilename(s, for_subprocess=False):
    """Encode a filename for the current platform/Python version.

    @param s The name of the file (must be a text string)
    """

    assert type(s) == compat_str

    if sys.version_info >= (3, 0):
        # Python 3 has a Unicode file API
        return s

    if sys.platform.startswith('java'):
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        return s

    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2323
2324
def decodeFilename(b, for_subprocess=False):
    """Decode a filename coming from the OS back into text (no-op on Python 3)."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2334
2335
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, for_subprocess=True)
2343
2344
def decodeArgument(b):
    """Decode a command-line argument obtained from a subprocess."""
    return decodeFilename(b, for_subprocess=True)
2347
2348
def decodeOption(optval):
    """Decode a command-line option value into text using the preferred encoding."""
    if optval is None:
        return None
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
2357
2358
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a millisecond count into an (hours, minutes, seconds, milliseconds) tuple."""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2367
2368
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]M<delim>SS, optionally
    appending '.mmm' milliseconds."""
    hours, minutes, seconds, milliseconds = timetuple_from_msec(secs * 1000)
    if hours:
        formatted = '%d%s%02d%s%02d' % (hours, delim, minutes, delim, seconds)
    elif minutes:
        formatted = '%d%s%02d' % (minutes, delim, seconds)
    else:
        formatted = '%d' % seconds
    if msec:
        formatted = '%s.%03d' % (formatted, milliseconds)
    return formatted
2378
2379
def _ssl_load_windows_store_certs(ssl_context, storename):
    """Load server-auth x509 certificates from the given Windows certificate
    store into ssl_context, skipping certificates that fail to load.

    Code adapted from _load_windows_store_certs in
    https://github.com/python/cpython/blob/main/Lib/ssl.py
    """
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        try:
            ssl_context.load_verify_locations(cadata=cert)
        except ssl.SSLError:
            # Ignore individual bad certificates rather than failing the store
            pass
2393
2394
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose SSL context honours the
    'nocheckcertificate' option in params."""
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2416
2417
def bug_reports_message(before=';'):
    """Return the standard bug-report blurb, appended after the text `before`."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = (
        'please report this issue on https://github.com/yt-dlp/yt-dlp .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
    ) % update_cmd

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence: capitalize the first word
        msg = msg[0].title() + msg[1:]
    return (before + ' ' if before else '') + msg
2432
2433
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors; catch this to handle any of them."""
    pass
2437
2438
# Exception types that indicate network-level failures; ExtractorError
# treats these as "expected" rather than as bugs
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2443
2444
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            # Network failures are never treated as internal bugs
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        message = ''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message()))
        super().__init__(message)

    def format_traceback(self):
        """Render the stored traceback as a string, or None if absent."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2474
2475
class UnsupportedError(ExtractorError):
    """Raised when no extractor handles the given URL."""

    def __init__(self, url):
        self.url = url
        super().__init__('Unsupported URL: %s' % url, expected=True)
2481
2482
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2486
2487
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    `countries`, if given, is kept on the instance for callers to inspect.
    """

    def __init__(self, msg, countries=None, **kwargs):
        kwargs['expected'] = True  # a geo block is site policy, not a bug
        super().__init__(msg, **kwargs)
        self.countries = countries
2499
2500
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """exc_info, if given, is the original exception that caused the
        trouble (as returned by sys.exc_info())."""
        super().__init__(msg)
        self.exc_info = exc_info
2513
2514
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2522
2523
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2531
2532
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    Raised by a PostProcessor's run() method to indicate an error in the
    postprocessing task.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg  # kept for callers that read .msg directly
2543
2544
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    msg = 'The download was cancelled'

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg  # override the class-level default message
        super().__init__(self.msg)
2553
2554
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Overrides DownloadCancelled.msg
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
2558
2559
class RejectedVideoReached(DownloadCancelled):
    """ --break-on-reject triggered """
    # Overrides DownloadCancelled.msg
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
2563
2564
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Overrides DownloadCancelled.msg
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2568
2569
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    pass
2573
2574
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2582
2583
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised when a downloaded file is smaller than what the server announced,
    indicating the connection was probably interrupted. Both `downloaded`
    and `expected` are byte counts.
    """

    def __init__(self, downloaded, expected):
        super().__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        self.downloaded = downloaded
        self.expected = expected
2599
2600
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails; classifies the
    failure in `.reason` ('NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED')."""

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Derive a coarse, machine-readable reason from the errno/message
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2615
2616
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no usable mechanism for writing xattrs is available."""
    pass
2619
2620
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class`, honouring yt-dlp networking params.

    Applies the handler's `source_address` param by binding outgoing
    connections to that local address and filtering getaddrinfo() results
    to the matching address family.  Also works around several Python 2
    quirks.  Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dotted source address is assumed to be IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address until one connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)  # port 0: let the OS pick the local port
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2684
2685
def handle_youtubedl_headers(headers):
    """Process internal 'Youtubedl-*' pseudo-headers before a real request.

    When 'Youtubedl-no-compression' is present, return a copy of `headers`
    with any Accept-Encoding header (matched case-insensitively) and the
    marker itself removed; otherwise return `headers` unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    # NB: plain dict() call instead of a dict comprehension, matching the
    # file's Python 2.6 compatibility convention
    filtered = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    filtered.pop('Youtubedl-no-compression', None)
    return filtered
2694
2695
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open an HTTP connection, optionally routed through a SOCKS proxy
        given by the internal 'Ytdl-socks-proxy' pseudo-header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying raw deflate first and falling
        back to zlib-wrapped deflate (some servers send either form)."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Add any standard header not already present on the request
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decompress gzip/deflate responses and re-escape
        the Location header of redirects to satisfy RFC 3986."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2819
2820
def make_socks_conn_class(base_class, socks_proxy):
    """Return a `base_class` subclass whose connect() tunnels through the
    SOCKS proxy described by the `socks_proxy` URL (socks/socks4/socks4a/socks5).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS connections, wrap the proxied socket with TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2862
2863
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that honours yt-dlp params and the internal
    'Ytdl-socks-proxy' pseudo-header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class

        # Route through a SOCKS proxy when requested via the pseudo-header
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        open_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            open_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            open_kwargs['check_hostname'] = self._check_hostname

        return self.do_open(
            functools.partial(_create_http_connection, self, conn_class, True),
            req, **open_kwargs)
2887
2888
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar with UTF-8 support and tolerant loading.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in Netscape cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields per cookie entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Field layout of one line of a Netscape cookie file
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file, skipping (with a warning) entries that
        cannot be parsed rather than failing the whole load."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate one raw line; raises LoadError for malformed entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
3005
3006
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same jar handling to both HTTP
    and HTTPS requests/responses."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE(review): the workaround below is intentionally disabled; kept
        # for reference.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
3029
3030
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # All redirect codes (incl. 308) reuse the stock 302 implementation
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Refuse to redirect method/code combinations that are not safe
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3086
3087
def extract_timezone(date_str):
    """Split a trailing timezone off `date_str`.

    Returns a (timezone, remaining_str) tuple where `timezone` is a
    datetime.timedelta (zero when no offset or just 'Z' is found) and
    `remaining_str` is `date_str` with any recognized suffix removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    timezone = datetime.timedelta()
    if m:
        # Strip the matched suffix even when it is only a 'Z'
        date_str = date_str[:-len(m.group('tz'))]
        if m.group('sign'):
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
3112
3113
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date, or None on failure.

    `delimiter` separates the date and time parts; when `timezone` is
    None, the offset is extracted from the string itself.
    """
    if date_str is None:
        return None

    # Fractional seconds cannot be expressed in the strptime format below
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
3131
3132
def date_formats(day_first=True):
    """Return the candidate date format strings, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3135
3136
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Commas, AM/PM markers and timezones only get in strptime's way
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    upload_date = None
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3163
3164
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or None.

    day_first: whether ambiguous numeric dates are treated day-first.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Remember a PM marker so 12-hour clock times can be shifted below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3196
3197
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from `url`, falling back to `default_ext`."""
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = guess.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3209
3210
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by swapping in '<lang>.<format>' as extension."""
    lang_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, lang_ext, expected_real_ext)
3213
3214
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Resolve the base date recursively, then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have variable length; handled by datetime_add_months
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3255
3256
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3265
3266
def datetime_add_months(dt, months):
    """Shift `dt` by `months` months (negative allowed), clamping the day
    to the target month's length (e.g. Jan 31 + 1 month -> Feb 28/29)."""
    total_months = dt.month - 1 + months
    year = dt.year + total_months // 12
    month = total_months % 12 + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3274
3275
def datetime_round(dt, precision='day'):
    """
    Round `dt`'s time to the nearest `precision`
    ('microsecond' leaves it untouched; 'second'/'minute'/'hour'/'day').
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half up to the nearest unit boundary
    rounded = ((timestamp + unit_seconds // 2) // unit_seconds) * unit_seconds
    return datetime.datetime.utcfromtimestamp(rounded)
3292
3293
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    any other input is returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
3302
3303
class DateRange(object):
    """Represents a closed time interval between two dates."""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3333
3334
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Decode the (rare) case of a bytes result
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3343
3344
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3351
3352
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map Python fileno (stdout=1, stderr=2) to GetStdHandle ids
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is only a console if it is a character device for which
        # GetConsoleMode succeeds
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write BMP runs in chunks; non-BMP chars go out one surrogate pair
        # (2 UTF-16 code units) at a time
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3426
3427
def write_string(s, out=None, encoding=None):
    """Write text string `s` to `out` (default sys.stderr), handling
    Windows consoles, byte streams and streams with an underlying buffer."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Win32 console API, which handles Unicode correctly
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with a byte buffer: encode explicitly and write bytes
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3448
3449
def bytes_to_intlist(bs):
    """Return the byte values of `bs` as a list of ints, accepting both
    Python 3 bytes-like input and Python 2 str input."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return [ord(c) for c in bs]
3457
3458
def intlist_to_bytes(xs):
    """Pack a sequence of ints (0-255) back into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('{0}B'.format(len(xs)), *xs)
3463
3464
# Cross-platform file locking: define _lock_file/_unlock_file for the
# current platform (Win32 LockFileEx, POSIX fcntl, or a failing stub).
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure passed to Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering (practically) the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the whole file; dwFlags 0x2 requests an exclusive lock
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release the lock taken by _lock_file (requires its OVERLAPPED ptr)
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3538
3539
class locked_file(object):
    """File wrapper holding an OS-level lock for the duration of a `with` block.

    Mode 'r' takes a shared lock; 'a' and 'w' take an exclusive lock.
    The file is closed when the block exits (or if locking fails).
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Never leak the file handle if the lock could not be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3569
3570
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), falling back to 'utf-8' if unset."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding
3574
3575
def shell_quote(args):
    """Quote each argument for safe interpolation into a shell command line."""
    fs_encoding = get_filesystem_encoding()
    quoted = []
    for arg in args:
        if isinstance(arg, bytes):
            # We may get a filename encoded with 'encodeFilename'
            arg = arg.decode(fs_encoding)
        quoted.append(compat_shlex_quote(arg))
    return ' '.join(quoted)
3585
3586
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, sdata)
3595
3596
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url; returns (url, data-or-default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    data = json.loads(compat_parse_qs(sdata)['__youtubedl_smuggle'][0])
    return url, data
3604
3605
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.27MiB'.

    Accepts ints, floats, numeric strings, or None (formatted as 'N/A').
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so absurdly large values cannot index past 'YiB'
        # (previously raised IndexError for values >= 1024 ** 9)
        exponent = min(int(math.log(bytes, 1024.0)), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3618
3619
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number><unit>' in *s* using *unit_table*; int or None."""
    units_pattern = '|'.join(re.escape(unit) for unit in unit_table)
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_pattern, s)
    if match is None:
        return None
    # Comma is accepted as a decimal separator
    number = float(match.group('num').replace(',', '.'))
    return int(number * unit_table[match.group('unit')])
3629
3630
def parse_filesize(s):
    """Parse a human-readable file size such as '5.5MiB' or '100 KB' into bytes.

    Both decimal (KB, MB, ...) and binary (KiB, MiB, ...) units are accepted.
    Note the quirk visible in the table below: lowercase-prefix forms with a
    capital B ('kB', 'mB', ...) are treated as binary (1024-based) multiples.
    Returns an int, or None when *s* is None or does not parse.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3700
3701
def parse_count(s):
    """Parse a view/like count like '1.2M', '15K' or '1,234' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Purely numeric (possibly with thousands separators)
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, s)
3721
3722
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4K'."""
    if s is None:
        return {}

    dims = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    scan = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if scan:
        return {'height': int(scan.group(1))}

    uhd = re.search(r'\b([48])[kK]\b', s)
    if uhd:
        # 4K -> 2160, 8K -> 4320
        return {'height': 540 * int(uhd.group(1))}

    return {}
3743
3744
def parse_bitrate(s):
    """Parse a bitrate like '128 kbps' into an int (kbps); None otherwise."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    if mobj:
        return int(mobj.group(1))
    return None
3751
3752
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in month_names:
        return month_names.index(name) + 1
    return None
3762
3763
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3772
3773
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave already-escaped entities and character references untouched
    entity = r'amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};'
    return re.sub(r'&(?!%s)' % entity, '&amp;', xml_str)
3780
3781
def setproctitle(title):
    """Set the process name (as shown by ps/top) via glibc prctl.

    Best-effort: silently does nothing on Jython, on systems without
    libc.so.6, or when libc lacks prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Buffer must outlive the prctl call; kernel reads from it directly
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3806
3807
def remove_start(s, start):
    """Strip *start* from the beginning of *s*, if present (None-safe)."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3810
3811
def remove_end(s, end):
    """Strip *end* from the end of *s*, if present (None-safe)."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
3814
3815
def remove_quotes(s):
    """Strip one pair of matching single or double quotes, if present."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3823
3824
def get_domain(url):
    """Return the host part of *url* with any 'www.' prefix stripped, or None."""
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not mobj:
        return None
    return mobj.group('domain')
3828
3829
def url_basename(url):
    """Return the last path component of *url* (query/fragment excluded)."""
    path = compat_urlparse.urlparse(url).path
    return path.rstrip('/').rpartition('/')[2]
3833
3834
def base_url(url):
    """Return *url* truncated after the final '/' preceding any '?', '#' or '&'."""
    return re.match(r'https?://[^?#&]+/', url).group(0)
3837
3838
def urljoin(base, path):
    """Join *base* and *path* into an absolute URL; None when not possible."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Already absolute (scheme or protocol-relative)
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3852
3853
class HEADRequest(compat_urllib_request.Request):
    """A Request that always uses the HTTP HEAD method."""
    def get_method(self):
        return 'HEAD'
3857
3858
class PUTRequest(compat_urllib_request.Request):
    """A Request that always uses the HTTP PUT method."""
    def get_method(self):
        return 'PUT'
3862
3863
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int (scaled by invscale/scale); *default* on failure.

    If *get_attr* is given, the named attribute of *v* is converted instead.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
3876
3877
def str_or_none(v, default=None):
    """compat_str(v), or *default* when v is None."""
    if v is None:
        return default
    return compat_str(v)
3880
3881
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and an optional leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3889
3890
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float (scaled by invscale/scale); *default* on failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3898
3899
def bool_or_none(v, default=None):
    """Return *v* only when it is a genuine bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
3902
3903
def strip_or_none(v, default=None):
    """v.strip() when *v* is a string; *default* for anything else."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3906
3907
def url_or_none(url):
    """Return the stripped URL if it looks like a supported absolute URL."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    # http(s), rtmp(t)(e/s), rtmfp, rtsp(s/u), mms, ftp(s), protocol-relative
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3913
3914
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string; *default* on failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        # dt is None for unsupported types -> AttributeError -> default
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3925
3926
def parse_duration(s):
    """Parse a duration string into seconds (float), or None if unrecognized.

    Accepts clock notation ('1:23:45.67'), unit-suffixed / ISO 8601-like
    forms ('3h 2min 5s', 'PT1H2M3S'), and plain 'X hours' / 'Y minutes'.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1. Clock format: [[[DD:]HH:]MM:]SS[.ms], optional trailing 'Z'
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2. Unit-suffixed / ISO 8601-like format. Years, months and weeks
        # are matched (so such strings still parse) but not captured, so
        # their values never contribute to the result.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3. Plain free text: 'X hours' or 'Y minutes' (possibly decimal)
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up all components that actually matched; 'ms' retains its leading
    # dot ('.5'), so float() yields the fractional part directly
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3983
3984
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension ('a.mp4' -> 'a.temp.mp4').

    When *expected_real_ext* is given and does not match the actual
    extension, *ext* is appended after the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3991
3992
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    When *expected_real_ext* is given and does not match the actual
    extension, *ext* is appended after the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3998
3999
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate_or_kill()
    except OSError:
        return False
    return exe
4008
4009
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = proc.communicate_or_kill()[0]
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
4026
4027
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Search *output* for a version number; *unrecognized* when not found."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
4037
4038
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        # Subclasses builtin IndexError so callers may catch either the
        # specific LazyList.IndexError or a plain IndexError
        pass

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        self.__cache = []  # items consumed from the iterable so far
        self.__reversed = False  # logical (not physical) reversal flag

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # Replay cached items first, then keep consuming (and caching)
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        self.__cache.extend(self.__iterable)
        # Discard the emptied iterable to make it pickle-able
        self.__iterable = []
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map an index in the reversed view to one in the cache: 0 -> -1, 1 -> -2, ...
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Otherwise consume only as many items as the index/slice requires
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Truthiness only needs the first element of the current view
        try:
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        # In-place logical reversal; returns self to allow chaining
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4125
4126
class PagedList:
    """Abstract base for paginated result lists.

    Subclasses provide _getslice(); pages are fetched via *pagefunc* and
    cached per page number when *use_cache* is enabled.
    """
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc  # pagenum -> iterable of entries
        self._pagesize = pagesize  # entries per page
        self._use_cache = use_cache
        self._cache = {}  # pagenum -> list of entries

    def getpage(self, pagenum):
        # Fetch a single page as a list, reusing the cache when possible
        page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4156
4157
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages one at a time, only as they are needed."""
    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested range within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4191
4192
class InAdvancePagedList(PagedList):
    """PagedList for sources where the total page count is known up front."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize  # offset into first page
        only_more = None if end is None else end - start  # entries still wanted
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    # Final partial page: yield just what is needed and stop
                    yield from page_results[:only_more]
                    break
            yield from page_results
4216
4217
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
4224
4225
def lowercase_escape(s):
    """Decode literal \\uXXXX escape sequences embedded in *s*."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
4232
4233
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Reserved and sub-delimiter characters are deliberately left unescaped
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4239
4240
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment)
    ).geturl()
4251
4252
def parse_qs(url):
    """Parse the query string of *url* into a dict of value lists."""
    query = compat_urllib_parse_urlparse(url).query
    return compat_parse_qs(query)
4255
4256
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping comments and blank lines.

    The file object is closed before returning.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM that may appear raw or already decoded as U+FEFF
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4274
4275
def urlencode_postdata(*args, **kargs):
    """URL-encode form data and ASCII-encode it for use as a POST body."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4278
4279
def update_url_query(url, query):
    """Return *url* with the parameters in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    merged_qs = compat_parse_qs(parsed.query)
    merged_qs.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(merged_qs, True)))
4288
4289
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its URL, data, headers or query.

    The HTTP method (HEAD/PUT/GET...) of the original request is preserved.
    """
    new_headers = req.headers.copy()
    new_headers.update(headers)
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        update_url_query(url or req.get_full_url(), query),
        data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4308
4309
def _multipart_encode_impl(data, boundary):
    """Encode *data* (dict of str/bytes pairs) as multipart/form-data.

    Returns (payload_bytes, content_type). Raises ValueError if *boundary*
    occurs inside any encoded field; the caller retries with a new boundary.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    # Accumulate parts in a list and join once; repeated bytes += is quadratic
    parts = []
    for k, v in data.items():
        parts.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(content)

    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
4330
4331
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    # Retry with fresh random boundaries until none collides with the data
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None
4360
4361
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable value in *d* for the given key or keys.

    Values that are None (or falsy, unless skip_false_values=False) are
    skipped when a list/tuple of keys is given.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d or d[key] is None:
            continue
        if skip_false_values and not d[key]:
            continue
        return d[key]
    return default
4370
4371
def try_get(src, getter, expected_type=None):
    """Apply each getter to *src*, returning the first result that neither
    raises a lookup error nor fails the *expected_type* check."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4381
4382
def merge_dicts(*dicts):
    """Merge dicts left to right; the first non-None value for a key wins,
    except that a later non-empty string replaces an earlier empty string."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            replaces_empty_str = (
                isinstance(v, compat_str) and v
                and isinstance(merged.get(k), compat_str)
                and not merged[k])
            if k not in merged or replaces_empty_str:
                merged[k] = v
    return merged
4395
4396
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding bytes with *encoding* if needed."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4399
4400
# US MPAA-style content rating -> minimum viewer age used as the age limit
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines rating -> age limit (general-audience ratings
# TV-G and TV-PG map to 0, i.e. no restriction)
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4418
4419
def parse_age_limit(s):
    """Normalize an age limit ('18', '18+', 'PG-13', 'TV-MA' or int) to an int."""
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4435
4436
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, returning only the JSON payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4445
4446
def js_to_json(code, vars={}):
    """Convert a JavaScript object/expression string into valid JSON.

    vars is a dict of var, val pairs to substitute for bare identifiers.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals, optionally followed by ':' (object keys)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched JS token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to JSON form (single quotes, \x, ...)
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integer object keys must be quoted in JSON
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Bare identifiers become JSON strings
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4493
4494
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4503
4504
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Filename infix for each auxiliary file type; None appears to mean the type
# has no fixed infix (NOTE(review): inferred from the values — confirm
# against the template-expansion code elsewhere in the project)
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4521
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Template regex for %-style format fields; {0} is the key pattern and
# {1} the conversion-type pattern, filled in via str.format by callers
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
    '''


# All conversion-type characters accepted by %-style formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4540
4541
def limit_length(s, length):
    """Truncate *s* to at most *length* characters, ending in '...' when cut.

    Returns None for None input.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        # max() guards against lengths shorter than the ellipsis itself,
        # which previously produced output longer than *length*
        return s[:max(length - len(ELLIPSES), 0)] + ELLIPSES
    return s
4550
4551
def version_tuple(v):
    """Split a version string like '1.2.3' or '1.2-3' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4554
4555
def is_outdated_version(version, limit, assume_new=True):
    """True when *version* < *limit*; missing/unparseable -> not assume_new."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4563
4564
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Local import — presumably avoids a circular dependency with .update;
    # confirm before moving to module level
    from .update import is_non_updateable

    return not is_non_updateable()
4571
4572
def args_to_str(args):
    # Get a short string representation for a subprocess command
    return ' '.join(map(compat_shlex_quote, args))
4576
4577
def error_to_compat_str(err):
    """str(err), decoded with the preferred encoding on Python 2."""
    err_str = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        err_str = err_str.decode(preferredencoding())
    return err_str
4585
4586
def mimetype2ext(mt):
    """Map a MIME type (optionally with parameters) to a file extension.

    Lookup order: exact type match, then subtype, then '+suffix'; falls back
    to the subtype itself with '+' replaced by '.'. Returns None for None.
    """
    if mt is None:
        return None

    # Strip parameters such as '; charset=utf-8'
    mimetype = mt.partition(';')[0].strip()

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }
    if mimetype in FULL_MAP:
        return FULL_MAP[mimetype]

    subtype = mimetype.rpartition('/')[2]
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }
    if subtype.lower() in SUBTYPE_MAP:
        return SUBTYPE_MAP[subtype.lower()]

    suffix = subtype.partition('+')[2]
    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }
    if suffix in SUFFIX_MAP:
        return SUFFIX_MAP[suffix]

    return subtype.replace('+', '.')
4649
4650
def parse_codecs(codecs_str):
    """Split an RFC 6381 "codecs" attribute value into its video codec,
    audio codec and dynamic-range metadata.

    Returns a dict with 'vcodec', 'acodec' (either may be 'none') and
    'dynamic_range', or {} if nothing could be parsed.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec, hdr = None, None, None
    for full_codec in split_codecs:
        parts = full_codec.split('.')
        # Drop zeros so that e.g. 'av01' is recognized as 'av1'
        codec = parts[0].replace('0', '')
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                     'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
            if not vcodec:
                # vp9/av1 carry profile info in the first four dot-parts only
                vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
                if codec in ('dvh1', 'dvhe'):
                    # Dolby Vision
                    hdr = 'DV'
                elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
                    # av1.<profile>.<level>.<bit depth>: 10-bit implies HDR10
                    hdr = 'HDR10'
                elif full_codec.replace('0', '').startswith('vp9.2'):
                    # vp9 profile 2 implies HDR10
                    hdr = 'HDR10'
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Nothing recognized: with exactly two entries, assume video + audio
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
    return {}
4689
4690
def urlhandle_detect_ext(url_handle):
    """Guess a file extension for a URL response, preferring the
    Content-Disposition filename and falling back to the Content-Type."""
    headers = url_handle.headers

    cd = headers.get('Content-Disposition')
    if cd:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4703
4704
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 'data:' URI with a base64-encoded payload."""
    payload = base64.b64encode(data).decode('ascii')
    return f'data:{mime_type};base64,{payload}'
4707
4708
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No age limit configured, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4717
4718
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Byte-order marks and the encodings they imply
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]

    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4737
4738
def determine_protocol(info_dict):
    """Deduce the download protocol of an info_dict entry.

    An explicit 'protocol' field wins; otherwise the URL prefix and
    extension are examined, falling back to the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = sanitize_url(info_dict['url'])
    for scheme in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(scheme):
            return scheme

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4759
4760
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """
    def width(string):
        # Visible width: terminal escape sequences take no columns
        return len(remove_terminal_sequences(string))

    def get_max_lens(table):
        # Widest visible value per column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only the cells whose corresponding filter entry is truthy
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns whose data cells are all empty (max width 0)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extraGap += 1
    if delim:
        # Insert a separator row of `delim` characters under the header;
        # zeroing the last width avoids trailing padding on each line
        table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
        max_lens[-1] = 0
    for row in table:
        for pos, text in enumerate(map(str, row)):
            row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
    ret = '\n'.join(''.join(row) for row in table)
    return ret
4788
4789
def _match_one(filter_part, dct, incomplete):
    """Evaluate a single filter expression (e.g. 'duration > 60' or
    '!is_live') against dct.

    When `incomplete` is true, comparisons against missing fields pass.
    Raises ValueError for unparsable filters or when a string-only
    operator is used with a numeric value.
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        m = m.groupdict()
        unnegated_op = COMPARISON_OPERATORS[m['op']]
        if m['negation']:
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        # The alternation guarantees exactly one of these matched.
        # (A dangling `or m['intval']` was removed here: the regex has no
        # 'intval' group, so evaluating it would raise KeyError.)
        comparison_value = m['quotedstrval'] or m['strval']
        if m['quote']:
            # Unescape quotes of the kind that delimited the value
            comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
        actual_value = dct.get(m['key'])
        numeric_comparison = None
        if isinstance(actual_value, compat_numeric_types):
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082)
            try:
                numeric_comparison = int(comparison_value)
            except ValueError:
                numeric_comparison = parse_filesize(comparison_value)
                if numeric_comparison is None:
                    numeric_comparison = parse_filesize(f'{comparison_value}B')
                if numeric_comparison is None:
                    numeric_comparison = parse_duration(comparison_value)
        if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
            raise ValueError('Operator %s only supports string values!' % m['op'])
        if actual_value is None:
            # Missing field: pass when metadata is incomplete or the
            # operator carried the '?' none-inclusive marker
            return incomplete or m['none_inclusive']
        return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4865
4866
def match_str(filter_str, dct, incomplete=False):
    """Filter a dictionary with a simple string syntax.

    Returns True when every '&'-separated condition passes. When
    incomplete is set, conditions on missing fields pass.
    """
    parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(part.replace(r'\&', '&'), dct, incomplete)
        for part in parts)
4874
4875
def match_filter_func(filter_str):
    """Build a --match-filter callable: returns None when the info dict
    passes filter_str, otherwise a skip-reason message."""
    def _match_func(info_dict, *args, **kwargs):
        if match_str(filter_str, info_dict, *args, **kwargs):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4884
4885
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Supports plain offsets ('12', '12.5s') and clock times
    ('HH:MM:SS.fff' or 'HH:MM:SS:frac'). Returns None for empty or
    unrecognized expressions.
    """
    if not time_expr:
        return None

    match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if match:
        return float(match.group('time_offset'))

    match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if match:
        hours, minutes, seconds = match.groups()
        # A ':' before the fraction is treated like a decimal point
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4897
4898
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % tuple(timetuple)
4901
4902
def ass_subtitles_timecode(seconds):
    """Format a number of seconds as an ASS timestamp (H:MM:SS.cc,
    i.e. with centisecond precision)."""
    *hms, msec = timetuple_from_msec(seconds * 1000)
    return '%01d:%02d:%02d.%02d' % (*hms, msec / 10)
4906
4907
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Rewrite legacy TTAF/TTML namespaces to the modern ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that are translated into SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}          # style id -> resolved style properties
    default_style = {}   # style inherited from the body/div element

    class TTMLPElementParser(object):
        # Streaming target for XMLParser: converts one <p> subtree into
        # SRT-flavoured markup (<b>/<i>/<u>/<font>)
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close markup opened by the matching start() call
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and re-feed it through the streaming parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat until every parent reference has
    # been seen (handles styles declared after their children)
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style attached to body/div acts as the document default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
5070
5071
def cli_option(params, command_option, param):
    """Return [command_option, value] when params[param] is set (not None),
    otherwise [].

    The value is always stringified so that the result is safe to pass
    as a subprocess argument list.
    """
    param = params.get(param)
    # Convert every non-None value. The previous `if param:` guard skipped
    # the compat_str conversion for falsy values such as 0 or '', leaking
    # non-string objects into the returned argv fragment.
    return [command_option, compat_str(param)] if param is not None else []
5077
5078
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean param as CLI arguments.

    Returns [] when unset; otherwise either ['opt', 'value'] or, with a
    separator, a single joined 'opt<sep>value' argument.
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return ['%s%s%s' % (command_option, separator, value)]
    return [command_option, value]
5087
5088
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value,
    otherwise []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5092
5093
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the argument list for the first matching key (or key group)
    from argdict.

    argdict may be a plain list/tuple for backward compatibility, in
    which case it is returned verbatim when use_compat is set.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        # Collect the arg lists of every key in the group that is present
        matches = [argdict.get(key.lower()) for key in variadic(key_list)]
        matches = [args for args in matches if args is not None]
        if matches:
            return list(itertools.chain.from_iterable(matches))
    return default
5112
5113
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve per-executable configuration args, e.g. for postprocessors.

    Builds lookup keys of the form '<main_key>+<exe><suffix>' (or just
    '<exe><suffix>' when both names coincide) and delegates to
    cli_configuration_args.
    """
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    prefixed = [f'{root_key}{key}' for key in (keys or [''])]
    if root_key not in prefixed:
        # No bare root key requested: the legacy list form does not apply
        use_compat = False
    else:
        if main_key != exe:
            prefixed.append((main_key, exe))
        prefixed.append('default')
    return cli_configuration_args(argdict, prefixed, default, use_compat)
5125
5126
5127 class ISO639Utils(object):
5128 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5129 _lang_map = {
5130 'aa': 'aar',
5131 'ab': 'abk',
5132 'ae': 'ave',
5133 'af': 'afr',
5134 'ak': 'aka',
5135 'am': 'amh',
5136 'an': 'arg',
5137 'ar': 'ara',
5138 'as': 'asm',
5139 'av': 'ava',
5140 'ay': 'aym',
5141 'az': 'aze',
5142 'ba': 'bak',
5143 'be': 'bel',
5144 'bg': 'bul',
5145 'bh': 'bih',
5146 'bi': 'bis',
5147 'bm': 'bam',
5148 'bn': 'ben',
5149 'bo': 'bod',
5150 'br': 'bre',
5151 'bs': 'bos',
5152 'ca': 'cat',
5153 'ce': 'che',
5154 'ch': 'cha',
5155 'co': 'cos',
5156 'cr': 'cre',
5157 'cs': 'ces',
5158 'cu': 'chu',
5159 'cv': 'chv',
5160 'cy': 'cym',
5161 'da': 'dan',
5162 'de': 'deu',
5163 'dv': 'div',
5164 'dz': 'dzo',
5165 'ee': 'ewe',
5166 'el': 'ell',
5167 'en': 'eng',
5168 'eo': 'epo',
5169 'es': 'spa',
5170 'et': 'est',
5171 'eu': 'eus',
5172 'fa': 'fas',
5173 'ff': 'ful',
5174 'fi': 'fin',
5175 'fj': 'fij',
5176 'fo': 'fao',
5177 'fr': 'fra',
5178 'fy': 'fry',
5179 'ga': 'gle',
5180 'gd': 'gla',
5181 'gl': 'glg',
5182 'gn': 'grn',
5183 'gu': 'guj',
5184 'gv': 'glv',
5185 'ha': 'hau',
5186 'he': 'heb',
5187 'iw': 'heb', # Replaced by he in 1989 revision
5188 'hi': 'hin',
5189 'ho': 'hmo',
5190 'hr': 'hrv',
5191 'ht': 'hat',
5192 'hu': 'hun',
5193 'hy': 'hye',
5194 'hz': 'her',
5195 'ia': 'ina',
5196 'id': 'ind',
5197 'in': 'ind', # Replaced by id in 1989 revision
5198 'ie': 'ile',
5199 'ig': 'ibo',
5200 'ii': 'iii',
5201 'ik': 'ipk',
5202 'io': 'ido',
5203 'is': 'isl',
5204 'it': 'ita',
5205 'iu': 'iku',
5206 'ja': 'jpn',
5207 'jv': 'jav',
5208 'ka': 'kat',
5209 'kg': 'kon',
5210 'ki': 'kik',
5211 'kj': 'kua',
5212 'kk': 'kaz',
5213 'kl': 'kal',
5214 'km': 'khm',
5215 'kn': 'kan',
5216 'ko': 'kor',
5217 'kr': 'kau',
5218 'ks': 'kas',
5219 'ku': 'kur',
5220 'kv': 'kom',
5221 'kw': 'cor',
5222 'ky': 'kir',
5223 'la': 'lat',
5224 'lb': 'ltz',
5225 'lg': 'lug',
5226 'li': 'lim',
5227 'ln': 'lin',
5228 'lo': 'lao',
5229 'lt': 'lit',
5230 'lu': 'lub',
5231 'lv': 'lav',
5232 'mg': 'mlg',
5233 'mh': 'mah',
5234 'mi': 'mri',
5235 'mk': 'mkd',
5236 'ml': 'mal',
5237 'mn': 'mon',
5238 'mr': 'mar',
5239 'ms': 'msa',
5240 'mt': 'mlt',
5241 'my': 'mya',
5242 'na': 'nau',
5243 'nb': 'nob',
5244 'nd': 'nde',
5245 'ne': 'nep',
5246 'ng': 'ndo',
5247 'nl': 'nld',
5248 'nn': 'nno',
5249 'no': 'nor',
5250 'nr': 'nbl',
5251 'nv': 'nav',
5252 'ny': 'nya',
5253 'oc': 'oci',
5254 'oj': 'oji',
5255 'om': 'orm',
5256 'or': 'ori',
5257 'os': 'oss',
5258 'pa': 'pan',
5259 'pi': 'pli',
5260 'pl': 'pol',
5261 'ps': 'pus',
5262 'pt': 'por',
5263 'qu': 'que',
5264 'rm': 'roh',
5265 'rn': 'run',
5266 'ro': 'ron',
5267 'ru': 'rus',
5268 'rw': 'kin',
5269 'sa': 'san',
5270 'sc': 'srd',
5271 'sd': 'snd',
5272 'se': 'sme',
5273 'sg': 'sag',
5274 'si': 'sin',
5275 'sk': 'slk',
5276 'sl': 'slv',
5277 'sm': 'smo',
5278 'sn': 'sna',
5279 'so': 'som',
5280 'sq': 'sqi',
5281 'sr': 'srp',
5282 'ss': 'ssw',
5283 'st': 'sot',
5284 'su': 'sun',
5285 'sv': 'swe',
5286 'sw': 'swa',
5287 'ta': 'tam',
5288 'te': 'tel',
5289 'tg': 'tgk',
5290 'th': 'tha',
5291 'ti': 'tir',
5292 'tk': 'tuk',
5293 'tl': 'tgl',
5294 'tn': 'tsn',
5295 'to': 'ton',
5296 'tr': 'tur',
5297 'ts': 'tso',
5298 'tt': 'tat',
5299 'tw': 'twi',
5300 'ty': 'tah',
5301 'ug': 'uig',
5302 'uk': 'ukr',
5303 'ur': 'urd',
5304 'uz': 'uzb',
5305 've': 'ven',
5306 'vi': 'vie',
5307 'vo': 'vol',
5308 'wa': 'wln',
5309 'wo': 'wol',
5310 'xh': 'xho',
5311 'yi': 'yid',
5312 'ji': 'yid', # Replaced by yi in 1989 revision
5313 'yo': 'yor',
5314 'za': 'zha',
5315 'zh': 'zho',
5316 'zu': 'zul',
5317 }
5318
5319 @classmethod
5320 def short2long(cls, code):
5321 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5322 return cls._lang_map.get(code[:2])
5323
5324 @classmethod
5325 def long2short(cls, code):
5326 """Convert language code from ISO 639-2/T to ISO 639-1"""
5327 for short_name, long_name in cls._lang_map.items():
5328 if long_name == code:
5329 return short_name
5330
5331
5332 class ISO3166Utils(object):
5333 # From http://data.okfn.org/data/core/country-list
5334 _country_map = {
5335 'AF': 'Afghanistan',
5336 'AX': 'Åland Islands',
5337 'AL': 'Albania',
5338 'DZ': 'Algeria',
5339 'AS': 'American Samoa',
5340 'AD': 'Andorra',
5341 'AO': 'Angola',
5342 'AI': 'Anguilla',
5343 'AQ': 'Antarctica',
5344 'AG': 'Antigua and Barbuda',
5345 'AR': 'Argentina',
5346 'AM': 'Armenia',
5347 'AW': 'Aruba',
5348 'AU': 'Australia',
5349 'AT': 'Austria',
5350 'AZ': 'Azerbaijan',
5351 'BS': 'Bahamas',
5352 'BH': 'Bahrain',
5353 'BD': 'Bangladesh',
5354 'BB': 'Barbados',
5355 'BY': 'Belarus',
5356 'BE': 'Belgium',
5357 'BZ': 'Belize',
5358 'BJ': 'Benin',
5359 'BM': 'Bermuda',
5360 'BT': 'Bhutan',
5361 'BO': 'Bolivia, Plurinational State of',
5362 'BQ': 'Bonaire, Sint Eustatius and Saba',
5363 'BA': 'Bosnia and Herzegovina',
5364 'BW': 'Botswana',
5365 'BV': 'Bouvet Island',
5366 'BR': 'Brazil',
5367 'IO': 'British Indian Ocean Territory',
5368 'BN': 'Brunei Darussalam',
5369 'BG': 'Bulgaria',
5370 'BF': 'Burkina Faso',
5371 'BI': 'Burundi',
5372 'KH': 'Cambodia',
5373 'CM': 'Cameroon',
5374 'CA': 'Canada',
5375 'CV': 'Cape Verde',
5376 'KY': 'Cayman Islands',
5377 'CF': 'Central African Republic',
5378 'TD': 'Chad',
5379 'CL': 'Chile',
5380 'CN': 'China',
5381 'CX': 'Christmas Island',
5382 'CC': 'Cocos (Keeling) Islands',
5383 'CO': 'Colombia',
5384 'KM': 'Comoros',
5385 'CG': 'Congo',
5386 'CD': 'Congo, the Democratic Republic of the',
5387 'CK': 'Cook Islands',
5388 'CR': 'Costa Rica',
5389 'CI': 'Côte d\'Ivoire',
5390 'HR': 'Croatia',
5391 'CU': 'Cuba',
5392 'CW': 'Curaçao',
5393 'CY': 'Cyprus',
5394 'CZ': 'Czech Republic',
5395 'DK': 'Denmark',
5396 'DJ': 'Djibouti',
5397 'DM': 'Dominica',
5398 'DO': 'Dominican Republic',
5399 'EC': 'Ecuador',
5400 'EG': 'Egypt',
5401 'SV': 'El Salvador',
5402 'GQ': 'Equatorial Guinea',
5403 'ER': 'Eritrea',
5404 'EE': 'Estonia',
5405 'ET': 'Ethiopia',
5406 'FK': 'Falkland Islands (Malvinas)',
5407 'FO': 'Faroe Islands',
5408 'FJ': 'Fiji',
5409 'FI': 'Finland',
5410 'FR': 'France',
5411 'GF': 'French Guiana',
5412 'PF': 'French Polynesia',
5413 'TF': 'French Southern Territories',
5414 'GA': 'Gabon',
5415 'GM': 'Gambia',
5416 'GE': 'Georgia',
5417 'DE': 'Germany',
5418 'GH': 'Ghana',
5419 'GI': 'Gibraltar',
5420 'GR': 'Greece',
5421 'GL': 'Greenland',
5422 'GD': 'Grenada',
5423 'GP': 'Guadeloupe',
5424 'GU': 'Guam',
5425 'GT': 'Guatemala',
5426 'GG': 'Guernsey',
5427 'GN': 'Guinea',
5428 'GW': 'Guinea-Bissau',
5429 'GY': 'Guyana',
5430 'HT': 'Haiti',
5431 'HM': 'Heard Island and McDonald Islands',
5432 'VA': 'Holy See (Vatican City State)',
5433 'HN': 'Honduras',
5434 'HK': 'Hong Kong',
5435 'HU': 'Hungary',
5436 'IS': 'Iceland',
5437 'IN': 'India',
5438 'ID': 'Indonesia',
5439 'IR': 'Iran, Islamic Republic of',
5440 'IQ': 'Iraq',
5441 'IE': 'Ireland',
5442 'IM': 'Isle of Man',
5443 'IL': 'Israel',
5444 'IT': 'Italy',
5445 'JM': 'Jamaica',
5446 'JP': 'Japan',
5447 'JE': 'Jersey',
5448 'JO': 'Jordan',
5449 'KZ': 'Kazakhstan',
5450 'KE': 'Kenya',
5451 'KI': 'Kiribati',
5452 'KP': 'Korea, Democratic People\'s Republic of',
5453 'KR': 'Korea, Republic of',
5454 'KW': 'Kuwait',
5455 'KG': 'Kyrgyzstan',
5456 'LA': 'Lao People\'s Democratic Republic',
5457 'LV': 'Latvia',
5458 'LB': 'Lebanon',
5459 'LS': 'Lesotho',
5460 'LR': 'Liberia',
5461 'LY': 'Libya',
5462 'LI': 'Liechtenstein',
5463 'LT': 'Lithuania',
5464 'LU': 'Luxembourg',
5465 'MO': 'Macao',
5466 'MK': 'Macedonia, the Former Yugoslav Republic of',
5467 'MG': 'Madagascar',
5468 'MW': 'Malawi',
5469 'MY': 'Malaysia',
5470 'MV': 'Maldives',
5471 'ML': 'Mali',
5472 'MT': 'Malta',
5473 'MH': 'Marshall Islands',
5474 'MQ': 'Martinique',
5475 'MR': 'Mauritania',
5476 'MU': 'Mauritius',
5477 'YT': 'Mayotte',
5478 'MX': 'Mexico',
5479 'FM': 'Micronesia, Federated States of',
5480 'MD': 'Moldova, Republic of',
5481 'MC': 'Monaco',
5482 'MN': 'Mongolia',
5483 'ME': 'Montenegro',
5484 'MS': 'Montserrat',
5485 'MA': 'Morocco',
5486 'MZ': 'Mozambique',
5487 'MM': 'Myanmar',
5488 'NA': 'Namibia',
5489 'NR': 'Nauru',
5490 'NP': 'Nepal',
5491 'NL': 'Netherlands',
5492 'NC': 'New Caledonia',
5493 'NZ': 'New Zealand',
5494 'NI': 'Nicaragua',
5495 'NE': 'Niger',
5496 'NG': 'Nigeria',
5497 'NU': 'Niue',
5498 'NF': 'Norfolk Island',
5499 'MP': 'Northern Mariana Islands',
5500 'NO': 'Norway',
5501 'OM': 'Oman',
5502 'PK': 'Pakistan',
5503 'PW': 'Palau',
5504 'PS': 'Palestine, State of',
5505 'PA': 'Panama',
5506 'PG': 'Papua New Guinea',
5507 'PY': 'Paraguay',
5508 'PE': 'Peru',
5509 'PH': 'Philippines',
5510 'PN': 'Pitcairn',
5511 'PL': 'Poland',
5512 'PT': 'Portugal',
5513 'PR': 'Puerto Rico',
5514 'QA': 'Qatar',
5515 'RE': 'Réunion',
5516 'RO': 'Romania',
5517 'RU': 'Russian Federation',
5518 'RW': 'Rwanda',
5519 'BL': 'Saint Barthélemy',
5520 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5521 'KN': 'Saint Kitts and Nevis',
5522 'LC': 'Saint Lucia',
5523 'MF': 'Saint Martin (French part)',
5524 'PM': 'Saint Pierre and Miquelon',
5525 'VC': 'Saint Vincent and the Grenadines',
5526 'WS': 'Samoa',
5527 'SM': 'San Marino',
5528 'ST': 'Sao Tome and Principe',
5529 'SA': 'Saudi Arabia',
5530 'SN': 'Senegal',
5531 'RS': 'Serbia',
5532 'SC': 'Seychelles',
5533 'SL': 'Sierra Leone',
5534 'SG': 'Singapore',
5535 'SX': 'Sint Maarten (Dutch part)',
5536 'SK': 'Slovakia',
5537 'SI': 'Slovenia',
5538 'SB': 'Solomon Islands',
5539 'SO': 'Somalia',
5540 'ZA': 'South Africa',
5541 'GS': 'South Georgia and the South Sandwich Islands',
5542 'SS': 'South Sudan',
5543 'ES': 'Spain',
5544 'LK': 'Sri Lanka',
5545 'SD': 'Sudan',
5546 'SR': 'Suriname',
5547 'SJ': 'Svalbard and Jan Mayen',
5548 'SZ': 'Swaziland',
5549 'SE': 'Sweden',
5550 'CH': 'Switzerland',
5551 'SY': 'Syrian Arab Republic',
5552 'TW': 'Taiwan, Province of China',
5553 'TJ': 'Tajikistan',
5554 'TZ': 'Tanzania, United Republic of',
5555 'TH': 'Thailand',
5556 'TL': 'Timor-Leste',
5557 'TG': 'Togo',
5558 'TK': 'Tokelau',
5559 'TO': 'Tonga',
5560 'TT': 'Trinidad and Tobago',
5561 'TN': 'Tunisia',
5562 'TR': 'Turkey',
5563 'TM': 'Turkmenistan',
5564 'TC': 'Turks and Caicos Islands',
5565 'TV': 'Tuvalu',
5566 'UG': 'Uganda',
5567 'UA': 'Ukraine',
5568 'AE': 'United Arab Emirates',
5569 'GB': 'United Kingdom',
5570 'US': 'United States',
5571 'UM': 'United States Minor Outlying Islands',
5572 'UY': 'Uruguay',
5573 'UZ': 'Uzbekistan',
5574 'VU': 'Vanuatu',
5575 'VE': 'Venezuela, Bolivarian Republic of',
5576 'VN': 'Viet Nam',
5577 'VG': 'Virgin Islands, British',
5578 'VI': 'Virgin Islands, U.S.',
5579 'WF': 'Wallis and Futuna',
5580 'EH': 'Western Sahara',
5581 'YE': 'Yemen',
5582 'ZM': 'Zambia',
5583 'ZW': 'Zimbabwe',
5584 }
5585
5586 @classmethod
5587 def short2full(cls, code):
5588 """Convert an ISO 3166-2 country code to the corresponding full name"""
5589 return cls._country_map.get(code.upper())
5590
5591
5592 class GeoUtils(object):
5593 # Major IPv4 address blocks per country
5594 _country_ip_map = {
5595 'AD': '46.172.224.0/19',
5596 'AE': '94.200.0.0/13',
5597 'AF': '149.54.0.0/17',
5598 'AG': '209.59.64.0/18',
5599 'AI': '204.14.248.0/21',
5600 'AL': '46.99.0.0/16',
5601 'AM': '46.70.0.0/15',
5602 'AO': '105.168.0.0/13',
5603 'AP': '182.50.184.0/21',
5604 'AQ': '23.154.160.0/24',
5605 'AR': '181.0.0.0/12',
5606 'AS': '202.70.112.0/20',
5607 'AT': '77.116.0.0/14',
5608 'AU': '1.128.0.0/11',
5609 'AW': '181.41.0.0/18',
5610 'AX': '185.217.4.0/22',
5611 'AZ': '5.197.0.0/16',
5612 'BA': '31.176.128.0/17',
5613 'BB': '65.48.128.0/17',
5614 'BD': '114.130.0.0/16',
5615 'BE': '57.0.0.0/8',
5616 'BF': '102.178.0.0/15',
5617 'BG': '95.42.0.0/15',
5618 'BH': '37.131.0.0/17',
5619 'BI': '154.117.192.0/18',
5620 'BJ': '137.255.0.0/16',
5621 'BL': '185.212.72.0/23',
5622 'BM': '196.12.64.0/18',
5623 'BN': '156.31.0.0/16',
5624 'BO': '161.56.0.0/16',
5625 'BQ': '161.0.80.0/20',
5626 'BR': '191.128.0.0/12',
5627 'BS': '24.51.64.0/18',
5628 'BT': '119.2.96.0/19',
5629 'BW': '168.167.0.0/16',
5630 'BY': '178.120.0.0/13',
5631 'BZ': '179.42.192.0/18',
5632 'CA': '99.224.0.0/11',
5633 'CD': '41.243.0.0/16',
5634 'CF': '197.242.176.0/21',
5635 'CG': '160.113.0.0/16',
5636 'CH': '85.0.0.0/13',
5637 'CI': '102.136.0.0/14',
5638 'CK': '202.65.32.0/19',
5639 'CL': '152.172.0.0/14',
5640 'CM': '102.244.0.0/14',
5641 'CN': '36.128.0.0/10',
5642 'CO': '181.240.0.0/12',
5643 'CR': '201.192.0.0/12',
5644 'CU': '152.206.0.0/15',
5645 'CV': '165.90.96.0/19',
5646 'CW': '190.88.128.0/17',
5647 'CY': '31.153.0.0/16',
5648 'CZ': '88.100.0.0/14',
5649 'DE': '53.0.0.0/8',
5650 'DJ': '197.241.0.0/17',
5651 'DK': '87.48.0.0/12',
5652 'DM': '192.243.48.0/20',
5653 'DO': '152.166.0.0/15',
5654 'DZ': '41.96.0.0/12',
5655 'EC': '186.68.0.0/15',
5656 'EE': '90.190.0.0/15',
5657 'EG': '156.160.0.0/11',
5658 'ER': '196.200.96.0/20',
5659 'ES': '88.0.0.0/11',
5660 'ET': '196.188.0.0/14',
5661 'EU': '2.16.0.0/13',
5662 'FI': '91.152.0.0/13',
5663 'FJ': '144.120.0.0/16',
5664 'FK': '80.73.208.0/21',
5665 'FM': '119.252.112.0/20',
5666 'FO': '88.85.32.0/19',
5667 'FR': '90.0.0.0/9',
5668 'GA': '41.158.0.0/15',
5669 'GB': '25.0.0.0/8',
5670 'GD': '74.122.88.0/21',
5671 'GE': '31.146.0.0/16',
5672 'GF': '161.22.64.0/18',
5673 'GG': '62.68.160.0/19',
5674 'GH': '154.160.0.0/12',
5675 'GI': '95.164.0.0/16',
5676 'GL': '88.83.0.0/19',
5677 'GM': '160.182.0.0/15',
5678 'GN': '197.149.192.0/18',
5679 'GP': '104.250.0.0/19',
5680 'GQ': '105.235.224.0/20',
5681 'GR': '94.64.0.0/13',
5682 'GT': '168.234.0.0/16',
5683 'GU': '168.123.0.0/16',
5684 'GW': '197.214.80.0/20',
5685 'GY': '181.41.64.0/18',
5686 'HK': '113.252.0.0/14',
5687 'HN': '181.210.0.0/16',
5688 'HR': '93.136.0.0/13',
5689 'HT': '148.102.128.0/17',
5690 'HU': '84.0.0.0/14',
5691 'ID': '39.192.0.0/10',
5692 'IE': '87.32.0.0/12',
5693 'IL': '79.176.0.0/13',
5694 'IM': '5.62.80.0/20',
5695 'IN': '117.192.0.0/10',
5696 'IO': '203.83.48.0/21',
5697 'IQ': '37.236.0.0/14',
5698 'IR': '2.176.0.0/12',
5699 'IS': '82.221.0.0/16',
5700 'IT': '79.0.0.0/10',
5701 'JE': '87.244.64.0/18',
5702 'JM': '72.27.0.0/17',
5703 'JO': '176.29.0.0/16',
5704 'JP': '133.0.0.0/8',
5705 'KE': '105.48.0.0/12',
5706 'KG': '158.181.128.0/17',
5707 'KH': '36.37.128.0/17',
5708 'KI': '103.25.140.0/22',
5709 'KM': '197.255.224.0/20',
5710 'KN': '198.167.192.0/19',
5711 'KP': '175.45.176.0/22',
5712 'KR': '175.192.0.0/10',
5713 'KW': '37.36.0.0/14',
5714 'KY': '64.96.0.0/15',
5715 'KZ': '2.72.0.0/13',
5716 'LA': '115.84.64.0/18',
5717 'LB': '178.135.0.0/16',
5718 'LC': '24.92.144.0/20',
5719 'LI': '82.117.0.0/19',
5720 'LK': '112.134.0.0/15',
5721 'LR': '102.183.0.0/16',
5722 'LS': '129.232.0.0/17',
5723 'LT': '78.56.0.0/13',
5724 'LU': '188.42.0.0/16',
5725 'LV': '46.109.0.0/16',
5726 'LY': '41.252.0.0/14',
5727 'MA': '105.128.0.0/11',
5728 'MC': '88.209.64.0/18',
5729 'MD': '37.246.0.0/16',
5730 'ME': '178.175.0.0/17',
5731 'MF': '74.112.232.0/21',
5732 'MG': '154.126.0.0/17',
5733 'MH': '117.103.88.0/21',
5734 'MK': '77.28.0.0/15',
5735 'ML': '154.118.128.0/18',
5736 'MM': '37.111.0.0/17',
5737 'MN': '49.0.128.0/17',
5738 'MO': '60.246.0.0/16',
5739 'MP': '202.88.64.0/20',
5740 'MQ': '109.203.224.0/19',
5741 'MR': '41.188.64.0/18',
5742 'MS': '208.90.112.0/22',
5743 'MT': '46.11.0.0/16',
5744 'MU': '105.16.0.0/12',
5745 'MV': '27.114.128.0/18',
5746 'MW': '102.70.0.0/15',
5747 'MX': '187.192.0.0/11',
5748 'MY': '175.136.0.0/13',
5749 'MZ': '197.218.0.0/15',
5750 'NA': '41.182.0.0/16',
5751 'NC': '101.101.0.0/18',
5752 'NE': '197.214.0.0/18',
5753 'NF': '203.17.240.0/22',
5754 'NG': '105.112.0.0/12',
5755 'NI': '186.76.0.0/15',
5756 'NL': '145.96.0.0/11',
5757 'NO': '84.208.0.0/13',
5758 'NP': '36.252.0.0/15',
5759 'NR': '203.98.224.0/19',
5760 'NU': '49.156.48.0/22',
5761 'NZ': '49.224.0.0/14',
5762 'OM': '5.36.0.0/15',
5763 'PA': '186.72.0.0/15',
5764 'PE': '186.160.0.0/14',
5765 'PF': '123.50.64.0/18',
5766 'PG': '124.240.192.0/19',
5767 'PH': '49.144.0.0/13',
5768 'PK': '39.32.0.0/11',
5769 'PL': '83.0.0.0/11',
5770 'PM': '70.36.0.0/20',
5771 'PR': '66.50.0.0/16',
5772 'PS': '188.161.0.0/16',
5773 'PT': '85.240.0.0/13',
5774 'PW': '202.124.224.0/20',
5775 'PY': '181.120.0.0/14',
5776 'QA': '37.210.0.0/15',
5777 'RE': '102.35.0.0/16',
5778 'RO': '79.112.0.0/13',
5779 'RS': '93.86.0.0/15',
5780 'RU': '5.136.0.0/13',
5781 'RW': '41.186.0.0/16',
5782 'SA': '188.48.0.0/13',
5783 'SB': '202.1.160.0/19',
5784 'SC': '154.192.0.0/11',
5785 'SD': '102.120.0.0/13',
5786 'SE': '78.64.0.0/12',
5787 'SG': '8.128.0.0/10',
5788 'SI': '188.196.0.0/14',
5789 'SK': '78.98.0.0/15',
5790 'SL': '102.143.0.0/17',
5791 'SM': '89.186.32.0/19',
5792 'SN': '41.82.0.0/15',
5793 'SO': '154.115.192.0/18',
5794 'SR': '186.179.128.0/17',
5795 'SS': '105.235.208.0/21',
5796 'ST': '197.159.160.0/19',
5797 'SV': '168.243.0.0/16',
5798 'SX': '190.102.0.0/20',
5799 'SY': '5.0.0.0/16',
5800 'SZ': '41.84.224.0/19',
5801 'TC': '65.255.48.0/20',
5802 'TD': '154.68.128.0/19',
5803 'TG': '196.168.0.0/14',
5804 'TH': '171.96.0.0/13',
5805 'TJ': '85.9.128.0/18',
5806 'TK': '27.96.24.0/21',
5807 'TL': '180.189.160.0/20',
5808 'TM': '95.85.96.0/19',
5809 'TN': '197.0.0.0/11',
5810 'TO': '175.176.144.0/21',
5811 'TR': '78.160.0.0/11',
5812 'TT': '186.44.0.0/15',
5813 'TV': '202.2.96.0/19',
5814 'TW': '120.96.0.0/11',
5815 'TZ': '156.156.0.0/14',
5816 'UA': '37.52.0.0/14',
5817 'UG': '102.80.0.0/13',
5818 'US': '6.0.0.0/8',
5819 'UY': '167.56.0.0/13',
5820 'UZ': '84.54.64.0/18',
5821 'VA': '212.77.0.0/19',
5822 'VC': '207.191.240.0/21',
5823 'VE': '186.88.0.0/13',
5824 'VG': '66.81.192.0/20',
5825 'VI': '146.226.0.0/16',
5826 'VN': '14.160.0.0/11',
5827 'VU': '202.80.32.0/20',
5828 'WF': '117.20.32.0/21',
5829 'WS': '202.4.32.0/19',
5830 'YE': '134.35.0.0/16',
5831 'YT': '41.242.116.0/22',
5832 'ZA': '41.0.0.0/11',
5833 'ZM': '102.144.0.0/13',
5834 'ZW': '102.177.192.0/18',
5835 }
5836
5837 @classmethod
5838 def random_ipv4(cls, code_or_block):
5839 if len(code_or_block) == 2:
5840 block = cls._country_ip_map.get(code_or_block.upper())
5841 if not block:
5842 return None
5843 else:
5844 block = code_or_block
5845 addr, preflen = block.split('/')
5846 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5847 addr_max = addr_min | (0xffffffff >> int(preflen))
5848 return compat_str(socket.inet_ntoa(
5849 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5850
5851
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that allows overriding the proxy on a per-request basis
    via the internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    # Lambda defaults bind the loop's `type` and the bound
                    # method at definition time; '__noproxy__' is the
                    # sentinel for "no proxy configured for this scheme".
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (if present) overrides the handler default
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do the actual wrapping of the socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5875
5876
5877 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5878 # released into Public Domain
5879 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5880
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:int, blocksize:int) : bytes
    Convert an integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Minimal big-endian encoding via int.to_bytes (replaces the old manual
    # 32-bit struct-pack loop). 0 -- and, matching the historical behavior,
    # any negative n -- encodes as a single zero byte.
    s = b'\x00' if n <= 0 else n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # add back some pad bytes so len(s) is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = b'\x00' * (blocksize - len(s) % blocksize) + s
    return s
5909
5910
def bytes_to_long(s):
    """bytes_to_long(bytes) : int
    Convert a big-endian byte string to an integer.

    This is (essentially) the inverse of long_to_bytes(). An empty input
    yields 0, same as the historical struct-unpack implementation.
    """
    return int.from_bytes(s, 'big')
5926
5927
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Interpret the data as a little-endian integer. int.from_bytes replaces
    # the old int(binascii.hexlify(data[::-1]), 16) round-trip and, unlike
    # it, also handles empty input (-> 0) instead of raising ValueError.
    payload = int.from_bytes(data, 'little')
    encrypted = pow(payload, exponent, modulus)
    return '%x' % encrypted
5943
5944
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS #1 v1.5): EM = 0x00 || 0x02 || PS || 0x00 || M, where
    # the padding string PS must consist of *non-zero* pseudo-random octets.
    # randint(1, 255), not (0, 254): a zero octet inside PS would make the
    # padding terminate early on decryption.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5958
5959
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*.

    *table* supplies the digit characters; when omitted, digits are taken
    from 0-9a-zA-Z (so n may be at most 62).
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5976
5977
def decode_packed_codes(code):
    """Unpack JavaScript obfuscated with the common 'p.a.c.k.e.r' scheme."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map every base-N token back to its original symbol; an empty symbol
    # means the token stands for itself.
    symbol_table = {}
    for idx in range(count):
        token = encode_base_n(idx, base)
        symbol_table[token] = symbols[idx] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5994
5995
def caesar(s, alphabet, shift):
    """Caesar-shift every character of *s* found in *alphabet* by *shift*
    positions (wrapping around); characters outside *alphabet* pass through."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
6003
6004
def rot47(s):
    """Apply the ROT47 cipher: rotate every printable ASCII character
    ('!' through '~') by 47 positions; everything else is unchanged."""
    # chr(33)..chr(126) is exactly the 94-character ROT47 alphabet
    alphabet = ''.join(map(chr, range(33, 127)))
    return ''.join(
        alphabet[(alphabet.index(ch) + 47) % 94] if ch in alphabet else ch
        for ch in s)
6007
6008
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="value",...') into a
    dict, stripping the surrounding double quotes from quoted values."""
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    }
6016
6017
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, like JS `>>>`."""
    if val >= 0:
        return val >> n
    # Map the negative value onto its unsigned 32-bit representation first
    return (val + 0x100000000) >> n
6020
6021
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    # Reference: https://www.w3.org/TR/PNG/
    # Decodes a PNG byte string into (width, height, pixels), where `pixels`
    # is a list of rows, each row a flat list of byte values (3 per pixel).
    # NOTE(review): assumes a non-interlaced 8-bit RGB image -- the stride
    # and the filters below step 3 bytes per pixel; confirm before feeding
    # other color types/bit depths.
    header = png_data[8:]

    # Validate the fixed PNG signature and require IHDR as the first chunk
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Chunk fields are big-endian unsigned ints of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: length(4) + type(4) + data(length) + CRC(4)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (checked above); width/height are its first 8 bytes
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image data may be split across several IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 bytes per RGB pixel)
    pixels = []

    # Byte at linear index `idx` of the already-reconstructed image
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo the per-scanline filters (https://www.w3.org/TR/PNG/#9Filters);
    # each scanline is prefixed with one filter-type byte
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # `left` is the matching byte of the previous pixel (3 bytes back);
            # `up` is the same byte of the scanline above
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the predictor nearest to p; ties prefer a, then b, then c
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6127
6128
def write_xattr(path, key, value):
    """Set extended attribute `key` to `value` (bytes) on the file at `path`.

    Tries, in order: the `pyxattr`/`xattr` Python modules, NTFS Alternate
    Data Streams on Windows, then the `setfattr`/`xattr` command-line tools.
    Raises XAttrMetadataError when writing fails, XAttrUnavailableError when
    no usable implementation can be found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Fall back to the setfattr/xattr CLI tools on other Unixes
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a (UTF-8) string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6211
6212
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31 and
    return it as a dict mapping the given field names to string values."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    chosen = earliest + datetime.timedelta(days=random.randint(0, span_days))
    return {
        field: str(part)
        for field, part in zip(
            (year_field, month_field, day_field),
            (chosen.year, chosen.month, chosen.day))
    }
6223
6224
# Templates for internet shortcut files, which are plain text files.
# Windows/KDE '.url' shortcut (INI-like format)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS '.webloc' shortcut (XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org '.desktop' entry (Linux desktop shortcut)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()

# Shortcut type -> template; templates are filled via '%' formatting
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
6256
6257
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.

    # Only omit an explicit port when it matches the scheme's default.
    # (The previous code dropped ':80' for every scheme, including https,
    # which would silently change the meaning of 'https://host:80/...'.)
    default_port = {'http': 80, 'https': 443}.get(iri_parts.scheme)
    if iri_parts.port is not None and iri_parts.port != default_port:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6300
6301
def to_high_limit_path(path):
    """On Windows, return an absolute '\\\\?\\'-prefixed path to bypass the
    MAX_PATH limit; on every other platform return *path* unchanged."""
    if sys.platform in ['win32', 'cygwin']:
        # Extended-length path prefix. The maximum allowed length for the
        # individual path segments may still be quite limited.
        return '\\\\?\\' + os.path.abspath(path)

    return path
6308
6309
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format a value (or obj[field]) with *template*, returning *default*
    when the value is one of *ignore*. *func* optionally transforms the
    value before formatting; a transformed value in *ignore* also yields
    *default*."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if val not in ignore and func:
        val = func(val)
    if val in ignore:
        return default
    return template % val
6318
6319
def clean_podcast_url(url):
    """Strip known podcast analytics/measurement redirect prefixes from *url*."""
    tracking_prefix = re.compile(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''')
    return tracking_prefix.sub('', url)
6335
6336
_HEX_TABLE = '0123456789abcdef'  # lowercase hexadecimal digits
6338
6339
def random_uuidv4():
    """Return a random version-4 UUID as a lowercase hyphenated string.

    Uses the stdlib uuid module so the variant bits are RFC-4122-correct;
    the old template substitution randomized the variant nibble too.
    """
    import uuid  # local import: keeps the module's import surface unchanged
    return str(uuid.uuid4())
6342
6343
def make_dir(path, to_screen=None):
    """Create the parent directory of *path* (and any missing ancestors).

    Returns True on success (including when the directory already exists),
    False on failure. On failure the error is reported through *to_screen*
    when it is callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            # exist_ok avoids the check-then-create race of the old
            # `if not os.path.exists(dn)` guard
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # Was `if callable(to_screen) is not None:`, which is always True
        # (callable() returns a bool) and crashed when to_screen was None
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6354
6355
def get_executable_path():
    """Absolute path of the directory yt-dlp is running from: the binary's
    directory for frozen (PyInstaller) builds, otherwise the project root
    relative to this module (two levels up when imported from a zip)."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6365
6366
def load_plugins(name, suffix, namespace):
    """Load plugin classes from ytdlp_plugins/<name>/__init__.py.

    Every attribute of the plugin module whose name ends with *suffix* is
    copied into *namespace* (unless the name is already taken) and is also
    returned in a dict. A missing plugin package is silently ignored.
    """
    classes = {}
    try:
        plugins_spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        plugins = importlib.util.module_from_spec(plugins_spec)
        sys.modules[plugins_spec.name] = plugins
        plugins_spec.loader.exec_module(plugins)
        # note: deliberately NOT reusing `name` for the loop variable --
        # it is the plugin package name parameter above
        for attr_name in dir(plugins):
            if attr_name in namespace:
                continue  # never overwrite an existing (e.g. built-in) name
            if not attr_name.endswith(suffix):
                continue
            klass = getattr(plugins, attr_name)
            classes[attr_name] = namespace[attr_name] = klass
    except FileNotFoundError:
        pass
    return classes
6385
6386
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings or "...".
                            When a function is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        # Lower-case the path keys up front; dict keys are lowered on lookup
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal, reset per path below) tracks how many branching
        # levels ("...", tuples, callables) were entered; the caller uses it
        # to flatten the correspondingly nested result lists.
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # A tuple of keys: traverse each alternative, then treat the
                # collected results like a "..." branch
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch into every value of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Filter function: keep only items whose key/index matches
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Plain dict lookup; falls back to case-insensitive scan
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings into int indices / slices
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # ':' over the full sequence is equivalent to "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a "keep or None" filter
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branching occurred: flatten the nested lists (depth - 1
                # levels) and drop Nones, then apply the type filter
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6483
6484
def traverse_dict(dictn, keys, casesense=True):
    '''Deprecated wrapper around traverse_obj(), kept only for backward compatibility. Do not use'''
    return traverse_obj(
        dictn, keys, casesense=casesense,
        is_user_input=True, traverse_string=True)
6489
6490
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* itself when it is an iterable (excluding the atomic
    *allowed_types*); otherwise wrap it into a one-element tuple."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6493
6494
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers={}):
    """Create a JWS Compact Serialization token signed with HMAC-SHA256.

    NOTE(review): this uses standard (padded, non-url-safe) base64 rather
    than the unpadded base64url that RFC 7515 mandates -- kept as-is since
    callers may rely on the current output.
    """
    jose_header = {
        'alg': 'HS256',
        'typ': 'JWT',
        **headers,
    }
    segments = [
        base64.b64encode(json.dumps(jose_header).encode('utf-8')),
        base64.b64encode(json.dumps(payload_data).encode('utf-8')),
    ]
    signing_input = b'.'.join(segments)
    signature = hmac.new(key.encode('utf-8'), signing_input, hashlib.sha256).digest()
    segments.append(base64.b64encode(signature))
    return b'.'.join(segments)
6512
6513
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode the payload of a JWS Compact Serialization token.

    Does NOT verify the signature. Accepts both padded and (standard,
    per RFC 7515) unpadded base64url segments.
    """
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    # JWT strips the '=' padding; urlsafe_b64decode raises binascii.Error
    # on such input, so restore the padding first.
    payload_data = json.loads(base64.urlsafe_b64decode(
        payload_b64 + '=' * (-len(payload_b64) % 4)))
    return payload_data
6519
6520
def supports_terminal_sequences(stream):
    """Return True if ANSI/VT escape sequences can safely be written to *stream*."""
    if compat_os_name == 'nt':
        # Windows consoles understand VT sequences only from build 10586 (TH2) on
        if get_windows_version() < (10, 0, 10586):
            return False
    elif not os.getenv('TERM'):
        # No TERM set -> likely not running inside a capable terminal
        return False
    try:
        return stream.isatty()
    except BaseException:
        # stream may lack isatty(), be closed, or raise anything else
        return False
6531
6532
# Matches ANSI SGR ("Select Graphic Rendition") sequences, e.g. '\033[0;32m'
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return *string* with all terminal color/formatting escape sequences stripped."""
    return _terminal_sequences_re.sub('', string)
6538
6539
def number_of_digits(number):
    """Number of characters in the base-10 integer representation of
    *number*, including a leading '-' for negative values."""
    return len(str(int(number)))