]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
[outtmpl] Format type `U` for unicode normalization
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import hashlib
20 import hmac
21 import imp
22 import io
23 import itertools
24 import json
25 import locale
26 import math
27 import operator
28 import os
29 import platform
30 import random
31 import re
32 import socket
33 import ssl
34 import subprocess
35 import sys
36 import tempfile
37 import time
38 import traceback
39 import xml.etree.ElementTree
40 import zlib
41
42 from .compat import (
43 compat_HTMLParseError,
44 compat_HTMLParser,
45 compat_HTTPError,
46 compat_basestring,
47 compat_chr,
48 compat_cookiejar,
49 compat_ctypes_WINFUNCTYPE,
50 compat_etree_fromstring,
51 compat_expanduser,
52 compat_html_entities,
53 compat_html_entities_html5,
54 compat_http_client,
55 compat_integer_types,
56 compat_numeric_types,
57 compat_kwargs,
58 compat_os_name,
59 compat_parse_qs,
60 compat_shlex_quote,
61 compat_str,
62 compat_struct_pack,
63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
66 compat_urllib_parse_urlencode,
67 compat_urllib_parse_urlparse,
68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
74 compat_xpath,
75 )
76
77 from .socks import (
78 ProxyType,
79 sockssocket,
80 )
81
82
def register_socks_protocols():
    """Make urlparse treat SOCKS URL schemes as netloc-carrying.

    In Python < 2.6.5, urlsplit() suffers from https://bugs.python.org/issue7904:
    URLs whose scheme is absent from urlparse.uses_netloc are not handled
    correctly, so each SOCKS scheme is appended to that registry once.
    """
    registered = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in registered:
            registered.append(proto)
90
91
# This is not clearly defined otherwise
# (the type of a compiled regex pattern, obtained by compiling an empty
# pattern; presumably used for isinstance() checks where either a regex
# object or a plain string may be accepted — confirm against callers)
compiled_regex_type = type(re.compile(''))
94
95
96 def random_user_agent():
97 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
98 _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
1675 )
1676 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
# Default HTTP headers sent with every request. The User-Agent is picked
# once at import time from the Chrome version pool above (random_user_agent).
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings; presumably selected by extractors
# that need a specific browser identity — TODO confirm against callers.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1691
1692
# Sentinel distinguishing "no default supplied" from an explicit None default
# (see xpath_element() and friends below).
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code, for parsing localized date strings.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized as downloadable formats.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII transliteration ('Æ' -> 'AE',
# 'ß' -> 'ss', etc.); the zip pairs the key string with the chained values.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# strptime() format strings tried when parsing free-form date strings.
# NOTE(review): presumably consumed by the unified date-parsing helpers
# elsewhere in this file — confirm against callers. The st/nd/rd/th variants
# match ordinal day suffixes ("May 1st 2020").
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Additional formats for locales writing numeric dates day-first (31/12/2020).
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional formats for locales writing numeric dates month-first (12/31/2020).
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of JavaScript obfuscated with the P.A.C.K.E.R. packer.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks; JSON-LD payload in group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1791
1792
def preferredencoding():
    """Return the best text encoding for this system.

    Uses locale.getpreferredencoding(), falling back to 'UTF-8' when the
    lookup fails or reports a codec that cannot actually encode text.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Smoke-test the reported codec; some systems report unusable values
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1806
1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temp file next to the target so the final os.rename() stays
    # on the same filesystem (rename across devices is not atomic).
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with mode 0600; widen to the
            # process's default permissions (0666 minus the current umask).
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file, then propagate the error
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath that has attribute key
        (restricted to value val when val is given), i.e. xpath[@key=val]."""
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # ElementTree before 2.7 lacks attribute predicates: scan manually
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1882
1883 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1884 # the namespace parameter
1885
1886
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into ElementTree's
    '{namespace-uri}tag' form, resolving prefixes through ns_map."""
    def expand(component):
        parts = component.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(c) for c in path.split('/'))
1897
1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (a single expression or an
    iterable of candidates tried in order).

    On no match: return default when supplied, raise ExtractorError when
    fatal, otherwise return None.
    """
    if isinstance(xpath, (str, compat_str)):
        candidates = [xpath]
    else:
        candidates = xpath

    found = None
    for xp in candidates:
        found = node.find(compat_xpath(xp))
        if found is not None:
            break

    # NB: compare against None, not truthiness - Elements without children
    # are falsy
    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element %s' % (xpath if name is None else name))
    return None
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    # Element exists but carries no text: fall back like a failed match
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute key on the element matched by
    xpath[@key]; honours default/fatal like xpath_element()."""
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML attribute %s'
            % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1948
1949
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An id lookup is just an attribute match on 'id'
    return get_element_by_attribute('id', id, html)
1953
1954
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    # None (rather than IndexError) when no such element exists
    return retval[0] if retval else None
1959
1960
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute
    (or None); see get_elements_by_attribute() for parameter semantics."""
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None
1964
1965
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute is a space-separated list; \b ensures class_name
    # matches as a whole word within it. escape_value=False because the
    # pattern itself is already a regex.
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    if escape_value:
        value = re.escape(value)

    contents = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # Strip one layer of surrounding quotes, if any
        if content.startswith(('"', "'")):
            content = content[1:-1]

        contents.append(unescapeHTML(content))

    return contents
1996
1997
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attribute dict of the most recent start tag fed to the parser
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs is a list of (name, value) pairs; value is None for
        # valueless attributes (stdlib HTMLParser contract)
        self.attrs = dict(attrs)
2007
2008
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # return whatever attributes were gathered before the failure
        pass
    return attr_parser.attrs
2033
2034
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Collapse literal newlines, then re-introduce them at <br> and
    # paragraph boundaries before stripping the remaining markup.
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities and trim surrounding whitespace
    return unescapeHTML(text).strip()
2050
2051
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means stdout; on Windows switch it to binary mode so
            # media bytes are not mangled by newline translation
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors will not be fixed by renaming - give up early
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2082
2083
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a Unix timestamp, or None if
    the string cannot be parsed."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2091
2092
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Transliterate accented characters to ASCII in restricted mode
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and codepoint > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps: keep 12:34:56 readable as 12_34_56
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in s)
    if not is_id:
        # Collapse runs of underscores introduced by the substitutions
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2134
2135
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    # On non-Windows platforms this is a no-op unless force is set.
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters forbidden in Windows path components, as well as
    # trailing whitespace/dots, with '#'; keep '.'/'..' navigation parts.
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve absolute paths when forced on non-Windows
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2159
2160
def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        fixed_url, substitutions = re.subn(mistake, fixup, url)
        if substitutions:
            return fixed_url
    return url
2177
2178
def extract_basic_auth(url):
    """Split userinfo credentials out of url.

    Returns (url_without_credentials, auth_header); auth_header is a
    'Basic ...' value, or None when the URL carries no username.
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Rebuild the netloc from host (and port, if any), dropping user:pass
    netloc = (parts.hostname if parts.port is None
              else '%s:%d' % (parts.hostname, parts.port))
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    auth_payload = base64.b64encode(credentials.encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2189
2190
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from url after sanitizing/escaping it and
    moving any userinfo (user:pass@host) into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # Request's positional signature is (url, data, headers): args[1]
        # is the headers dict when passed positionally
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2197
2198
def expand_path(s):
    """Expand shell variables and ~"""
    # $VAR/%VAR% expansion first, then ~user expansion
    return os.path.expandvars(compat_expanduser(s))
2202
2203
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    deduped = []
    for element in iterable:
        # Linear membership scan (not a set) keeps support for
        # unhashable elements
        if element not in deduped:
            deduped.append(element)
    return deduped
2211
2212
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: &#NNN; or &#xHHH;
    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2242
2243
def unescapeHTML(s):
    """Replace HTML entities in s with their decoded characters
    (None passes through)."""
    if s is None:
        return None
    assert type(s) == compat_str

    # Each '&name;' / '&#N;' sequence is handled by _htmlentity_transform
    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2251
2252
def escapeHTML(text):
    """Escape &, <, >, and both quote characters for safe HTML embedding."""
    # '&' must be escaped first so the other replacements are not re-escaped
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for char, entity in replacements:
        text = text.replace(char, entity)
    return text
2262
2263
def process_communicate_or_kill(p, *args, **kwargs):
    """Run p.communicate(); if it is interrupted for any reason, kill the
    process and reap it before re-raising, so no zombie is left behind."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2271
2272
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments on this platform."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'
2283
2284
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    Encode a text filename to bytes where the platform/Python version
    requires it; on Python 3 (and Unicode-capable platforms) it is a no-op.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2307
2308
def decodeFilename(b, for_subprocess=False):
    """Decode a bytes filename back to text on Python 2; anything else
    passes through unchanged (Python 3 works with str filenames natively)."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2318
2319
def encodeArgument(s):
    """Encode a subprocess argument (delegates to encodeFilename with
    for_subprocess=True)."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2327
2328
def decodeArgument(b):
    """Inverse of encodeArgument(): decode a subprocess argument."""
    return decodeFilename(b, True)
2331
2332
def decodeOption(optval):
    """Decode a command-line option value to a text string using the
    system's preferred encoding (None passes through)."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2341
2342
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]MM<delim>SS, optionally
    with a '.mmm' millisecond suffix when msec is true.

    NOTE(review): values of exactly 3600/60 fall into the smaller-unit
    branch ('60:00' / '60'); kept as-is for backward compatibility.
    """
    if secs > 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs > 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    # Bug fix: '%03d' % (secs % 1) truncated the sub-second fraction to 0,
    # so the suffix was always '.000'; scale to milliseconds instead.
    return '%s.%03d' % (ret, secs % 1 * 1000) if msec else ret
2351
2352
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honouring the 'nocheckcertificate'
    option, with fallbacks for the SSL APIs of older Python versions."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname must be disabled before dropping verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2376
2377
def bug_reports_message(before=';'):
    """Build the standard 'please report this issue' suffix appended to
    error messages, capitalized when it starts a new sentence."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           + ' Make sure you are using the latest version; %s.' % update_cmd
           + ' Be sure to call yt-dlp with the --verbose flag and include its complete output.')

    before = before.rstrip()
    # Capitalize when the message starts a sentence of its own
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
2392
2393
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors; all errors below derive from it."""
    pass
2397
2398
# Exception types that indicate network trouble rather than a programming
# error; ExtractorError marks these as "expected" (no bug-report prompt).
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    # Not present on all Python versions
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2403
2404
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        cause, if given, is the underlying exception; ie is presumably the
        raising extractor (name or instance) - TODO confirm against callers.
        """
        # Network failures are always "expected" - not a bug, so the final
        # message omits the bug-report prompt
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception

        # Compose "[ie] video_id: msg (caused by ...)" - format_field drops
        # each part whose value is absent
        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        # Render the stored traceback as a string, or None when unset
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2434
2435
class UnsupportedError(ExtractorError):
    """Raised for URLs no extractor can handle; always an expected error."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url
2441
2442
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2446
2447
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Always expected: geo-blocking is a site policy, not a bug
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # presumably a list of country codes the video IS available in -
        # TODO confirm against callers
        self.countries = countries
2459
2460
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2473
2474
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2482
2483
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2491
2492
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the raw message accessible to callers
        self.msg = msg
2503
2504
class ExistingVideoReached(YoutubeDLError):
    """ An already-downloaded video was reached (the original docstring said
    '--max-downloads limit has been reached', apparently a copy-paste error;
    presumably raised for --break-on-existing - TODO confirm). """
    pass
2508
2509
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video was reached (the original docstring said
    '--max-downloads limit has been reached', apparently a copy-paste error;
    presumably raised for --break-on-reject - TODO confirm). """
    pass
2513
2514
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
    pass
2518
2519
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2523
2524
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2532
2533
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2549
2550
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails; classifies the
    failure into self.reason: 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'."""
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code  # errno value, when available
        self.msg = msg

        # Parsing code and msg
        # The string checks cover platforms/tools that report the condition
        # in the message text rather than via errno
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2565
2566
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no mechanism for writing extended attributes is available."""
    pass
2569
2570
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection object, optionally bound to the
    'source_address' configured in ydl_handler._params."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the source address: a dotted
            # string means IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2634
2635
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' pseudo-header.

    When the marker is present, the Accept-Encoding header (any case) is
    dropped as well, so the real request goes out uncompressed. The input
    mapping is never mutated; a filtered copy is returned instead.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    # NB: plain dict() call instead of a dict comprehension for python 2.6 compatibility
    stripped = dict((key, value) for key, value in headers.items()
                    if key.lower() != 'accept-encoding')
    del stripped['Youtubedl-no-compression']
    return stripped
2644
2645
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict; stored so connection factories
        # (_create_http_connection) can read options such as source_address
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open an HTTP connection, optionally tunnelled through a SOCKS proxy."""
        conn_class = compat_http_client.HTTPConnection

        # The internal Ytdl-socks-proxy pseudo-header selects a SOCKS proxy;
        # strip it so it never reaches the wire
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress a 'deflate' response body (raw stream or zlib-wrapped)."""
        if not data:
            return data
        try:
            # First try a raw deflate stream, as sent by some servers
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Fall back to a standard zlib-wrapped stream
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decompress gzip/deflate bodies and re-escape redirect URLs."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2769
2770
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through a SOCKS proxy.

    socks_proxy is a URL like 'socks5://user:pass@host:port'; supported
    schemes are socks/socks4 (SOCKS4), socks4a and socks5.

    Raises ValueError for an unsupported proxy scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously an unknown scheme fell through and triggered an obscure
        # NameError on socks_type below; fail with an explicit error instead
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Credentials may be percent-encoded in the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            """Open the proxied socket, wrapping it in TLS for HTTPS connections."""
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2812
2813
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting SOCKS proxying and a custom connection class."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        if not https_conn_class:
            https_conn_class = compat_http_client.HTTPSConnection
        self._https_conn_class = https_conn_class
        self._params = params

    def https_open(self, req):
        """Open an HTTPS connection, honoring the Ytdl-socks-proxy pseudo-header."""
        connection_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Internal header only - must not be sent to the server
            connection_class = make_socks_conn_class(connection_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        extra_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        conn_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(conn_factory, req, **extra_kwargs)
2837
2838
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix used in Netscape cookie files to mark HttpOnly cookies; such
    # lines are otherwise ordinary entries
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # A valid entry has exactly 7 tab-separated fields (see _CookieFileEntry)
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Field layout of one cookies.txt line, in on-disk order
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                # Honor the same filters as http.cookiejar's save()
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate one cookies.txt line, stripping the HttpOnly prefix so
            # that MozillaCookieJar can parse it; raises LoadError on bad lines
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries instead of failing the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2955
2956
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that reuses its HTTP hooks for HTTPS traffic as well."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # NOTE: a Python 2 workaround that percent-encoded non-ASCII
        # Set-Cookie headers before processing used to live here (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769); the response is
        # now passed straight through to the stock processor.
        return compat_urllib_request.HTTPCookieProcessor.http_response(
            self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2979
2980
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Route all supported 3xx codes through the stock 302 machinery,
    # which ends up calling redirect_request() below
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # GET/HEAD may follow any supported redirect; POST only 301/302/303
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Body-describing headers must not be forwarded: the redirected
        # request is a bodyless GET/HEAD (or re-posted separately)
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3036
3037
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (timedelta, remainder): the UTC offset as a timedelta (zero when
    no explicit +hh:mm/-hhmm offset is present) and the date string with the
    designator removed (unchanged when nothing matched).
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
               (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                    # preceded by 4 digits or hh:mm or
                  (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))      # not preceded by 3 alpha word or >= 4 alpha or 2 digits
               [ ]?                                              # optional space
               (?P<sign>\+|-)                                    # +/-
               (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})        # hh[:]mm
            $)
        ''', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]
    if not m or not m.group('sign'):
        # Either no designator at all, or a bare 'Z' (UTC): zero offset
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3062
3063
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime cannot handle fractional seconds here; strip them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        # Pull an explicit UTC offset off the end of the string, if any
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        return None
3081
3082
def date_formats(day_first=True):
    """Return the candidate strptime formats, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3085
3086
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    result = None
    # Commas never carry meaning in dates; turn them into spaces
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    # Drop any explicit UTC offset - only the calendar date matters here
    _, date_str = extract_timezone(date_str)

    # NB: deliberately no break - a later matching format overrides earlier ones
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if result is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                result = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if result is not None:
        return compat_str(result)
3113
3114
def unified_timestamp(date_str, day_first=True):
    """Return a POSIX timestamp for a free-form date string, or None.

    day_first controls how ambiguous numeric dates are interpreted
    (via date_formats()). Returns None when nothing parses.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Remember a PM marker now - it is stripped from the string below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Last resort: RFC 2822 style dates, which carry their own offset
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3146
3147
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Handle URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3159
3160
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename for *filename*, e.g. 'video.en.vtt'."""
    sub_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, sub_ext, expected_real_ext)
3163
3164
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
               auto|microsecond|second|minute|hour|day.
               auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        # Defer rounding until the unit in date_str is known (see below)
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Recurse to resolve the base date (e.g. 'now' in 'now-2weeks')
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have variable length; use the calendar-aware helper
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                # timedelta has no 'weeks' kwarg in this scheme; convert to days
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            # 'auto': round to the unit the user actually wrote
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3205
3206
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3215
3216
def datetime_add_months(dt, months):
    """Return *dt* shifted by *months* (may be negative), clamping the day.

    E.g. Jan 31 + 1 month yields Feb 28 (or 29 in a leap year).
    """
    total_months = dt.year * 12 + (dt.month - 1) + months
    new_year, month_index = divmod(total_months, 12)
    new_month = month_index + 1
    # Clamp to the last valid day of the target month
    new_day = min(dt.day, calendar.monthrange(new_year, new_month)[1])
    return dt.replace(new_year, new_month, new_day)
3224
3225
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    ('microsecond' returns dt unchanged; otherwise 'second'/'minute'/'hour'/'day').
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half-up to the nearest multiple of the unit
    rounded = ((timestamp + seconds_per_unit / 2) // seconds_per_unit) * seconds_per_unit
    return datetime.datetime.utcfromtimestamp(rounded)
3242
3243
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.

    Strings that do not look like YYYYMMDD are returned unchanged.
    """
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '%s-%s-%s' % match.groups()
3252
3253
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the widest possible range
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3283
3284
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Decode with the locale's preferred encoding if we got bytes back
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3293
3294
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3301
3302
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors to GetStdHandle ids
    # (-11 == STD_OUTPUT_HANDLE, -12 == STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is not an interactive console (e.g. a pipe or
        # redirected file); WriteConsoleW only works on real consoles
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first char outside the Basic Multilingual Plane
        # (ord > 0xffff, i.e. needs a UTF-16 surrogate pair), or len(s)
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write runs of BMP characters (max 1024 at a time), then handle a
        # single non-BMP character per iteration
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3376
3377
def write_string(s, out=None, encoding=None):
    """Write the unicode string *s* to *out* (default sys.stderr) and flush.

    Uses the Windows console API when possible, otherwise encodes for
    byte-mode streams or writes through a text stream's binary buffer.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer WriteConsoleW on Windows so non-ANSI characters survive
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode explicitly so
        # we control the encoding and error handling
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3398
3399
def bytes_to_intlist(bs):
    """Turn a byte string into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints already
        return list(bs)
    # Python 2: indexing yields 1-char strings
    return [ord(c) for c in bs]
3407
3408
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values back into a byte string."""
    if not xs:
        return b''
    fmt = '%dB' % len(xs)
    return compat_struct_pack(fmt, *xs)
3413
3414
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) using LockFileEx on Windows and flock elsewhere
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes mirror of the Win32 OVERLAPPED structure required by
        # LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # low/high halves covering the whole lockable range of the file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED alive on the file object for the later unlock
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 == LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Fallback stubs: fail loudly rather than pretending to lock

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3488
3489
class locked_file(object):
    """File wrapper holding an OS-level lock for the lifetime of a with-block."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Writers take an exclusive lock; readers share one
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3519
3520
def get_filesystem_encoding():
    """Return the filesystem encoding; fall back to UTF-8 when undetermined."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3524
3525
def shell_quote(args):
    """Return *args* joined into one safely shell-quoted command line."""
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # Filenames may arrive as bytes (via encodeFilename); decode them first
        if isinstance(arg, bytes):
            return arg.decode(fs_encoding)
        return arg

    return ' '.join(compat_shlex_quote(_as_text(a)) for a in args)
3535
3536
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any payload already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    fragment = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, fragment)
3545
3546
def unsmuggle_url(smug_url, default=None):
    """Extract data hidden by smuggle_url(); returns (url, data)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3554
3555
def format_bytes(bytes):
    """Return a human-readable size string ('1.00KiB') for a byte count.

    Accepts int, float, numeric str or None; None yields 'N/A'.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    SUFFIXES = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes <= 0.0:
        # math.log is undefined for values <= 0; show them as plain bytes
        exponent = 0
    else:
        # Clamp so absurdly large values no longer IndexError past YiB
        exponent = min(int(math.log(bytes, 1024.0)), len(SUFFIXES) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, SUFFIXES[exponent])
3568
3569
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of *s* using *unit_table*.

    Returns the value in base units as an int, or None if nothing matched.
    """
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # ',' is accepted as a decimal separator too
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3579
3580
def parse_filesize(s):
    """Parse a human-readable file size like '2.5 MiB' into bytes."""
    if s is None:
        return None

    # Build the unit table programmatically. For every SI prefix we accept
    # the official decimal and binary symbols and the spelled-out names,
    # plus - although of course incorrect and unofficial - the common
    # lower/upper-case variants.
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    _PREFIXES = [
        ('k', 'kilo', 'kibi'),
        ('m', 'mega', 'mebi'),
        ('g', 'giga', 'gibi'),
        ('t', 'tera', 'tebi'),
        ('p', 'peta', 'pebi'),
        ('e', 'exa', 'exbi'),
        ('z', 'zetta', 'zebi'),
        ('y', 'yotta', 'yobi'),
    ]
    for exp, (sym, dec_name, bin_name) in enumerate(_PREFIXES, start=1):
        decimal, binary = 1000 ** exp, 1024 ** exp
        upper = sym.upper()
        _UNIT_TABLE[upper + 'iB'] = binary         # 'KiB'
        _UNIT_TABLE[upper + 'B'] = decimal         # 'KB'
        _UNIT_TABLE[sym + 'B'] = binary            # 'kB' (treated as binary)
        _UNIT_TABLE[upper + 'b'] = decimal         # 'Kb'
        _UNIT_TABLE[sym + 'b'] = decimal           # 'kb'
        _UNIT_TABLE[dec_name + 'bytes'] = decimal  # 'kilobytes'
        _UNIT_TABLE[bin_name + 'bytes'] = binary   # 'kibibytes'

    return lookup_unit_table(_UNIT_TABLE, s)
3650
3651
def parse_count(s):
    """Parse a human-readable count like '1.2M' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers (possibly with separators) need no unit table
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    _UNIT_TABLE = dict.fromkeys(('k', 'K'), 1000)
    _UNIT_TABLE.update(dict.fromkeys(('m', 'M', 'kk', 'KK'), 1000 ** 2))

    return lookup_unit_table(_UNIT_TABLE, s)
3671
3672
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4K'."""
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {'width': int(m.group('w')), 'height': int(m.group('h'))}

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # 4K -> 2160 lines, 8K -> 4320 lines
        return {'height': int(m.group(1)) * 540}

    return {}
3693
3694
def parse_bitrate(s):
    """Return the bitrate in KBit/s parsed from strings like '128 kbps'."""
    if not isinstance(s, compat_str):
        return
    m = re.search(r'\b(\d+)\s*kbps', s)
    return int(m.group(1)) if m else None
3701
3702
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3712
3713
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbrevs = [full[:3] for full in ENGLISH_MONTH_NAMES]
    if abbrev in abbrevs:
        return abbrevs.index(abbrev) + 1
    return None
3722
3723
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in *xml_str* by '&amp;', leaving entities alone."""
    # The negative lookahead keeps named entities and numeric character
    # references (&#...; / &#x...;) untouched
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;', xml_str)
3730
3731
def setproctitle(title):
    """Best-effort attempt to set the process title via glibc prctl()."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    encoded = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(encoded))
    buf.value = encoded
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3756
3757
def remove_start(s, start):
    """Strip prefix *start* from *s* if present; tolerates s=None."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3760
3761
def remove_end(s, end):
    """Strip suffix *end* from *s* if present; tolerates s=None."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3764
3765
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding *s*."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3773
3774
def get_domain(url):
    """Return the 'domain.tld' part of *url*, scheme and leading www. removed."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not m:
        return None
    return m.group('domain')
3778
3779
def url_basename(url):
    """Return the last path segment of *url* (query/fragment excluded)."""
    path = compat_urlparse.urlparse(url).path
    segments = path.strip('/').split('/')
    return segments[-1]
3783
3784
def base_url(url):
    """Return *url* truncated after the last '/' before any query/fragment."""
    # NOTE: like the original, raises AttributeError when no slash follows
    # the host part - callers rely on an exception for malformed input
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3787
3788
def urljoin(base, path):
    """Join *base* and *path* like urlparse.urljoin, returning None for
    unusable inputs (empty path, non-http(s) base)."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Already absolute (or protocol-relative)
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3802
3803
class HEADRequest(compat_urllib_request.Request):
    # Request subclass that always issues an HTTP HEAD request
    def get_method(self):
        return 'HEAD'
3807
3808
class PUTRequest(compat_urllib_request.Request):
    # Request subclass that always issues an HTTP PUT request
    def get_method(self):
        return 'PUT'
3812
3813
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert *v* to int scaled by invscale/scale, or *default* on failure.

    If *get_attr* is given, that attribute is read from *v* first.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3826
3827
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* if it is None."""
    if v is None:
        return default
    return compat_str(v)
3830
3831
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and a leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3839
3840
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert *v* to float scaled by invscale/scale, or *default* on failure."""
    if v is None:
        return default
    try:
        return invscale * float(v) / scale
    except (ValueError, TypeError):
        return default
3848
3849
def bool_or_none(v, default=None):
    """Return *v* only if it is a real bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3852
3853
def strip_or_none(v, default=None):
    """Return v.strip() for strings, else *default*."""
    if not isinstance(v, compat_str):
        return default
    return v.strip()
3856
3857
def url_or_none(url):
    """Return the stripped *url* if it looks like a supported URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    # http(s), rtmp variants, rtsp variants, mms, ftp(s), protocol-relative
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3863
3864
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string, or *default* on failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3875
3876
def parse_duration(s):
    # Parse a duration string ('1:23:45', '1h23m45s', ISO 8601 'PT1H23M',
    # '2.5 hours', ...) into a float number of seconds, or None on no match.
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # First try colon-separated [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Then unit-suffixed / ISO 8601-style durations; years, months and
        # weeks are matched (so the rest can parse) but deliberately ignored
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Finally, fractional forms like '2.5 hours' / '3 min'
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms still contains its leading '.', so float() yields the fraction
        duration += float(ms)
    return duration
3933
3934
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the extension: 'a.mp4' -> 'a.ext.mp4'.

    If *expected_real_ext* is given and does not match the actual extension,
    *ext* is appended instead: 'a.mkv' -> 'a.mkv.ext'.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '%s.%s' % (filename, ext)
    return '%s.%s%s' % (name, ext, real_ext)
3941
3942
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    If *expected_real_ext* is given and does not match the actual extension,
    *ext* is appended to the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    keep_full = expected_real_ext and real_ext[1:] != expected_real_ext
    return '%s.%s' % (filename if keep_full else name, ext)
3948
3949
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3959
3960
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3978
3979
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program *output* using *version_re*."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3989
3990
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        # Subclassing the builtin lets callers catch LazyList.IndexError
        # specifically while staying compatible with plain IndexError
        pass

    def __init__(self, iterable):
        # Source iterator, drained lazily into __cache as items are needed
        self.__iterable = iter(iterable)
        self.__cache = []
        # When True, all indexing/iteration is mirrored (see reverse())
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Drain the iterator completely; returns the forward-order cache
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map a forward index to its mirrored counterpart (None passes through)
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Otherwise only consume as many items as the request can reach
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            # Fetch one element from the appropriate end; cheap for both modes
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        # Reversal is lazy: only a flag is flipped, nothing is consumed
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4075
4076
class PagedList:
    """Base class for lazily-fetched, page-based lists of entries."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getpage(self, pagenum):
        """Fetch (and optionally cache) the entries of page *pagenum*."""
        page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        """Return entries [start:end] as a plain list."""
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4106
4107
class OnDemandPagedList(PagedList):
    # PagedList that fetches pages one by one as the requested slice advances
    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested slice within the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4141
4142
class InAdvancePagedList(PagedList):
    # PagedList for which the total page count is known in advance
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading entries of the first page that fall before *start*
        skip_elems = start - start_page * self._pagesize
        # Number of entries still wanted (None = everything remaining)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
4166
4167
def uppercase_escape(s):
    """Decode \\UXXXXXXXX escape sequences in *s* into real characters."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(m):
        return decode(m.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
4174
4175
def lowercase_escape(s):
    """Decode \\uXXXX escape sequences in *s* into real characters."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(m):
        return decode(m.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
4182
4183
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # The safe-set keeps all reserved/sub-delimiter characters intact
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4189
4190
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    return parts._replace(
        # International domain names are punycode-encoded
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    ).geturl()
4201
4202
def parse_qs(url):
    """Return the query string of *url* parsed into a dict of value lists."""
    query = compat_urllib_parse_urlparse(url).query
    return compat_parse_qs(query)
4205
4206
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping BOMs, blanks and comments."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Strip a UTF-8 BOM that may precede any line
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4224
4225
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4228
4229
def update_url_query(url, query):
    """Return *url* with the parameters in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(params, True)))
4238
4239
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its url, data, headers and query."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    # Preserve the HTTP method of the original request
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4258
4259
def _multipart_encode_impl(data, boundary):
    """Encode *data* (dict of str/bytes keys and values) as multipart/form-data.

    Returns (body_bytes, content_type). Raises ValueError when *boundary*
    occurs inside a field, so the caller can retry with another boundary.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    # Collect parts in a list and join once; repeated bytes concatenation
    # is quadratic in the total payload size
    parts = []
    for k, v in data.items():
        parts.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(content)

    parts.append(b'--' + boundary_bytes + b'--\r\n')
    return b''.join(parts), content_type
4280
4281
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    boundary_was_given = boundary is not None
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # A collision with a caller-supplied boundary is fatal; a random
            # one is simply regenerated and retried
            if boundary_was_given:
                raise
            boundary = None
4310
4311
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first usable value among *key_or_keys* in dict *d*.

    A value is skipped when the key is missing, the value is None, or
    (with *skip_false_values*) the value is falsy.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d or d[key] is None:
            continue
        if skip_false_values and not d[key]:
            continue
        return d[key]
    return default
4320
4321
def try_get(src, getter, expected_type=None):
    """Apply each getter to *src* and return the first result that neither
    raises a lookup error nor fails the *expected_type* check."""
    for get in variadic(getter):
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
4331
4332
def merge_dicts(*dicts):
    """Merge *dicts* left-to-right: earlier values win, except that an empty
    string may be overwritten by a later non-empty string; None is skipped."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            is_new = k not in merged
            fills_empty_str = (
                isinstance(v, compat_str) and v
                and isinstance(merged.get(k), compat_str)
                and not merged[k])
            if is_new or fills_empty_str:
                merged[k] = v
    return merged
4345
4346
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding byte strings with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4349
4350
# MPAA rating -> equivalent minimum age
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines rating -> equivalent minimum age
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4368
4369
def parse_age_limit(s):
    """Normalize an age limit (int, '16+', MPAA or TV rating) to an int age."""
    # NOTE: deliberately type(), not isinstance(), so bools are not accepted
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4385
4386
def strip_jsonp(code):
    """Strip a JSONP wrapper, leaving only the callback's JSON argument."""
    # Verbose regex: optional 'window.' prefix, an optional 'name && name'
    # guard, then the parenthesized payload; trailing '//' comments allowed
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4395
4396
def js_to_json(code, vars={}):
    # Convert a JavaScript object/value literal into valid JSON text.
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals, optionally used as object keys
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token (string, keyword, number, comment, ...)
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to their JSON equivalents
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integers used as keys must become quoted JSON strings
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        if v in vars:
            return vars[v]

        # Anything else (bare identifiers) gets quoted
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4443
4444
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4453
4454
# Default output filename templates (see the --output option)
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Fixed filename suffix appended for each outtmpl type when the user gives
# no template of their own (None = no fixed suffix for that type)
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4470
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Template: {0} is the allowed key pattern, {1} the allowed conversion types
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
    '''


# All conversion types accepted by %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4489
4490
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4499
4500
def version_tuple(v):
    """Split a version string like '2021.01.01-3' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4503
4504
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; unparseable input yields not assume_new."""
    if not version:
        return not assume_new
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
4512
4513
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Self-update is currently disabled. The zipimporter/sys.frozen
    # detection that used to follow this early return was unreachable
    # dead code and has been removed.
    return False
4521
4522
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4526
4527
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    err_str = str(err)
    if sys.version_info[0] < 3:
        # On python 2 error byte strings must be decoded with the proper
        # encoding rather than ascii
        err_str = err_str.decode(preferredencoding())
    return err_str
4535
4536
def mimetype2ext(mt):
    """Map a MIME type string to a file extension."""
    if mt is None:
        return None

    # Full-type special cases take precedence
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in FULL_MAP:
        return FULL_MAP[mt]

    # Otherwise map by subtype only, dropping any ';charset=...' parameters
    _, _, subtype = mt.rpartition('/')
    subtype = subtype.split(';')[0].strip().lower()

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4574
4575
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1',
                    'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3',
                    'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec = acodec = None
    for full_codec in split_codecs:
        # Only the prefix before the first '.' identifies the codec family
        codec = full_codec.split('.')[0]
        if codec in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif codec in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        # Nothing recognized; assume "video, audio" ordering
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4605
4606
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers."""
    getheader = url_handle.headers.get

    # An attachment filename is the most reliable hint
    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    # Fall back to the MIME type
    return mimetype2ext(getheader('Content-Type'))
4619
4620
def encode_data_uri(data, mime_type):
    """Return *data* (bytes) encoded as a base64 'data:' URI of *mime_type*."""
    b64 = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, b64)
4623
4624
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No user limit set, or content available for everyone
        return False
    return age_limit < content_limit
4633
4634
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Check multi-byte BOMs before shorter ones (UTF-32 before UTF-16)
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    # HTML-ish content starts with a tag, possibly after whitespace
    return re.match(r'^\s*<', decoded)
4653
4654
def determine_protocol(info_dict):
    """Infer the download protocol for *info_dict*, from its URL if unset."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4675
4676
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(table):
        # Widest stringified cell per column
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*table)]

    def keep_nonempty_columns(row, widths):
        return [cell for width, cell in zip(widths, row) if width]

    if hideEmpty:
        # Drop columns whose data cells are all empty (header is ignored here)
        widths = column_widths(data)
        header_row = keep_nonempty_columns(header_row, widths)
        data = [keep_nonempty_columns(row, widths) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        # Insert a dashed separator line between header and data
        table = [header_row] + [['-' * width for width in widths]] + data
    format_str = ' '.join('%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4697
4698
def _match_one(filter_part, dct, incomplete):
    # Evaluate one filter expression (e.g. 'duration>60', 'title~=foo',
    # '!is_live') against dct. `incomplete` signals that dct may be missing
    # fields, in which case comparisons on absent fields are treated as matching.
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    # Binary comparison: <key> [!]<op>[?] <value>
    # A trailing '?' makes the comparison succeed when the field is missing.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind as the surrounding ones
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try parsing as a file size ('500K', '1.2MiB', ...)
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            return incomplete or m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary check: presence ('key') or absence ('!key') of a field.
    # For booleans, '' means "is True" and '!' means "is False".
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4780
4781
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
    When incomplete, all conditions passes on missing fields
    """
    # Split on unescaped '&' only; each part is an independent condition
    parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(part.replace(r'\&', '&'), dct, incomplete)
        for part in parts)
4789
4790
def match_filter_func(filter_str):
    """Build a match_filter callable for YoutubeDL from a filter string.

    The returned function yields None when the video passes the filter,
    or a human-readable skip message otherwise.
    """
    def _match_func(info_dict, *args, **kwargs):
        if not match_str(filter_str, info_dict, *args, **kwargs):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
4799
4800
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset, optionally suffixed with 's' (e.g. '12.5s')
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock format HH:MM:SS[.f] or HH:MM:SS:f (frame separator treated as decimal)
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, secs = mobj.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(secs.replace(':', '.'))
4812
4813
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode 'HH:MM:SS,mmm'."""
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(
        int(seconds / 3600), int(seconds % 3600 / 60),
        int(seconds % 60), int(seconds % 1 * 1000))
4816
4817
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Map obsolete TTAF/TTML namespaces onto the current ones so a single set
    # of namespaced xpath expressions (via _x below) works for all vintages.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that have an SRT/HTML-tag representation
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> resolved style property dict (shared with the parser below)
    default_style = {}  # style inherited from body/div, applied to every paragraph

    class TTMLPElementParser(object):
        # Streaming XML parser target that converts one <p> element into
        # SRT-flavoured markup (<b>/<i>/<u>/<font>).
        # NOTE(review): these are class-level mutable attributes; instances are
        # only used serially (one per parse_node call) and start/end calls are
        # balanced, so the shared lists stay empty between uses — verify before
        # reusing this class elsewhere.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Effective style = default (body/div) style, overridden by the
                # referenced style id, overridden by inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close tags in reverse order of opening
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and re-parse it through TTMLPElementParser to
        # get event-driven (start/data/end) conversion of its contents
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces before parsing (byte-level replace)
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; loop again while a style references a parent
    # that has not been resolved yet (forward reference)
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced by body/div becomes the default for all paragraphs
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Emit one SRT cue per <p>; paragraphs without usable timing are skipped
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4980
4981
def cli_option(params, command_option, param):
    """Return [command_option, value] for a set parameter, or [] if unset.

    The value is always converted to str so the result can be passed straight
    to subprocess argv. Previously, falsy non-None values (e.g. 0 or '')
    skipped the str conversion and were returned unconverted, which could
    yield non-string argv entries.
    """
    param = params.get(param)
    return [command_option, str(param)] if param is not None else []
4987
4988
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Translate a boolean parameter into CLI arguments.

    Returns [] when the parameter is unset; otherwise either a single
    'option<separator>value' argument or an [option, value] pair.
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4997
4998
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when the parameter equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5002
5003
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Look up argument lists in argdict by the first matching key group.

    Each entry of `keys` may be a single key or a tuple of keys; the first
    group with any hit wins and all its argument lists are concatenated.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        candidates = [argdict.get(key.lower()) for key in variadic(key_list)]
        arg_list = [args for args in candidates if args is not None]
        if arg_list:
            # Flatten the matched argument lists into a single list
            return [arg for args in arg_list for arg in args]
    return default
5022
5023
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve configured argument lists for a component/executable pair.

    Builds the lookup key chain ('<main_key>+<exe>...', then the
    (main_key, exe) pair and 'default') and delegates to
    cli_configuration_args.
    """
    main_key = main_key.lower()
    exe = exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{key}' for key in (keys or [''])]
    if root_key not in keys:
        # Only sub-keys were requested; backward-compat list handling is off
        use_compat = False
    else:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
5035
5036
class ISO639Utils(object):
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the leading two characters matter (handles e.g. 'en-US')
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # First match in insertion order wins (e.g. 'heb' -> 'he', not 'iw')
        return next(
            (short for short, long_code in cls._lang_map.items() if long_code == code),
            None)
5240
5241
class ISO3166Utils(object):
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Lookup is case-insensitive; unknown codes yield None
        code = code.upper()
        return cls._country_map.get(code)
5500
5501
class GeoUtils(object):
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Pick a random IPv4 address inside the given CIDR block, or inside
        the registered block for a two-letter country code.

        Returns None for unknown country codes.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        base_addr, prefix_len = block.split('/')
        # Lowest address of the block, as a 32-bit big-endian integer
        lo = compat_struct_unpack('!L', socket.inet_aton(base_addr))[0]
        # Highest address: set every host bit
        hi = lo | (0xffffffff >> int(prefix_len))
        packed = compat_struct_pack('!L', random.randint(lo, hi))
        return compat_str(socket.inet_ntoa(packed))
5760
5761
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that honours a per-request proxy override
    supplied via the 'Ytdl-request-proxy' request header."""

    def __init__(self, proxies=None):
        # Set default handlers
        # The lambda's default arguments bind the loop's current `type` and
        # the bound method at definition time (avoids late-binding closures)
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request 'Ytdl-request-proxy' header overrides the
        # handler-level proxy; it is stripped before the request is sent
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do the wrapping of the socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5785
5786
5787 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5788 # released into Public Domain
5789 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5790
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # int.to_bytes replaces the original 32-bit-chunk struct loop plus manual
    # leading-zero stripping (the file is Python-3-only, so this is safe).
    n = int(n)
    if n <= 0:
        # Matches the original behavior: 0 (and negatives) encode as a single NUL
        s = b'\000'
    else:
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # Pad the front so the length is a multiple of blocksize, if requested
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5819
5820
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes performs the same big-endian accumulation as the old
    # manual 32-bit chunk loop (including the implicit zero front-padding)
    return int.from_bytes(s, 'big')
5836
5837
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Interpret the byte-reversed payload as a hex integer, then do modular
    # exponentiation and render the result as lower-case hex
    payload = int(binascii.hexlify(data[::-1]), 16)
    return format(pow(payload, exponent, modulus), 'x')
5853
5854
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    # PKCS#1 needs at least 8 random bytes plus the 3 framing bytes
    if length - len(data) < 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    filler = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
    return [0, 2, *filler, 0, *data]
5868
5869
def encode_base_n(num, n, table=None):
    """Render the non-negative integer *num* in base *n*, using *table*
    (defaults to 0-9a-zA-Z) as the digit alphabet."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5886
5887
def decode_packed_codes(code):
    """Decode P.A.C.K.E.R.-style packed JavaScript found in *code*."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-n token back to its original symbol (or itself when the
    # symbol list has an empty slot)
    symbol_table = {}
    for index in reversed(range(count)):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5904
5905
def caesar(s, alphabet, shift):
    """Shift each character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); other characters pass through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(char):
        pos = alphabet.find(char)
        return char if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
5913
5914
def rot47(s):
    """Apply the ROT47 cipher: rotate the 94 printable ASCII characters
    ('!' through '~', i.e. codepoints 33-126) by 47 positions."""
    printable_ascii = ''.join(map(chr, range(33, 127)))
    return caesar(s, printable_ascii, 47)
5917
5918
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 KEY=VALUE attribute list into a dict, stripping the
    surrounding quotes from quoted values."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
5926
5927
def urshift(val, n):
    """Logical (unsigned 32-bit) right shift of *val* by *n* bits."""
    if val < 0:
        # Reinterpret the negative value as its unsigned 32-bit counterpart
        val += 0x100000000
    return val >> n
5930
5931
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG image into (width, height, pixels).

    pixels is a list of rows, each row a flat list of channel bytes.
    NOTE(review): the stride computation assumes 3 bytes per pixel
    (8-bit RGB, no alpha/palette/interlace) — other IHDR colour types are
    not handled; confirm inputs match before reuse.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the fixed 8-byte PNG signature and that IHDR comes first
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned int of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: length(4) | type(4) | data(length) | CRC(4)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed to be the first chunk by the signature check above
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split across several IDAT chunks; concatenate them
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel (RGB); each scanline is prefixed with 1 filter byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed channel byte by flat index;
        # reads from rows (including the current one) filled so far
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        # First byte of every scanline selects the filter (0-4)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # x > 2 means "not in the first pixel": the 'left' neighbour is
            # the same channel 3 bytes back
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6037
6038
def write_xattr(path, key, value):
    """Set extended attribute *key* (bytes *value*) on the file at *path*.

    Tries, in order: the pyxattr/xattr Python modules; NTFS Alternate Data
    Streams on Windows; the setfattr/xattr command-line tools elsewhere.
    Raises XAttrMetadataError when a backend fails and XAttrUnavailableError
    when no usable backend exists.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a (UTF-8) string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6121
6122
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the string components
    of a uniformly random date between 1950-01-01 and 1995-12-31."""
    epoch = datetime.date(1950, 1, 1)
    span = (datetime.date(1995, 12, 31) - epoch).days
    chosen = epoch + datetime.timedelta(random.randint(0, span))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6133
6134
# Templates for internet shortcut files, which are plain text files.

# Windows '.url' shortcut format
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS '.webloc' shortcut: an XML property list
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org '.desktop' entry (Linux desktop shortcut)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6160
6161
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    # Rebuild the netloc component by component (userinfo, host, port)
    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): port 80 is dropped for *any* scheme here, not just http —
    # confirm this is intended for https/ftp URLs before relying on it.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6204
6205
def to_high_limit_path(path):
    """On Windows/Cygwin, prefix the absolute path with the long-path marker
    to lift the MAX_PATH limit; elsewhere return *path* unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # The \\?\ prefix disables MAX_PATH; individual segment limits may remain
    return '\\\\?\\' + os.path.abspath(path)
6212
6213
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch *field* from *obj* (or take obj itself when field is None) and
    format it with *template*; values in *ignore* yield *default* instead.
    *func*, when given, transforms the value before formatting — and its
    result is re-checked against *ignore*."""
    if field is None:
        val = obj if obj is not None else default
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
6222
6223
def clean_podcast_url(url):
    """Strip well-known podcast tracking/measurement redirect prefixes
    (Chartable, Blubrry, Podtrac, Acast, Podcorn, Podsights) from *url*."""
    tracker_prefixes = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracker_prefixes, '', url)
6239
6240
6241 _HEX_TABLE = '0123456789abcdef'
6242
6243
6244 def random_uuidv4():
6245 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6246
6247
def make_dir(path, to_screen=None):
    """Ensure the parent directory of *path* exists.

    @param path       file path whose directory part should exist afterwards
    @param to_screen  optional callable invoked with an error message on failure
    @returns          True on success (or nothing to do), False on failure
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Bug fix: this used to be `callable(to_screen) is not None`, which
        # is always True (callable() returns a bool) and so crashed with a
        # TypeError whenever to_screen was left as None.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6258
6259
def get_executable_path():
    """Return the absolute directory that yt-dlp is effectively running from
    (PyInstaller bundle dir, zip archive root, or the package's parent)."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # PyInstaller sets sys.frozen
        candidate = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # zip archive
        candidate = os.path.join(os.path.dirname(__file__), '../..')
    else:
        candidate = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(candidate)
6269
6270
def load_plugins(name, suffix, namespace):
    """Load plugin classes from ytdlp_plugins/<name> into *namespace*.

    Returns the list of newly added classes whose names end with *suffix*;
    names already present in *namespace* are left untouched. Import errors
    (no plugin dir, etc.) are silently ignored.
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        for attr in dir(plugins):
            if attr in namespace or not attr.endswith(suffix):
                continue
            klass = getattr(plugins, attr)
            classes.append(klass)
            namespace[attr] = klass
    except ImportError:
        pass
    finally:
        # imp.find_module returns an open file object as its first element
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
6292
6293
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal) records how many branching levels ("..." or
        # tuple keys) were entered, so the caller knows how many layers of
        # nested lists to flatten afterwards
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            # A tuple of keys: traverse each alternative, then treat the
            # result list like a "..." expansion
            if isinstance(key, (list, tuple)):
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch over every value in the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Dict lookup, optionally case-insensitive
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings into an int index or slice
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # ":" over the whole sequence is equivalent to "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Build the final-value filter from expected_type
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branched result: flatten the nested lists produced by
                # "..."/tuple expansions, then drop Nones and type mismatches
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6376
6377
def traverse_dict(dictn, keys, casesense=True):
    """Deprecated wrapper around traverse_obj; kept for backward
    compatibility only — do not use in new code."""
    return traverse_obj(
        dictn, keys, casesense=casesense,
        is_user_input=True, traverse_string=True)
6382
6383
def variadic(x, allowed_types=(str, bytes)):
    """Return *x* unchanged when it is a non-atomic iterable, otherwise wrap
    it in a 1-tuple. *allowed_types* lists iterables treated as atoms."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6386
6387
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create an HS256-signed JWT (JWS Compact Serialization) as bytes.

    @param payload_data  dict of JWT claims (must be JSON-serializable)
    @param key           shared secret, str
    @param headers       optional extra JOSE header fields, merged over the
                         default {'alg': 'HS256', 'typ': 'JWT'}
    @returns             b'<header>.<payload>.<signature>'

    NOTE: standard (not URL-safe) base64 is used, matching this helper's
    historical output.
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    # Fix: the default used to be a mutable `headers={}` argument
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
    h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
    return token