# ]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
# [utils] Fix `InAdvancePagedList.__getitem__`
# [yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python3
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import imp
20 import io
21 import itertools
22 import json
23 import locale
24 import math
25 import operator
26 import os
27 import platform
28 import random
29 import re
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_cookiejar,
47 compat_ctypes_WINFUNCTYPE,
48 compat_etree_fromstring,
49 compat_expanduser,
50 compat_html_entities,
51 compat_html_entities_html5,
52 compat_http_client,
53 compat_integer_types,
54 compat_numeric_types,
55 compat_kwargs,
56 compat_os_name,
57 compat_parse_qs,
58 compat_shlex_quote,
59 compat_str,
60 compat_struct_pack,
61 compat_struct_unpack,
62 compat_urllib_error,
63 compat_urllib_parse,
64 compat_urllib_parse_urlencode,
65 compat_urllib_parse_urlparse,
66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
69 compat_urllib_parse_unquote_plus,
70 compat_urllib_request,
71 compat_urlparse,
72 compat_xpath,
73 )
74
75 from .socks import (
76 ProxyType,
77 sockssocket,
78 )
79
80
def register_socks_protocols():
    """Teach urlparse that SOCKS URL schemes carry a network location.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended there (at most once).
    """
    registered = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in registered:
            registered.append(proto)
88
89
# Type of a compiled regular expression pattern, for isinstance() checks.
# This is not clearly defined otherwise (obtained here by compiling an
# empty pattern and taking its type).
compiled_regex_type = type(re.compile(''))
92
93
def random_user_agent():
    """Return a randomized desktop Chrome-on-Windows-10 User-Agent string.

    A Chrome version is picked uniformly at random (via random.choice)
    from the hard-coded version pool below and substituted into the
    User-Agent template. Not deterministic across calls.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Pool of real Chrome release version numbers (newest first, roughly).
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Default HTTP headers sent with outgoing requests; the User-Agent is
# chosen once at import time by random_user_agent() above.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
1684
1685
# Canned User-Agent strings, keyed by browser name, for callers that need
# to impersonate a specific browser.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Sentinel that lets callers distinguish "no default supplied" from an
# explicit default of None.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Extensions of common audio/video/playlist container formats.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration (1 or 2 letters).
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1723
# strptime() patterns tried when parsing free-form textual dates.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Same patterns, plus ambiguous numeric forms read day-first (European style).
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Same patterns, plus ambiguous numeric forms read month-first (US style).
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of "packed" (p,a,c,k,e,d-style) JavaScript code.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches an embedded <script type="application/ld+json"> block.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Return the best guess at the system's preferred text encoding.

    Probes locale.getpreferredencoding() and verifies that the reported
    codec actually works; falls back to UTF-8 when it is missing or broken.
    """
    try:
        encoding = locale.getpreferredencoding()
        # A round-trip encode proves the codec is usable.
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1800
1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temp file next to the target so the final os.rename
    # stays on one filesystem (and is therefore atomic on POSIX).
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files mode 0600; re-apply the
            # process umask so the result looks like a normal file.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file; re-raise the original error.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] (resp. xpath[@key='val'])."""
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    # Python 2.6 ElementTree does not support attribute predicates,
    # so scan the candidates manually.
    def find_xpath_attr(node, xpath, key, val=None):
        """Find the first element matching xpath[@key] (resp. xpath[@key='val'])."""
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1876
1877 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1878 # the namespace parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' using ns_map."""
    def expand(component):
        parts = component.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(component) for component in path.split('/'))
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string or list of xpaths).

    On no match: return `default` when supplied, raise ExtractorError when
    `fatal`, otherwise return None.
    """
    def _search(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        n = _search(xpath)
    else:
        for xp in xpath:
            n = _search(xp)
            if n is not None:
                break

    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (
            xpath if name is None else name))
    return None
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (
            xpath if name is None else name))
    return None
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Find an element via find_xpath_attr() and return its `key` attribute."""
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % (
            '%s[@%s]' % (xpath, key) if name is None else name))
    return None
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an id lookup is just an attribute lookup on 'id'.
    return get_element_by_attribute('id', id, html)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose `attribute` matches `value`."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class anywhere inside the (space-separated) class attribute.
    class_value = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner content of every tag whose `attribute` matches `value`."""
    if escape_value:
        value = re.escape(value)

    results = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # The optional quotes in the pattern may leave a stray quote pair
        # around the content; strip it.
        if content.startswith(('"', "'")):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the last start tag seen; empty until feed() meets one.
        self.attrs = {}
        # Old-style base-class call: compat_HTMLParser may be an old-style
        # class on Python 2, so super() cannot be used here.
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Callback from HTMLParser: keep only the attribute list, as a dict.
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Parse the attributes out of a single HTML start-tag string.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    decode and return a dictionary of attributes:
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # return whatever was gathered so far (best effort).
        pass
    return attr_parser.attrs
2027
2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # Drop source newlines, then turn <br> and </p><p> into real newlines.
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Remove any remaining tags, then decode HTML entities.
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so written bytes are not mangled.
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error will not be fixed by renaming; propagate it.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable RFC 2822 date.
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Convert ':' inside timestamps (e.g. 12:34:56) before per-char handling.
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2128
2129
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        # Non-Windows and not forced: nothing to sanitize.
        return s

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    sanitized = []
    for part in parts:
        if part in ('.', '..'):
            sanitized.append(part)
        else:
            # Replace Windows-forbidden chars and trailing dots/spaces.
            sanitized.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))
    if drive_or_unc:
        sanitized.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized.insert(0, os.path.sep)
    return os.path.join(*sanitized)
2153
2154
def sanitize_url(url):
    """Fix protocol-less URLs and repair common scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:' + url
    # Scheme typos seen in the wild, mapped to their corrections.
    scheme_typos = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo, correction in scheme_typos:
        fixed, n = re.subn(typo, correction, url)
        if n:
            return fixed
    return url
2171
2172
def extract_basic_auth(url):
    """Strip inline credentials from `url`.

    Returns (clean_url, authorization_header_value-or-None).
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    if parts.port is None:
        netloc = parts.hostname
    else:
        netloc = '%s:%d' % (parts.hostname, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8')).decode('utf-8')
    return url, 'Basic ' + token
2183
2184
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized/escaped URL, moving inline
    credentials into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2191
2192
def expand_path(s):
    """Expand shell variables and ~"""
    # '~' is expanded first, then environment variables.
    return os.path.expandvars(compat_expanduser(s))
2196
2197
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # Membership is tested against a list (not a set) so unhashable
    # elements work too; order of first occurrence is preserved.
    deduped = []
    for item in iterable:
        if item not in deduped:
            deduped.append(item)
    return deduped
2205
2206
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2236
2237
def unescapeHTML(s):
    """Replace HTML entities in `s` with their characters; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2245
2246
def escapeHTML(text):
    """Escape the five HTML-special characters in `text`."""
    # '&' must be replaced first so already-produced entities are not mangled.
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for raw, escaped in replacements:
        text = text.replace(raw, escaped)
    return text
2256
2257
def process_communicate_or_kill(p, *args, **kwargs):
    # Like Popen.communicate(), but make sure the subprocess does not
    # outlive us if communication is aborted: kill it, reap it, re-raise.
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2265
2266
def get_subprocess_encoding():
    """Return the encoding used to pass strings to/from subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere use the filesystem encoding, defaulting to UTF-8.
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2277
2278
def encodeFilename(s, for_subprocess=False):
    """
    Encode a text filename for the OS / subprocess APIs.

    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    # Python 2 on other platforms: encode with the subprocess encoding.
    return s.encode(get_subprocess_encoding(), 'ignore')
2301
2302
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; pass through otherwise."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2312
2313
def encodeArgument(s):
    """Encode a subprocess/command-line argument (see encodeFilename)."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2321
2322
def decodeArgument(b):
    """Inverse of encodeArgument(): decode a subprocess argument to text."""
    return decodeFilename(b, True)
2325
2326
def decodeOption(optval):
    """Decode a (possibly byte-string) command-line option value to text."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        # Python 2 may hand us bytes from argv; decode via the locale encoding.
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2335
2336
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [h<delim>]m<delim>ss[.mmm].

    @param secs   Duration in seconds (int or float)
    @param delim  Separator between the hour/minute/second fields
    @param msec   If true, append the milliseconds as '.NNN'
    """
    # Use >= so exactly one hour/minute rolls over into the larger unit
    # ('1:00:00' rather than '60:00').
    if secs >= 3600:
        ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        ret = '%d' % secs
    # Bug fix: the fractional part must be scaled to milliseconds before
    # '%03d' truncates it — '%03d' % (secs % 1) always yielded '000'.
    return '%s.%03d' % (ret, secs % 1 * 1000) if msec else ret
2345
2346
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler with certificate checking configured
    from params['nocheckcertificate'], across the supported Python versions."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname must be disabled before verify_mode can be CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # Old Pythons: no usable SSLContext support in HTTPSHandler.
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2370
2371
def bug_reports_message(before=';'):
    """Return the standard 'please report this issue' blurb, joined to `before`."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.') % update_cmd

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence: capitalize the first word.
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
2386
2387
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2391
2392
# Exception types treated as network-level failures; ssl.CertificateError is
# included only where the ssl module provides it.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2397
2398
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        if sys.exc_info()[0] in network_exceptions:
            # Network failures are never the extractor's fault.
            expected = True

        prefix = '' if video_id is None else video_id + ': '
        suffix = ' (caused by %r)' % cause if cause else ''
        msg = prefix + msg + suffix
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Render the stored traceback as a string, or None if absent."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2426
2427
class UnsupportedError(ExtractorError):
    """Raised for URLs no extractor can handle."""

    def __init__(self, url):
        self.url = url
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
2433
2434
class RegexNotFoundError(ExtractorError):
    """Raised when a mandatory regex search found no match."""
    pass
2438
2439
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        self.msg = msg
        # Country codes from which the video is accessible, if known.
        self.countries = countries
        super(GeoRestrictedError, self).__init__(msg, expected=True)
2451
2452
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
2465
2466
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2474
2475
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    that multiple files would have to be downloaded to the same file on disk.
    """
    pass
2483
2484
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2495
2496
class ExistingVideoReached(YoutubeDLError):
    """ --break-on-existing triggered: an already-downloaded video was reached. """
    pass
2500
2501
class RejectedVideoReached(YoutubeDLError):
    """ --break-on-reject triggered: a filtered/rejected video was reached. """
    pass
2505
2506
class ThrottledDownload(YoutubeDLError):
    """ Raised when the download speed falls below --throttled-rate. """
    pass
2510
2511
class MaxDownloadsReached(YoutubeDLError):
    """ Raised once the --max-downloads limit has been reached. """
    pass
2515
2516
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2524
2525
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both sizes are in bytes.
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
2541
2542
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing an extended file attribute fails."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Derive a coarse failure reason from the errno / message text.
        if self.code in (errno.ENOSPC, errno.EDQUOT) or any(
                s in self.msg for s in ('No space left', 'Disk quota exceeded')):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2557
2558
class XAttrUnavailableError(YoutubeDLError):
    # Companion to XAttrMetadataError; presumably raised when extended-attribute
    # support itself is unavailable — raiser not visible in this file.
    pass
2561
2562
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection, honouring the configured source_address.

    Used as the connection factory by the YoutubeDL urllib handlers (see
    YoutubeDLHandler.http_open below).
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only remote addresses of the same family as the source address.
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate address until one connects.
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() entirely.
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2626
2627
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, any Accept-Encoding header is dropped too so
    the request is made uncompressed; otherwise `headers` is returned as-is.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2636
2637
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params: the YoutubeDL options dict, stored for subclasses/callers
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open a plain-HTTP connection, optionally tunnelled through a
        SOCKS proxy given via the internal 'Ytdl-socks-proxy' header."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Swap in a SOCKS-capable connection class and strip the
            # internal header so it is never sent on the wire
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress a 'deflate'-encoded body; handles both raw deflate
        streams and zlib-wrapped ones."""
        if not data:
            return data
        try:
            # Raw deflate stream (no zlib header), as some servers send
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Standard zlib-wrapped stream
            return zlib.decompress(data)

    def http_request(self, req):
        """Prepare an outgoing request: percent-encode the URL, add the
        standard headers and strip internal marker headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decode gzip/deflate response bodies and
        re-escape the Location header of redirect responses."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
2761
2762
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* whose connect() goes
    through the SOCKS proxy described by the URL *socks_proxy*."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4

    def unquote_if_non_empty(s):
        # Credentials are percent-encoded in the proxy URL; empty/None
        # values pass through untouched
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunnelled socket in TLS as well
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2804
2805
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting a custom connection class and
    per-request SOCKS proxying via the 'Ytdl-socks-proxy' header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class
        kwargs = {}

        # Forward SSL context / hostname checking where this Python
        # version supports them
        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            # Route through SOCKS and drop the internal marker header
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2829
2830
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar with UTF-8 support, HttpOnly-prefix tolerance and
    session-cookie handling suitable for cookies.txt files.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in Netscape-format cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp.  Do not edit.

'''
    # Parsed representation of one cookies.txt data line
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Normalize/validate one line; raises LoadError on bad entries
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries rather than failing the load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2947
2948
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor used by yt-dlp.

    Currently delegates to the stdlib HTTPCookieProcessor and wires the
    HTTPS hooks to the HTTP ones; kept as a hook point for response
    post-processing (see the retained, disabled workaround below).
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2971
2972
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Reuse the stdlib 302 machinery for all redirect-capable codes,
    # including 308 which older CPython versions do not handle
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only GET/HEAD may follow any redirect code; POST only 301/302/303
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request is bodyless
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3028
3029
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (utc_offset, remainder): utc_offset is a datetime.timedelta
    (zero when there is no designator or it is 'Z'), remainder is the
    string with the designator removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        # A bare 'Z' suffix means UTC: zero offset
        return datetime.timedelta(), date_str
    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3046
3047
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date, or None if it is
    None or cannot be parsed. """
    if date_str is None:
        return None

    # strptime here cannot consume fractional seconds; drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
3065
3066
def date_formats(day_first=True):
    """Return the candidate strptime formats, preferring day-first or
    month-first interpretation of ambiguous dates."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3069
3070
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD, or None."""
    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # Try every known format; the last one that matches wins
    upload_date = None
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser from the email package
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    return compat_str(upload_date) if upload_date is not None else None
3097
3098
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for *date_str*, or None if it cannot be
    parsed.

    day_first: interpret ambiguous numeric dates as DD/MM (True) or
    MM/DD (False).
    """
    if date_str is None:
        return None

    # Commas and pipes never carry meaning in dates
    date_str = re.sub(r'[,|]', '', date_str)

    # Remember a PM marker before it is stripped below
    # NOTE(review): any 'PM' adds 12 hours, including '12 PM' — confirm intended
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to the RFC 2822 parser; re-apply the PM shift in seconds
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3130
3131
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Text after the last dot, with any query string stripped first
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = guess.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3143
3144
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename by using '<lang>.<format>' as the new
    extension of *filename*."""
    new_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, new_ext, expected_real_ext)
3147
3148
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        # With 'auto', keep full precision here and round at the end
        # using the unit found in the expression
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Recursively resolve the base part ('now', a date, or a nested
        # expression), then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have irregular length; use calendar arithmetic
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    # Plain date string: parse with the provided format
    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3189
3190
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3199
3200
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by a number of months,
    clamping the day to the last day of the target month."""
    total_months = dt.month + months - 1
    year = dt.year + total_months // 12
    month = total_months % 12 + 1
    # Clamp e.g. Jan 31 + 1 month to Feb 28/29
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3208
3209
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    ('microsecond', 'second', 'minute', 'hour' or 'day').
    """
    if precision == 'microsecond':
        return dt  # nothing to round

    step = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half up to the nearest multiple of the step
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3226
3227
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format; any other
    input is returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
3236
3237
class DateRange(object):
    """Represents a time interval between two dates (inclusive at both ends)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3267
3268
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Python 2 may return bytes; decode with the locale's encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3277
3278
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors to Win32 standard-handle IDs
    # (-11 = STD_OUTPUT_HANDLE, -12 = STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on real console handles: reject pipes,
        # files, remote handles and handles without a console mode
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual
        # Plane, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write runs of BMP characters in chunks of at most 1024
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3352
3353
def write_string(s, out=None, encoding=None):
    """Write the text string *s* to *out* (default: sys.stderr) and flush,
    choosing an encoding strategy that works across platforms and
    Python 2/3 stream types."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Win32 console API so Unicode comes out right
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3374
3375
def bytes_to_intlist(bs):
    """Convert a bytes-like sequence (or Python 2 str) to a list of
    integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    # Python 2 str: elements are one-character strings
    return [ord(c) for c in bs]
3383
3384
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
3389
3390
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) for the current platform.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure used by Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte-range length: lock the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 = LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3464
3465
class locked_file(object):
    """Context manager wrapping a file object with an OS-level advisory
    lock held for the duration of the `with` block."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers share the lock; any writing mode takes it exclusively
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Always close, even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3495
3496
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to 'utf-8' when the
    interpreter reports None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3500
3501
def shell_quote(args):
    """Return *args* quoted and joined into a single shell-safe string."""
    encoding = get_filesystem_encoding()

    def to_text(a):
        # We may get a filename encoded with 'encodeFilename'
        return a.decode(encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(to_text(a)) for a in args)
3511
3512
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '#'.join((url, smuggled))
3521
3522
def unsmuggle_url(smug_url, default=None):
    """Extract data smuggled into a URL by smuggle_url(); returns
    (url, data), or (url, default) when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
3530
3531
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'.

    Accepts None (returns 'N/A'), ints, floats and numeric strings.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    # Clamp the exponent: values >= 1024**9 would otherwise index past
    # the suffix table and raise IndexError
    exponent = min(exponent, len(suffixes) - 1)
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3544
3545
3546 def lookup_unit_table(unit_table, s):
3547 units_re = '|'.join(re.escape(u) for u in unit_table)
3548 m = re.match(
3549 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
3550 if not m:
3551 return None
3552 num_str = m.group('num').replace(',', '.')
3553 mult = unit_table[m.group('unit')]
3554 return int(float(num_str) * mult)
3555
3556
3557 def parse_filesize(s):
3558 if s is None:
3559 return None
3560
3561 # The lower-case forms are of course incorrect and unofficial,
3562 # but we support those too
3563 _UNIT_TABLE = {
3564 'B': 1,
3565 'b': 1,
3566 'bytes': 1,
3567 'KiB': 1024,
3568 'KB': 1000,
3569 'kB': 1024,
3570 'Kb': 1000,
3571 'kb': 1000,
3572 'kilobytes': 1000,
3573 'kibibytes': 1024,
3574 'MiB': 1024 ** 2,
3575 'MB': 1000 ** 2,
3576 'mB': 1024 ** 2,
3577 'Mb': 1000 ** 2,
3578 'mb': 1000 ** 2,
3579 'megabytes': 1000 ** 2,
3580 'mebibytes': 1024 ** 2,
3581 'GiB': 1024 ** 3,
3582 'GB': 1000 ** 3,
3583 'gB': 1024 ** 3,
3584 'Gb': 1000 ** 3,
3585 'gb': 1000 ** 3,
3586 'gigabytes': 1000 ** 3,
3587 'gibibytes': 1024 ** 3,
3588 'TiB': 1024 ** 4,
3589 'TB': 1000 ** 4,
3590 'tB': 1024 ** 4,
3591 'Tb': 1000 ** 4,
3592 'tb': 1000 ** 4,
3593 'terabytes': 1000 ** 4,
3594 'tebibytes': 1024 ** 4,
3595 'PiB': 1024 ** 5,
3596 'PB': 1000 ** 5,
3597 'pB': 1024 ** 5,
3598 'Pb': 1000 ** 5,
3599 'pb': 1000 ** 5,
3600 'petabytes': 1000 ** 5,
3601 'pebibytes': 1024 ** 5,
3602 'EiB': 1024 ** 6,
3603 'EB': 1000 ** 6,
3604 'eB': 1024 ** 6,
3605 'Eb': 1000 ** 6,
3606 'eb': 1000 ** 6,
3607 'exabytes': 1000 ** 6,
3608 'exbibytes': 1024 ** 6,
3609 'ZiB': 1024 ** 7,
3610 'ZB': 1000 ** 7,
3611 'zB': 1024 ** 7,
3612 'Zb': 1000 ** 7,
3613 'zb': 1000 ** 7,
3614 'zettabytes': 1000 ** 7,
3615 'zebibytes': 1024 ** 7,
3616 'YiB': 1024 ** 8,
3617 'YB': 1000 ** 8,
3618 'yB': 1024 ** 8,
3619 'Yb': 1000 ** 8,
3620 'yb': 1000 ** 8,
3621 'yottabytes': 1000 ** 8,
3622 'yobibytes': 1024 ** 8,
3623 }
3624
3625 return lookup_unit_table(_UNIT_TABLE, s)
3626
3627
3628 def parse_count(s):
3629 if s is None:
3630 return None
3631
3632 s = s.strip()
3633
3634 if re.match(r'^[\d,.]+$', s):
3635 return str_to_int(s)
3636
3637 _UNIT_TABLE = {
3638 'k': 1000,
3639 'K': 1000,
3640 'm': 1000 ** 2,
3641 'M': 1000 ** 2,
3642 'kk': 1000 ** 2,
3643 'KK': 1000 ** 2,
3644 }
3645
3646 return lookup_unit_table(_UNIT_TABLE, s)
3647
3648
3649 def parse_resolution(s):
3650 if s is None:
3651 return {}
3652
3653 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3654 if mobj:
3655 return {
3656 'width': int(mobj.group('w')),
3657 'height': int(mobj.group('h')),
3658 }
3659
3660 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3661 if mobj:
3662 return {'height': int(mobj.group(1))}
3663
3664 mobj = re.search(r'\b([48])[kK]\b', s)
3665 if mobj:
3666 return {'height': int(mobj.group(1)) * 540}
3667
3668 return {}
3669
3670
3671 def parse_bitrate(s):
3672 if not isinstance(s, compat_str):
3673 return
3674 mobj = re.search(r'\b(\d+)\s*kbps', s)
3675 if mobj:
3676 return int(mobj.group(1))
3677
3678
3679 def month_by_name(name, lang='en'):
3680 """ Return the number of a month by (locale-independently) English name """
3681
3682 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
3683
3684 try:
3685 return month_names.index(name) + 1
3686 except ValueError:
3687 return None
3688
3689
3690 def month_by_abbreviation(abbrev):
3691 """ Return the number of a month by (locale-independently) English
3692 abbreviations """
3693
3694 try:
3695 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
3696 except ValueError:
3697 return None
3698
3699
3700 def fix_xml_ampersands(xml_str):
3701 """Replace all the '&' by '&amp;' in XML"""
3702 return re.sub(
3703 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
3704 '&amp;',
3705 xml_str)
3706
3707
3708 def setproctitle(title):
3709 assert isinstance(title, compat_str)
3710
3711 # ctypes in Jython is not complete
3712 # http://bugs.jython.org/issue2148
3713 if sys.platform.startswith('java'):
3714 return
3715
3716 try:
3717 libc = ctypes.cdll.LoadLibrary('libc.so.6')
3718 except OSError:
3719 return
3720 except TypeError:
3721 # LoadLibrary in Windows Python 2.7.13 only expects
3722 # a bytestring, but since unicode_literals turns
3723 # every string into a unicode string, it fails.
3724 return
3725 title_bytes = title.encode('utf-8')
3726 buf = ctypes.create_string_buffer(len(title_bytes))
3727 buf.value = title_bytes
3728 try:
3729 libc.prctl(15, buf, 0, 0, 0)
3730 except AttributeError:
3731 return # Strange libc, just skip this
3732
3733
3734 def remove_start(s, start):
3735 return s[len(start):] if s is not None and s.startswith(start) else s
3736
3737
3738 def remove_end(s, end):
3739 return s[:-len(end)] if s is not None and s.endswith(end) else s
3740
3741
3742 def remove_quotes(s):
3743 if s is None or len(s) < 2:
3744 return s
3745 for quote in ('"', "'", ):
3746 if s[0] == quote and s[-1] == quote:
3747 return s[1:-1]
3748 return s
3749
3750
3751 def get_domain(url):
3752 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3753 return domain.group('domain') if domain else None
3754
3755
3756 def url_basename(url):
3757 path = compat_urlparse.urlparse(url).path
3758 return path.strip('/').split('/')[-1]
3759
3760
3761 def base_url(url):
3762 return re.match(r'https?://[^?#&]+/', url).group()
3763
3764
3765 def urljoin(base, path):
3766 if isinstance(path, bytes):
3767 path = path.decode('utf-8')
3768 if not isinstance(path, compat_str) or not path:
3769 return None
3770 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
3771 return path
3772 if isinstance(base, bytes):
3773 base = base.decode('utf-8')
3774 if not isinstance(base, compat_str) or not re.match(
3775 r'^(?:https?:)?//', base):
3776 return None
3777 return compat_urlparse.urljoin(base, path)
3778
3779
3780 class HEADRequest(compat_urllib_request.Request):
3781 def get_method(self):
3782 return 'HEAD'
3783
3784
3785 class PUTRequest(compat_urllib_request.Request):
3786 def get_method(self):
3787 return 'PUT'
3788
3789
3790 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
3791 if get_attr:
3792 if v is not None:
3793 v = getattr(v, get_attr, None)
3794 if v == '':
3795 v = None
3796 if v is None:
3797 return default
3798 try:
3799 return int(v) * invscale // scale
3800 except (ValueError, TypeError):
3801 return default
3802
3803
3804 def str_or_none(v, default=None):
3805 return default if v is None else compat_str(v)
3806
3807
3808 def str_to_int(int_str):
3809 """ A more relaxed version of int_or_none """
3810 if isinstance(int_str, compat_integer_types):
3811 return int_str
3812 elif isinstance(int_str, compat_str):
3813 int_str = re.sub(r'[,\.\+]', '', int_str)
3814 return int_or_none(int_str)
3815
3816
3817 def float_or_none(v, scale=1, invscale=1, default=None):
3818 if v is None:
3819 return default
3820 try:
3821 return float(v) * invscale / scale
3822 except (ValueError, TypeError):
3823 return default
3824
3825
3826 def bool_or_none(v, default=None):
3827 return v if isinstance(v, bool) else default
3828
3829
3830 def strip_or_none(v, default=None):
3831 return v.strip() if isinstance(v, compat_str) else default
3832
3833
3834 def url_or_none(url):
3835 if not url or not isinstance(url, compat_str):
3836 return None
3837 url = url.strip()
3838 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
3839
3840
3841 def strftime_or_none(timestamp, date_format, default=None):
3842 datetime_object = None
3843 try:
3844 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3845 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3846 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3847 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3848 return datetime_object.strftime(date_format)
3849 except (ValueError, TypeError, AttributeError):
3850 return default
3851
3852
3853 def parse_duration(s):
3854 if not isinstance(s, compat_basestring):
3855 return None
3856
3857 s = s.strip()
3858
3859 days, hours, mins, secs, ms = [None] * 5
3860 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
3861 if m:
3862 days, hours, mins, secs, ms = m.groups()
3863 else:
3864 m = re.match(
3865 r'''(?ix)(?:P?
3866 (?:
3867 [0-9]+\s*y(?:ears?)?\s*
3868 )?
3869 (?:
3870 [0-9]+\s*m(?:onths?)?\s*
3871 )?
3872 (?:
3873 [0-9]+\s*w(?:eeks?)?\s*
3874 )?
3875 (?:
3876 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
3877 )?
3878 T)?
3879 (?:
3880 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3881 )?
3882 (?:
3883 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3884 )?
3885 (?:
3886 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
3887 )?Z?$''', s)
3888 if m:
3889 days, hours, mins, secs, ms = m.groups()
3890 else:
3891 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
3892 if m:
3893 hours, mins = m.groups()
3894 else:
3895 return None
3896
3897 duration = 0
3898 if secs:
3899 duration += float(secs)
3900 if mins:
3901 duration += float(mins) * 60
3902 if hours:
3903 duration += float(hours) * 60 * 60
3904 if days:
3905 duration += float(days) * 24 * 60 * 60
3906 if ms:
3907 duration += float(ms)
3908 return duration
3909
3910
3911 def prepend_extension(filename, ext, expected_real_ext=None):
3912 name, real_ext = os.path.splitext(filename)
3913 return (
3914 '{0}.{1}{2}'.format(name, ext, real_ext)
3915 if not expected_real_ext or real_ext[1:] == expected_real_ext
3916 else '{0}.{1}'.format(filename, ext))
3917
3918
3919 def replace_extension(filename, ext, expected_real_ext=None):
3920 name, real_ext = os.path.splitext(filename)
3921 return '{0}.{1}'.format(
3922 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3923 ext)
3924
3925
3926 def check_executable(exe, args=[]):
3927 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3928 args can be a list of arguments for a short output (like -version) """
3929 try:
3930 process_communicate_or_kill(subprocess.Popen(
3931 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
3932 except OSError:
3933 return False
3934 return exe
3935
3936
3937 def get_exe_version(exe, args=['--version'],
3938 version_re=None, unrecognized='present'):
3939 """ Returns the version of the specified executable,
3940 or False if the executable is not present """
3941 try:
3942 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
3943 # SIGTTOU if yt-dlp is run in the background.
3944 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
3945 out, _ = process_communicate_or_kill(subprocess.Popen(
3946 [encodeArgument(exe)] + args,
3947 stdin=subprocess.PIPE,
3948 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
3949 except OSError:
3950 return False
3951 if isinstance(out, bytes): # Python 2.x
3952 out = out.decode('ascii', 'ignore')
3953 return detect_exe_version(out, version_re, unrecognized)
3954
3955
3956 def detect_exe_version(output, version_re=None, unrecognized='present'):
3957 assert isinstance(output, compat_str)
3958 if version_re is None:
3959 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3960 m = re.search(version_re, output)
3961 if m:
3962 return m.group(1)
3963 else:
3964 return unrecognized
3965
3966
3967 class LazyList(collections.abc.Sequence):
3968 ''' Lazy immutable list from an iterable
3969 Note that slices of a LazyList are lists and not LazyList'''
3970
3971 def __init__(self, iterable):
3972 self.__iterable = iter(iterable)
3973 self.__cache = []
3974 self.__reversed = False
3975
3976 def __iter__(self):
3977 if self.__reversed:
3978 # We need to consume the entire iterable to iterate in reverse
3979 yield from self.exhaust()
3980 return
3981 yield from self.__cache
3982 for item in self.__iterable:
3983 self.__cache.append(item)
3984 yield item
3985
3986 def __exhaust(self):
3987 self.__cache.extend(self.__iterable)
3988 return self.__cache
3989
3990 def exhaust(self):
3991 ''' Evaluate the entire iterable '''
3992 return self.__exhaust()[::-1 if self.__reversed else 1]
3993
3994 @staticmethod
3995 def __reverse_index(x):
3996 return None if x is None else -(x + 1)
3997
3998 def __getitem__(self, idx):
3999 if isinstance(idx, slice):
4000 if self.__reversed:
4001 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4002 start, stop, step = idx.start, idx.stop, idx.step or 1
4003 elif isinstance(idx, int):
4004 if self.__reversed:
4005 idx = self.__reverse_index(idx)
4006 start, stop, step = idx, idx, 0
4007 else:
4008 raise TypeError('indices must be integers or slices')
4009 if ((start or 0) < 0 or (stop or 0) < 0
4010 or (start is None and step < 0)
4011 or (stop is None and step > 0)):
4012 # We need to consume the entire iterable to be able to slice from the end
4013 # Obviously, never use this with infinite iterables
4014 return self.__exhaust()[idx]
4015
4016 n = max(start or 0, stop or 0) - len(self.__cache) + 1
4017 if n > 0:
4018 self.__cache.extend(itertools.islice(self.__iterable, n))
4019 return self.__cache[idx]
4020
4021 def __bool__(self):
4022 try:
4023 self[-1] if self.__reversed else self[0]
4024 except IndexError:
4025 return False
4026 return True
4027
4028 def __len__(self):
4029 self.exhaust()
4030 return len(self.__cache)
4031
4032 def reverse(self):
4033 self.__reversed = not self.__reversed
4034 return self
4035
4036 def __repr__(self):
4037 # repr and str should mimic a list. So we exhaust the iterable
4038 return repr(self.exhaust())
4039
4040 def __str__(self):
4041 return repr(self.exhaust())
4042
4043
4044 class PagedList:
4045 def __len__(self):
4046 # This is only useful for tests
4047 return len(self.getslice())
4048
4049 def __init__(self, pagefunc, pagesize, use_cache=True):
4050 self._pagefunc = pagefunc
4051 self._pagesize = pagesize
4052 self._use_cache = use_cache
4053 self._cache = {}
4054
4055 def getpage(self, pagenum):
4056 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4057 if self._use_cache:
4058 self._cache[pagenum] = page_results
4059 return page_results
4060
4061 def getslice(self, start=0, end=None):
4062 return list(self._getslice(start, end))
4063
4064 def _getslice(self, start, end):
4065 raise NotImplementedError('This method must be implemented by subclasses')
4066
4067 def __getitem__(self, idx):
4068 # NOTE: cache must be enabled if this is used
4069 if not isinstance(idx, int) or idx < 0:
4070 raise TypeError('indices must be non-negative integers')
4071 entries = self.getslice(idx, idx + 1)
4072 return entries[0] if entries else None
4073
4074
4075 class OnDemandPagedList(PagedList):
4076 def _getslice(self, start, end):
4077 for pagenum in itertools.count(start // self._pagesize):
4078 firstid = pagenum * self._pagesize
4079 nextfirstid = pagenum * self._pagesize + self._pagesize
4080 if start >= nextfirstid:
4081 continue
4082
4083 startv = (
4084 start % self._pagesize
4085 if firstid <= start < nextfirstid
4086 else 0)
4087 endv = (
4088 ((end - 1) % self._pagesize) + 1
4089 if (end is not None and firstid <= end <= nextfirstid)
4090 else None)
4091
4092 page_results = self.getpage(pagenum)
4093 if startv != 0 or endv is not None:
4094 page_results = page_results[startv:endv]
4095 yield from page_results
4096
4097 # A little optimization - if current page is not "full", ie. does
4098 # not contain page_size videos then we can assume that this page
4099 # is the last one - there are no more ids on further pages -
4100 # i.e. no need to query again.
4101 if len(page_results) + startv < self._pagesize:
4102 break
4103
4104 # If we got the whole page, but the next page is not interesting,
4105 # break out early as well
4106 if end == nextfirstid:
4107 break
4108
4109
4110 class InAdvancePagedList(PagedList):
4111 def __init__(self, pagefunc, pagecount, pagesize):
4112 self._pagecount = pagecount
4113 PagedList.__init__(self, pagefunc, pagesize, True)
4114
4115 def _getslice(self, start, end):
4116 start_page = start // self._pagesize
4117 end_page = (
4118 self._pagecount if end is None else (end // self._pagesize + 1))
4119 skip_elems = start - start_page * self._pagesize
4120 only_more = None if end is None else end - start
4121 for pagenum in range(start_page, end_page):
4122 page_results = self.getpage(pagenum)
4123 if skip_elems:
4124 page_results = page_results[skip_elems:]
4125 skip_elems = None
4126 if only_more is not None:
4127 if len(page_results) < only_more:
4128 only_more -= len(page_results)
4129 else:
4130 yield from page_results[:only_more]
4131 break
4132 yield from page_results
4133
4134
4135 def uppercase_escape(s):
4136 unicode_escape = codecs.getdecoder('unicode_escape')
4137 return re.sub(
4138 r'\\U[0-9a-fA-F]{8}',
4139 lambda m: unicode_escape(m.group(0))[0],
4140 s)
4141
4142
4143 def lowercase_escape(s):
4144 unicode_escape = codecs.getdecoder('unicode_escape')
4145 return re.sub(
4146 r'\\u[0-9a-fA-F]{4}',
4147 lambda m: unicode_escape(m.group(0))[0],
4148 s)
4149
4150
4151 def escape_rfc3986(s):
4152 """Escape non-ASCII characters as suggested by RFC 3986"""
4153 if sys.version_info < (3, 0) and isinstance(s, compat_str):
4154 s = s.encode('utf-8')
4155 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4156
4157
4158 def escape_url(url):
4159 """Escape URL as suggested by RFC 3986"""
4160 url_parsed = compat_urllib_parse_urlparse(url)
4161 return url_parsed._replace(
4162 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
4163 path=escape_rfc3986(url_parsed.path),
4164 params=escape_rfc3986(url_parsed.params),
4165 query=escape_rfc3986(url_parsed.query),
4166 fragment=escape_rfc3986(url_parsed.fragment)
4167 ).geturl()
4168
4169
4170 def read_batch_urls(batch_fd):
4171 def fixup(url):
4172 if not isinstance(url, compat_str):
4173 url = url.decode('utf-8', 'replace')
4174 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4175 for bom in BOM_UTF8:
4176 if url.startswith(bom):
4177 url = url[len(bom):]
4178 url = url.lstrip()
4179 if not url or url.startswith(('#', ';', ']')):
4180 return False
4181 # "#" cannot be stripped out since it is part of the URI
4182 # However, it can be safely stipped out if follwing a whitespace
4183 return re.split(r'\s#', url, 1)[0].rstrip()
4184
4185 with contextlib.closing(batch_fd) as fd:
4186 return [url for url in map(fixup, fd) if url]
4187
4188
4189 def urlencode_postdata(*args, **kargs):
4190 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4191
4192
4193 def update_url_query(url, query):
4194 if not query:
4195 return url
4196 parsed_url = compat_urlparse.urlparse(url)
4197 qs = compat_parse_qs(parsed_url.query)
4198 qs.update(query)
4199 return compat_urlparse.urlunparse(parsed_url._replace(
4200 query=compat_urllib_parse_urlencode(qs, True)))
4201
4202
4203 def update_Request(req, url=None, data=None, headers={}, query={}):
4204 req_headers = req.headers.copy()
4205 req_headers.update(headers)
4206 req_data = data or req.data
4207 req_url = update_url_query(url or req.get_full_url(), query)
4208 req_get_method = req.get_method()
4209 if req_get_method == 'HEAD':
4210 req_type = HEADRequest
4211 elif req_get_method == 'PUT':
4212 req_type = PUTRequest
4213 else:
4214 req_type = compat_urllib_request.Request
4215 new_req = req_type(
4216 req_url, data=req_data, headers=req_headers,
4217 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4218 if hasattr(req, 'timeout'):
4219 new_req.timeout = req.timeout
4220 return new_req
4221
4222
4223 def _multipart_encode_impl(data, boundary):
4224 content_type = 'multipart/form-data; boundary=%s' % boundary
4225
4226 out = b''
4227 for k, v in data.items():
4228 out += b'--' + boundary.encode('ascii') + b'\r\n'
4229 if isinstance(k, compat_str):
4230 k = k.encode('utf-8')
4231 if isinstance(v, compat_str):
4232 v = v.encode('utf-8')
4233 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4234 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4235 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
4236 if boundary.encode('ascii') in content:
4237 raise ValueError('Boundary overlaps with data')
4238 out += content
4239
4240 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4241
4242 return out, content_type
4243
4244
4245 def multipart_encode(data, boundary=None):
4246 '''
4247 Encode a dict to RFC 7578-compliant form-data
4248
4249 data:
4250 A dict where keys and values can be either Unicode or bytes-like
4251 objects.
4252 boundary:
4253 If specified a Unicode object, it's used as the boundary. Otherwise
4254 a random boundary is generated.
4255
4256 Reference: https://tools.ietf.org/html/rfc7578
4257 '''
4258 has_specified_boundary = boundary is not None
4259
4260 while True:
4261 if boundary is None:
4262 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4263
4264 try:
4265 out, content_type = _multipart_encode_impl(data, boundary)
4266 break
4267 except ValueError:
4268 if has_specified_boundary:
4269 raise
4270 boundary = None
4271
4272 return out, content_type
4273
4274
4275 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
4276 if isinstance(key_or_keys, (list, tuple)):
4277 for key in key_or_keys:
4278 if key not in d or d[key] is None or skip_false_values and not d[key]:
4279 continue
4280 return d[key]
4281 return default
4282 return d.get(key_or_keys, default)
4283
4284
4285 def try_get(src, getter, expected_type=None):
4286 for get in variadic(getter):
4287 try:
4288 v = get(src)
4289 except (AttributeError, KeyError, TypeError, IndexError):
4290 pass
4291 else:
4292 if expected_type is None or isinstance(v, expected_type):
4293 return v
4294
4295
4296 def merge_dicts(*dicts):
4297 merged = {}
4298 for a_dict in dicts:
4299 for k, v in a_dict.items():
4300 if v is None:
4301 continue
4302 if (k not in merged
4303 or (isinstance(v, compat_str) and v
4304 and isinstance(merged[k], compat_str)
4305 and not merged[k])):
4306 merged[k] = v
4307 return merged
4308
4309
4310 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4311 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4312
4313
4314 US_RATINGS = {
4315 'G': 0,
4316 'PG': 10,
4317 'PG-13': 13,
4318 'R': 16,
4319 'NC': 18,
4320 }
4321
4322
4323 TV_PARENTAL_GUIDELINES = {
4324 'TV-Y': 0,
4325 'TV-Y7': 7,
4326 'TV-G': 0,
4327 'TV-PG': 0,
4328 'TV-14': 14,
4329 'TV-MA': 17,
4330 }
4331
4332
4333 def parse_age_limit(s):
4334 if type(s) == int:
4335 return s if 0 <= s <= 21 else None
4336 if not isinstance(s, compat_basestring):
4337 return None
4338 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4339 if m:
4340 return int(m.group('age'))
4341 s = s.upper()
4342 if s in US_RATINGS:
4343 return US_RATINGS[s]
4344 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
4345 if m:
4346 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
4347 return None
4348
4349
4350 def strip_jsonp(code):
4351 return re.sub(
4352 r'''(?sx)^
4353 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4354 (?:\s*&&\s*(?P=func_name))?
4355 \s*\(\s*(?P<callback_data>.*)\);?
4356 \s*?(?://[^\n]*)*$''',
4357 r'\g<callback_data>', code)
4358
4359
4360 def js_to_json(code, vars={}):
4361 # vars is a dict of var, val pairs to substitute
4362 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4363 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4364 INTEGER_TABLE = (
4365 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4366 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4367 )
4368
4369 def fix_kv(m):
4370 v = m.group(0)
4371 if v in ('true', 'false', 'null'):
4372 return v
4373 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
4374 return ""
4375
4376 if v[0] in ("'", '"'):
4377 v = re.sub(r'(?s)\\.|"', lambda m: {
4378 '"': '\\"',
4379 "\\'": "'",
4380 '\\\n': '',
4381 '\\x': '\\u00',
4382 }.get(m.group(0), m.group(0)), v[1:-1])
4383 else:
4384 for regex, base in INTEGER_TABLE:
4385 im = re.match(regex, v)
4386 if im:
4387 i = int(im.group(1), base)
4388 return '"%d":' % i if v.endswith(':') else '%d' % i
4389
4390 if v in vars:
4391 return vars[v]
4392
4393 return '"%s"' % v
4394
4395 return re.sub(r'''(?sx)
4396 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4397 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4398 {comment}|,(?={skip}[\]}}])|
4399 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4400 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
4401 [0-9]+(?={skip}:)|
4402 !+
4403 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4404
4405
4406 def qualities(quality_ids):
4407 """ Get a numeric quality value out of a list of possible values """
4408 def q(qid):
4409 try:
4410 return quality_ids.index(qid)
4411 except ValueError:
4412 return -1
4413 return q
4414
4415
4416 DEFAULT_OUTTMPL = {
4417 'default': '%(title)s [%(id)s].%(ext)s',
4418 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
4419 }
4420 OUTTMPL_TYPES = {
4421 'chapter': None,
4422 'subtitle': None,
4423 'thumbnail': None,
4424 'description': 'description',
4425 'annotation': 'annotations.xml',
4426 'infojson': 'info.json',
4427 'pl_thumbnail': None,
4428 'pl_description': 'description',
4429 'pl_infojson': 'info.json',
4430 }
4431
4432 # As of [1] format syntax is:
4433 # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4434 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4435 STR_FORMAT_RE_TMPL = r'''(?x)
4436 (?<!%)(?P<prefix>(?:%%)*)
4437 %
4438 (?P<has_key>\((?P<key>{0})\))? # mapping key
4439 (?P<format>
4440 (?:[#0\-+ ]+)? # conversion flags (optional)
4441 (?:\d+)? # minimum field width (optional)
4442 (?:\.\d+)? # precision (optional)
4443 [hlL]? # length modifier (optional)
4444 {1} # conversion type
4445 )
4446 '''
4447
4448
4449 STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4450
4451
4452 def limit_length(s, length):
4453 """ Add ellipses to overly long strings """
4454 if s is None:
4455 return None
4456 ELLIPSES = '...'
4457 if len(s) > length:
4458 return s[:length - len(ELLIPSES)] + ELLIPSES
4459 return s
4460
4461
4462 def version_tuple(v):
4463 return tuple(int(e) for e in re.split(r'[-.]', v))
4464
4465
4466 def is_outdated_version(version, limit, assume_new=True):
4467 if not version:
4468 return not assume_new
4469 try:
4470 return version_tuple(version) < version_tuple(limit)
4471 except ValueError:
4472 return not assume_new
4473
4474
4475 def ytdl_is_updateable():
4476 """ Returns if yt-dlp can be updated with -U """
4477 return False
4478
4479 from zipimport import zipimporter
4480
4481 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4482
4483
4484 def args_to_str(args):
4485 # Get a short string representation for a subprocess command
4486 return ' '.join(compat_shlex_quote(a) for a in args)
4487
4488
4489 def error_to_compat_str(err):
4490 err_str = str(err)
4491 # On python 2 error byte string must be decoded with proper
4492 # encoding rather than ascii
4493 if sys.version_info[0] < 3:
4494 err_str = err_str.decode(preferredencoding())
4495 return err_str
4496
4497
4498 def mimetype2ext(mt):
4499 if mt is None:
4500 return None
4501
4502 ext = {
4503 'audio/mp4': 'm4a',
4504 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4505 # it's the most popular one
4506 'audio/mpeg': 'mp3',
4507 'audio/x-wav': 'wav',
4508 }.get(mt)
4509 if ext is not None:
4510 return ext
4511
4512 _, _, res = mt.rpartition('/')
4513 res = res.split(';')[0].strip().lower()
4514
4515 return {
4516 '3gpp': '3gp',
4517 'smptett+xml': 'tt',
4518 'ttaf+xml': 'dfxp',
4519 'ttml+xml': 'ttml',
4520 'x-flv': 'flv',
4521 'x-mp4-fragmented': 'mp4',
4522 'x-ms-sami': 'sami',
4523 'x-ms-wmv': 'wmv',
4524 'mpegurl': 'm3u8',
4525 'x-mpegurl': 'm3u8',
4526 'vnd.apple.mpegurl': 'm3u8',
4527 'dash+xml': 'mpd',
4528 'f4m+xml': 'f4m',
4529 'hds+xml': 'f4m',
4530 'vnd.ms-sstr+xml': 'ism',
4531 'quicktime': 'mov',
4532 'mp2t': 'ts',
4533 'x-wav': 'wav',
4534 }.get(res, res)
4535
4536
4537 def parse_codecs(codecs_str):
4538 # http://tools.ietf.org/html/rfc6381
4539 if not codecs_str:
4540 return {}
4541 split_codecs = list(filter(None, map(
4542 str.strip, codecs_str.strip().strip(',').split(','))))
4543 vcodec, acodec = None, None
4544 for full_codec in split_codecs:
4545 codec = full_codec.split('.')[0]
4546 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4547 if not vcodec:
4548 vcodec = full_codec
4549 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4550 if not acodec:
4551 acodec = full_codec
4552 else:
4553 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4554 if not vcodec and not acodec:
4555 if len(split_codecs) == 2:
4556 return {
4557 'vcodec': split_codecs[0],
4558 'acodec': split_codecs[1],
4559 }
4560 else:
4561 return {
4562 'vcodec': vcodec or 'none',
4563 'acodec': acodec or 'none',
4564 }
4565 return {}
4566
4567
4568 def urlhandle_detect_ext(url_handle):
4569 getheader = url_handle.headers.get
4570
4571 cd = getheader('Content-Disposition')
4572 if cd:
4573 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4574 if m:
4575 e = determine_ext(m.group('filename'), default_ext=None)
4576 if e:
4577 return e
4578
4579 return mimetype2ext(getheader('Content-Type'))
4580
4581
4582 def encode_data_uri(data, mime_type):
4583 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4584
4585
4586 def age_restricted(content_limit, age_limit):
4587 """ Returns True iff the content should be blocked """
4588
4589 if age_limit is None: # No limit set
4590 return False
4591 if content_limit is None:
4592 return False # Content available for everyone
4593 return age_limit < content_limit
4594
4595
4596 def is_html(first_bytes):
4597 """ Detect whether a file contains HTML by examining its first bytes. """
4598
4599 BOMS = [
4600 (b'\xef\xbb\xbf', 'utf-8'),
4601 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4602 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4603 (b'\xff\xfe', 'utf-16-le'),
4604 (b'\xfe\xff', 'utf-16-be'),
4605 ]
4606 for bom, enc in BOMS:
4607 if first_bytes.startswith(bom):
4608 s = first_bytes[len(bom):].decode(enc, 'replace')
4609 break
4610 else:
4611 s = first_bytes.decode('utf-8', 'replace')
4612
4613 return re.match(r'^\s*<', s)
4614
4615
4616 def determine_protocol(info_dict):
4617 protocol = info_dict.get('protocol')
4618 if protocol is not None:
4619 return protocol
4620
4621 url = info_dict['url']
4622 if url.startswith('rtmp'):
4623 return 'rtmp'
4624 elif url.startswith('mms'):
4625 return 'mms'
4626 elif url.startswith('rtsp'):
4627 return 'rtsp'
4628
4629 ext = determine_ext(url)
4630 if ext == 'm3u8':
4631 return 'm3u8'
4632 elif ext == 'f4m':
4633 return 'f4m'
4634
4635 return compat_urllib_parse_urlparse(url).scheme
4636
4637
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(rows):
        # Widest stringified cell of each column
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_marked(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # A column whose every data cell stringifies to '' has width 0 (falsy)
        # and is dropped from both the header and the data rows
        mask = column_widths(data)
        header_row = keep_marked(header_row, mask)
        data = [keep_marked(row, mask) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        # Insert a dashed separator line between the header and the data
        table = [header_row, ['-' * width for width in widths]] + data
    # Left-pad every column except the last, which is left ragged
    format_str = ' '.join('%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4658
4659
def _match_one(filter_part, dct):
    """Evaluate one filter expression (e.g. 'duration > 60', 'title *= foo',
    '!is_live') against the dict *dct*.

    Returns a truthy value when the filter passes and raises ValueError for
    syntactically invalid filter parts or misused operators.
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    # Binary form: <key> [!]<op>[?] <value>, where '?' makes a missing key pass
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quote characters escaped inside the quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer; try to interpret it as a filesize
                # (e.g. '500k'), with and without an explicit 'B' suffix
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing key passes only when the '?' (none_inclusive) flag was used
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: presence ('key') / absence ('!key') check;
    # booleans are tested for their value rather than mere presence
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4739
4740
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # Split on unescaped '&' only; each part must pass for the whole to pass
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct):
            return False
    return True
4747
4748
def match_filter_func(filter_str):
    """Build a match-filter callable from *filter_str*.

    The returned function takes an info dict and returns None when the video
    passes the filter, or a human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4757
4758
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset, optionally suffixed with 's' (e.g. '10.5s')
    match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if match:
        return float(match.group('time_offset'))

    # Clock time H:MM:SS[.f] or H:MM:SS:f (frame-style separator treated as '.')
    match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if match:
        hours, minutes, rest = match.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(rest.replace(':', '.'))
4770
4771
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode: HH:MM:SS,mmm."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4774
4775
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitle data to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no subtitle paragraphs
    '''
    # Older TTML/TTAF namespaces are rewritten to the current ones so a single
    # set of XPath expressions works for all document generations
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The only TTML styling attributes that can be expressed in SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders one <p> element as SRT-flavoured text

        def __init__(self):
            # These used to be *class* attributes; the two mutable lists were
            # then shared between every parser instance, so state left behind
            # by one paragraph (e.g. after a parse error) leaked into the
            # next. Instance attributes keep each paragraph independent.
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the node and run it through a fresh per-paragraph parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements; keep looping until styles whose parent style
    # appears later in the document have been resolved too
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced from <body> or <div> becomes the document default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4938
4939
def cli_option(params, command_option, param):
    """Return [command_option, str(value)] if params[param] is set, else [].

    The previous implementation only stringified *truthy* values, so a value
    like 0 slipped through unconverted and ended up as a non-string entry in
    an argv list; now every non-None value is converted.
    """
    param = params.get(param)
    return [command_option, compat_str(param)] if param is not None else []
4945
4946
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean param as CLI arguments.

    Returns [] when unset; otherwise ['--opt', 'true'/'false'], or a single
    '--opt<sep>value' token when *separator* is given.
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4955
4956
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4960
4961
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Select external-downloader/postprocessor args from *argdict*.

    *keys* is a list of key groups, tried in order; the first group with any
    matching entries wins and its argument lists are flattened and returned.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        # Collect the argument lists registered under any key of this group
        matches = [argdict.get(key.lower()) for key in variadic(key_list)]
        matches = [args for args in matches if args is not None]
        if matches:
            # Flatten the per-key argument lists into one list
            return [arg for args in matches for arg in args]
    return default
4980
4981
class ISO639Utils(object):
    """Conversion between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are considered, so region suffixes
        # (e.g. 'en-US') are tolerated
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear scan; the first matching entry wins for codes with several
        # 639-1 aliases (e.g. 'heb' -> 'he', not the legacy 'iw')
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5185
5186
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes
        return cls._country_map.get(code.upper())
5445
5446
class GeoUtils(object):
    """Helpers for faking a geographic location by picking an IP address
    from a per-country CIDR block (used for geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a str) from the block mapped to a
        two-letter country code, or from an explicit CIDR block string.
        Returns None for an unknown country code."""
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            # Anything longer than two characters is taken as a CIDR block
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Randomize only the host bits; the network prefix stays fixed
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5705
5706
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets each request override the configured proxy via
    a 'Ytdl-request-proxy' header, and defers SOCKS proxies to the
    http/https handlers via a 'Ytdl-socks-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers; the keyword defaults bind `type` and `meth`
        # at definition time so each lambda keeps its own scheme
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy overrides the handler-level one; the marker
        # header is internal, so strip it before the request goes out
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers wrap the socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5730
5731
5732 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5733 # released into Public Domain
5734 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5735
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    # after much testing, this algorithm was deemed to be the fastest
    s = b''
    n = int(n)
    # Emit 32 bits at a time, most significant word first
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n = n >> 32
    # strip off leading zeros
    for i in range(len(s)):
        if s[i] != b'\000'[0]:
            break
    else:
        # only happens when n == 0
        s = b'\000'
        i = 0
    s = s[i:]
    # add back some pad bytes. this could be done more efficiently w.r.t. the
    # de-padding being done above, but sigh...
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5764
5765
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a multiple of 4 so the input splits
    # cleanly into big-endian 32-bit words
    padding = (4 - len(s) % 4) % 4
    if padding:
        s = b'\000' * padding + s
    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
5781
5782
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The bytes are interpreted little-endian (hence the reversal before
    # hexlify), then raised with textbook modular exponentiation
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5798
5799
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data does not fit the target length
    """
    # PKCS#1 v1.5 needs at least 8 padding octets plus the 3 framing octets
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 section 7.2.1: EM = 0x00 || 0x02 || PS || 0x00 || M, where PS
    # consists of pseudo-randomly generated *nonzero* octets. A zero octet in
    # PS would make the decrypter split the padding early and corrupt the
    # message, so draw from 1..255 (the old code used randint(0, 254), which
    # could emit 0).
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5813
5814
def encode_base_n(num, n, table=None):
    """Render the non-negative integer num in base n, drawing digits from
    table (default: the first n characters of 0-9, a-z, A-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Collect digits least-significant first, then reverse
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5831
5832
def decode_packed_codes(code):
    """Expand source that was packed with a base-n symbol table
    (matched by PACKED_CODES_RE)."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Map each base-n encoded index to its replacement; an empty symbol
    # means the encoded index itself is kept
    symbol_table = {}
    for index in range(count - 1, -1, -1):
        encoded = encode_base_n(index, base)
        symbol_table[encoded] = symbols[index] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda match: symbol_table[match.group(0)],
        obfuscated_code)
5849
5850
def caesar(s, alphabet, shift):
    """Rotate every character of s that occurs in alphabet by shift
    positions (with wrap-around); all other characters pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    return ''.join(
        ch if ch not in alphabet else alphabet[(alphabet.index(ch) + shift) % size]
        for ch in s)
5858
5859
def rot47(s):
    """Apply the self-inverse ROT47 substitution: each of the 94 printable
    ASCII characters '!' (33) through '~' (126) is rotated by 47 places;
    any other character is left untouched."""
    return ''.join(
        chr(33 + (ord(ch) - 33 + 47) % 94) if '!' <= ch <= '~' else ch
        for ch in s)
5862
5863
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted,value",...')
    into a dict, stripping surrounding double quotes from quoted values."""
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in pairs}
5871
5872
def urshift(val, n):
    """Unsigned 32-bit right shift: a negative val is first reinterpreted
    as its two's-complement 32-bit value."""
    if val < 0:
        val += 0x100000000
    return val >> n
5875
5876
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """
    Decode a PNG byte string into a (width, height, pixels) tuple.

    pixels is a list of rows; each row is a flat list of single color
    components, 3 bytes per pixel. NOTE(review): the IHDR color type and
    bit depth are never inspected, so only non-interlaced 8-bit RGB data
    decodes correctly — confirm inputs match that.

    Reference: https://www.w3.org/TR/PNG/
    """
    # Everything after the 8-byte PNG signature
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Helpers to read big-endian unsigned ints of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk (verified by the signature check above)
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # All IDAT chunks concatenated form a single zlib stream
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel; each scanline is prefixed by one filter-type byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by flat (row-major) index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        # First byte of each scanline selects the filter (PNG spec, section 9)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # "left" is the same channel of the previous pixel (3 bytes back);
            # the first pixel of a row (x <= 2) has no left neighbour
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter to reconstruct the raw byte
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of left/up/upper-left predicts best
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5982
5983
def write_xattr(path, key, value):
    """
    Set the extended file attribute `key` on the file at `path` to `value`
    (bytes), trying in order: the pyxattr/xattr Python modules, NTFS
    Alternate Data Streams (Windows), and the setfattr/xattr CLI tools.

    Raises XAttrMetadataError when writing the attribute fails, and
    XAttrUnavailableError when no usable implementation can be found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Neither pyxattr nor xattr is importable: shell out to CLI tools
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a string argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6066
6067
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the (stringified)
    year, month and day of a random date between 1950-01-01 and 1995-12-31."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    birthday = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
6078
6079
# Templates for internet shortcut files, which are plain text files.
# Each is rendered with %-formatting against a dict providing 'url'
# (and, for the .desktop template, 'filename').

# Windows-style .url shortcut ([InternetShortcut] INI section)
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut (Apple XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org .desktop entry (Linux desktop shortcut)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6105
6106
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.

    # Keep an explicitly given port unless it is the default port of the
    # scheme. The previous `port != 80` test wrongly stripped ':80' from
    # non-http schemes (e.g. https://host:80) while keeping a redundant
    # ':443' on https URLs.
    if iri_parts.port is not None and iri_parts.port != {'http': 80, 'https': 443}.get(iri_parts.scheme):
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6149
6150
def to_high_limit_path(path):
    r"""On Windows, return the absolute form of path prefixed with \\?\
    to sidestep the MAX_PATH limit (individual path segments may still be
    length-limited); on other platforms return path unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
6157
6158
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch a value (obj itself, or obj[field]) and render it through
    template; values in `ignore` (before or after applying func) yield
    `default` instead."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    # Re-check after func: a transformed value may have become ignorable
    if val in ignore:
        return default
    return template % val
6167
6168
def clean_podcast_url(url):
    """Strip known podcast download-analytics / tracking redirect prefixes
    (Chartable, Blubrry, Podtrac, Acast, Podcorn, Podsights) from url."""
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
6184
6185
# Digits used when generating random hex characters
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random UUIDv4-shaped string. NOTE: the 'y' position is
    drawn from all 16 hex digits, not just 8/9/a/b as RFC 4122 prescribes
    for the variant nibble -- kept for compatibility."""
    return ''.join(
        random.choice(_HEX_TABLE) if ch in 'xy' else ch
        for ch in 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6191
6192
6193 def make_dir(path, to_screen=None):
6194 try:
6195 dn = os.path.dirname(path)
6196 if dn and not os.path.exists(dn):
6197 os.makedirs(dn)
6198 return True
6199 except (OSError, IOError) as err:
6200 if callable(to_screen) is not None:
6201 to_screen('unable to create directory ' + error_to_compat_str(err))
6202 return False
6203
6204
def get_executable_path():
    """Return the directory the program is effectively running from:
    the binary's directory under PyInstaller, two levels up when imported
    from a zip, otherwise one level up from this module."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6214
6215
def load_plugins(name, suffix, namespace):
    """
    Import module `name` from the 'ytdlp_plugins' directory (next to the
    executable path) and register every attribute whose name ends with
    `suffix` into the `namespace` dict, skipping names already present.

    Returns the list of newly registered classes; returns [] when the
    plugin module cannot be found.
    """
    plugin_info = [None]
    classes = []
    try:
        # NOTE(review): the deprecated `imp` module is used here; it was
        # removed in Python 3.12 (importlib is the modern replacement)
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        # NOTE(review): this loop variable shadows the `name` parameter
        for name in dir(plugins):
            if name in namespace:
                continue
            if not name.endswith(suffix):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        # Missing plugin module is not an error
        pass
    finally:
        # imp.find_module returns an open file object as its first element
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
6237
6238
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal) records the deepest branching level reached, so
        # the caller knows how many nested result lists need to be flattened
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # A tuple/list of alternative keys: traverse each, then treat
                # the collected results like a "..." expansion
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch into every value of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Dict lookup; case-insensitive fallback scans all items
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert user-supplied strings into an int index or slice
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ':' behaves like "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a filtering callable
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Flatten all but the outermost branching level, dropping Nones
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6321
6322
def traverse_dict(dictn, keys, casesense=True):
    '''Deprecated shim kept for backward compatibility. Do not use;
    call traverse_obj directly instead.'''
    return traverse_obj(
        dictn, keys,
        casesense=casesense, is_user_input=True, traverse_string=True)
6327
6328
def variadic(x, allowed_types=(str, bytes)):
    """Return x itself when it is an iterable container, otherwise wrap it
    in a 1-tuple. Instances of allowed_types (str/bytes by default) count
    as scalars rather than containers."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)