]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
Improve output template internal formatting
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import imp
20 import io
21 import itertools
22 import json
23 import locale
24 import math
25 import operator
26 import os
27 import platform
28 import random
29 import re
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_cookiejar,
47 compat_ctypes_WINFUNCTYPE,
48 compat_etree_fromstring,
49 compat_expanduser,
50 compat_html_entities,
51 compat_html_entities_html5,
52 compat_http_client,
53 compat_integer_types,
54 compat_numeric_types,
55 compat_kwargs,
56 compat_os_name,
57 compat_parse_qs,
58 compat_shlex_quote,
59 compat_str,
60 compat_struct_pack,
61 compat_struct_unpack,
62 compat_urllib_error,
63 compat_urllib_parse,
64 compat_urllib_parse_urlencode,
65 compat_urllib_parse_urlparse,
66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
69 compat_urllib_parse_unquote_plus,
70 compat_urllib_request,
71 compat_urlparse,
72 compat_xpath,
73 )
74
75 from .socks import (
76 ProxyType,
77 sockssocket,
78 )
79
80
def register_socks_protocols():
    """Teach urlparse that SOCKS schemes carry a network location.

    In Python < 2.6.5, urlsplit() suffers from https://bugs.python.org/issue7904:
    URLs whose scheme is missing from urlparse.uses_netloc are parsed
    incorrectly, so the SOCKS proxy schemes are "registered" here once.
    """
    missing = [scheme for scheme in ('socks', 'socks4', 'socks4a', 'socks5')
               if scheme not in compat_urlparse.uses_netloc]
    compat_urlparse.uses_netloc.extend(missing)
88
89
# This is not clearly defined otherwise
# (Python 2 exposes no public name for the compiled-pattern class, so the
# type is derived by compiling an empty pattern; used for isinstance checks)
compiled_regex_type = type(re.compile(''))
92
93
def random_user_agent():
    """Return a plausible Chrome-on-Windows User-Agent string.

    A Chrome version is picked at random from a small pool of real stable
    releases so repeated runs do not always present the identical UA.
    A compact pool (a handful of builds per major release) is just as
    effective for blending in as the former ~1,500-entry list of 2019-era
    dev/canary builds, and is far easier to keep current.

    Returns:
        str: a full Mozilla/Chrome/Safari-style User-Agent header value.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Stable Chrome builds only; extend with representative builds as new
    # Chrome major versions are released.
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Default HTTP headers sent with every request; the UA is randomized once
# per process from the _CHROME_VERSIONS pool above.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative user agents extractors can opt into when a site serves
# different (easier) content for a given browser.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel meaning "no default was supplied" — lets callers pass None as a
# legitimate default value (see xpath_element & friends).
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names by language code, used when parsing non-English dates.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized when guessing formats from URLs/paths.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# Container formats that ffmpeg can remux into without re-encoding.
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strftime patterns tried, in order, by the date parsing helpers.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Variants preferring day-first (European) interpretation of ambiguous dates.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Variants preferring month-first (US) interpretation of ambiguous dates.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of a P.A.C.K.E.R.-obfuscated JS payload.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts JSON-LD <script> blocks; group 'json_ld' holds the JSON body.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1786
1787
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Verify that the reported encoding actually works before trusting it
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1801
1802
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        # Create the temp file next to the target so the final os.rename
        # stays within one filesystem (rename across devices fails)
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            # default=repr keeps the dump from raising on values json
            # cannot serialize natively
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with restrictive permissions;
            # widen them to the umask-derived default before publishing
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1861
1862
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # Only plain attribute names are safe to interpolate into the expression
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Python 2.6 fallback: attribute predicates are not supported by its
        # ElementPath, so filter candidate elements manually
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
1880
1881
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' using ns_map."""
    expanded = []
    for step in path.split('/'):
        parts = step.split(':')
        if len(parts) == 1:
            # No namespace prefix on this step
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1892
1893
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a single xpath or a sequence
    of alternatives). Falls back to `default` if given, raises if `fatal`,
    otherwise returns None."""
    if isinstance(xpath, (str, compat_str)):
        found = node.find(compat_xpath(xpath))
    else:
        # Try each alternative xpath in order, keeping the first hit
        for candidate in xpath:
            found = node.find(compat_xpath(candidate))
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1915
1916
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element, but return the matched element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    # Element exists but carries no text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1930
1931
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matching xpath[@key], with the
    usual default/fatal fallback behavior."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1943
1944
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # NOTE(review): parameter name shadows the builtin `id`; kept for API compatibility
    return get_element_by_attribute('id', id, html)
1948
1949
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1954
1955
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying the given attribute/value pair."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1959
1960
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # \b-delimited match anywhere inside the (possibly multi-valued) class attribute
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1966
1967
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    # Match an opening tag that carries attribute=value in any position,
    # then capture everything up to the first matching close tag.
    # NOTE(review): being regex-based, this cannot pair nested same-name
    # tags, so content of nested elements may be truncated.
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        # Strip a stray surrounding quote pair if present
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
1991
1992
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Filled in by handle_starttag(); empty until feed() sees a tag
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Each start tag overwrites the previous attrs, so after feeding a
        # single element the dict describes that element
        self.attrs = dict(attrs)
2002
2003
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        # Feed just this one element; the parser keeps its last-seen attrs
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs
2028
2029
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None: # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags (naive non-greedy match; also removes stray <...> text)
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
2045
2046
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means stdout; on Windows switch it to binary mode first
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors won't be fixed by renaming — give up immediately
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2077
2078
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp.

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2086
2087
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def _sanitize_char(c):
        # Transliterate accented characters in restricted mode
        if restricted and c in ACCENT_CHARS:
            return ACCENT_CHARS[c]
        code = ord(c)
        if c == '?' or code < 32 or code == 127:
            return ''
        if c == '"':
            return '' if restricted else '\''
        if c == ':':
            return '_-' if restricted else ' -'
        if c in '\\/|*<>':
            return '_'
        if restricted and (c in '!&\'()[]{}$;`^,#' or c.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return c

    # Handle timestamps: turn 12:34:56-style colons into underscores first
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(_sanitize_char(c) for c in s)
    if is_id:
        return result
    # Collapse underscore runs and trim filler characters
    while '__' in result:
        result = result.replace('__', '_')
    result = result.strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
2127
2128
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    # On Windows the rules always apply; `force` additionally applies the
    # Windows character rules on other platforms (it is reset on win32
    # because the drive/UNC handling below must run there regardless).
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        # Replace Windows-forbidden characters and trailing dots/spaces;
        # '.' and '..' components are kept as-is
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Keep absolute paths absolute when forcing on POSIX
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2152
2153
def sanitize_url(url):
    """Fix common URL mistakes (missing scheme, known typos) and escape it."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for bad, good in typo_fixes:
        if re.match(bad, url):
            return re.sub(bad, good, url)
    return escape_url(url)
2170
2171
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after passing the URL through sanitize_url()."""
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2174
2175
def expand_path(s):
    """Expand shell variables and ~"""
    # Environment variables first, then the user home directory
    return os.path.expandvars(compat_expanduser(s))
2179
2180
def orderedSet(iterable):
    """Remove duplicates from `iterable`, preserving first-seen order.

    Uses list membership (not a set) so unhashable elements also work.
    """
    deduped = []
    for item in iterable:
        if item not in deduped:
            deduped.append(item)
    return deduped
2188
2189
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    # Drop the trailing ';'
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric entity: decimal (&#65;) or hexadecimal (&#x41;)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2219
2220
def unescapeHTML(s):
    """Replace HTML entities (named, decimal and hex) in `s`; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2228
2229
def process_communicate_or_kill(p, *args, **kwargs):
    """Wrapper around Popen.communicate() that terminates the child on failure.

    Re-raises the original exception (including KeyboardInterrupt) after
    making sure the subprocess has been killed and reaped.
    """
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2237
2238
def get_subprocess_encoding():
    """Return the encoding to use for subprocess argument/stream data."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2249
2250
def encodeFilename(s, for_subprocess=False):
    """
    Encode a filename for use with OS APIs (no-op on Python 3).

    @param s               The name of the file (text/unicode string)
    @param for_subprocess  True when the name is passed to a child process
                           rather than to filesystem APIs directly
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2273
2274
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename: turn a byte filename back into text.

    On Python 3 (and for anything that is not bytes) the value is returned
    unchanged.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2284
2285
def encodeArgument(s):
    """Encode a command-line argument for a subprocess, like a filename."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2293
2294
def decodeArgument(b):
    """Inverse of encodeArgument: decode a subprocess argument back to text."""
    return decodeFilename(b, True)
2297
2298
def decodeOption(optval):
    """Decode a command-line option value into text; None passes through."""
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
2307
2308
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as [H<delim>]MM<delim>SS-style text.

    @param secs   Duration in seconds (fractions are truncated)
    @param delim  Separator placed between the time components

    Fixed an off-by-one at the unit boundaries: exactly 3600 s used to
    render as '60:00' and exactly 60 s as '60'; the comparisons must be
    inclusive (>=) so full hours/minutes roll over correctly.
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2316
2317
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler with an SSL context appropriate for the
    running Python version and the 'nocheckcertificate' option."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2341
2342
def bug_reports_message(before=';'):
    """Return the standard 'please report this issue' blurb, appended to `before`."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.') % update_cmd

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence: capitalize the first word
        msg = msg[0].title() + msg[1:]

    return (before + ' ' if before else '') + msg
2357
2358
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    All yt-dlp specific exceptions below derive from this class.
    """
    pass
2362
2363
# Exceptions that indicate a network-level failure rather than a program bug;
# ExtractorError treats these as "expected" (no bug-report message appended).
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    # Only present on Python builds whose ssl module exposes it
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2368
2369
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """

        # Network failures are always "expected" — not bug-report material
        if sys.exc_info()[0] in network_exceptions:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Append the standard bug reporting instructions
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Render the stored traceback as a string, or None if none was given."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2397
2398
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL."""

    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        # Keep the offending URL for callers that want to inspect it
        self.url = url
2404
2405
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2409
2410
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Geo-restrictions are a site policy, not a bug -> expected=True
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # Optional list of country codes from which the video is available
        self.countries = countries
2422
2423
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2436
2437
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2445
2446
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2454
2455
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the raw message accessible to reporting code
        self.msg = msg
2466
2467
class ExistingVideoReached(YoutubeDLError):
    """ --break-on-existing triggered: an already-downloaded video was reached. """
    # NOTE(review): the previous docstring ("--max-downloads limit has been
    # reached") was copy-pasted onto three unrelated classes; corrected per
    # the class name.
    pass
2471
2472
class RejectedVideoReached(YoutubeDLError):
    """ --break-on-reject triggered: a video matching the reject filters was reached. """
    # NOTE(review): the previous docstring ("--max-downloads limit has been
    # reached") was copy-pasted onto three unrelated classes; corrected per
    # the class name.
    pass
2476
2477
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2481
2482
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2490
2491
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2507
2508
class XAttrMetadataError(YoutubeDLError):
    """Raised when reading/writing extended file attributes (xattrs) fails."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # Derive a coarse machine-readable reason from the errno and message.
        # NOTE(review): errno.EDQUOT is not defined on every platform
        # (e.g. Windows) — confirm this attribute access is always safe here.
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2523
2524
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no usable xattr implementation is available on this system."""
    pass
2527
2528
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection, honoring the 'source_address' option.

    Used as the connection factory for the do_open() calls in the handlers
    below; `ydl_handler._params` supplies the yt-dlp options.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the configured source address
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2592
2593
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, any Accept-Encoding header is dropped along
    with it (so the server sends an uncompressed response); otherwise the
    headers are returned unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = {k: v for k, v in headers.items() if k.lower() != 'accept-encoding'}
    del filtered['Youtubedl-no-compression']
    return filtered
2602
2603
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        # Options dict passed down from YoutubeDL; stored for later use.
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # The internal 'Ytdl-socks-proxy' header smuggles a proxy URL into
        # this handler; swap in a SOCKS-capable connection class and make
        # sure the marker never reaches the wire.
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        # Decompress raw deflate data; fall back to the zlib-wrapped variant
        # for servers that send a zlib header despite advertising 'deflate'.
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Apply the default headers without overriding caller-provided ones.
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Strip internal control headers (e.g. Youtubedl-no-compression).
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            # Re-wrap so downstream consumers see a transparent, decoded body.
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets the exact same pre/post-processing.
    https_request = http_request
    https_response = http_response
2727
2728
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of *base_class* that tunnels its connection through
    the SOCKS proxy described by the URL *socks_proxy*.

    base_class must be HTTPConnection or HTTPSConnection (asserted).
    socks_proxy is a URL like 'socks5://user:pass@host:port'; supported
    schemes are socks/socks4, socks4a and socks5. Raises ValueError for an
    unsupported scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously an unrecognized scheme fell through and surfaced later
        # as a confusing NameError when proxy_args was built; fail early.
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Credentials may be percent-encoded in the proxy URL.
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunneled socket in TLS as well.
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2770
2771
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that supports a custom connection class and SOCKS
    proxying via the internal 'Ytdl-socks-proxy' request header."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        connection_class = self._https_conn_class

        # Forward the SSL context / hostname checking that the base handler
        # was configured with, where the running Python exposes them.
        open_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            open_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            open_kwargs['check_hostname'] = self._check_hostname

        # Swap in a SOCKS-capable class when the internal marker is present,
        # and strip the marker so it never reaches the server.
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
2795
2796
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in Netscape-format files.
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookies.txt line.
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Field layout of one cookies.txt line, in file order.
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                # Netscape format spells booleans as TRUE/FALSE strings.
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so the stock parser accepts the line,
            # then validate field count and expiry before handing it on.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Filter the file through prepare_line() into an in-memory buffer,
        # skipping (with a warning) any malformed entries, then let the
        # base class parse the sanitized result.
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2913
2914
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that routes HTTPS requests and responses through the
    same cookie handling as plain HTTP."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is intentionally left disabled.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS uses the stock HTTP request hook and the response hook above.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2937
2938
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Handle all of these status codes the same way as a 302 redirect.
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Entity headers must not be forwarded to the new location.
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
2994
2995
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (utc_offset, remainder): utc_offset is a datetime.timedelta
    (zero for 'Z', a missing sign, or no match at all) and remainder is
    date_str with the matched designator stripped.
    """
    # At least 8 characters must precede the designator so short strings
    # that merely look like offsets are not matched.
    mobj = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not mobj:
        return datetime.timedelta(), date_str
    remainder = date_str[:-len(mobj.group('tz'))]
    if not mobj.group('sign'):
        # 'Z' (UTC) carries no numeric offset.
        return datetime.timedelta(), remainder
    direction = 1 if mobj.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(mobj.group('hours')),
        minutes=direction * int(mobj.group('minutes')))
    return offset, remainder
3012
3013
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Convert an ISO 8601-style date string into a UNIX timestamp.

    delimiter separates the date and time components (usually 'T').
    timezone, when given, is a datetime.timedelta UTC offset; otherwise the
    offset is extracted from the string itself. Returns None when date_str
    is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # strptime's format below has no slot for fractional seconds.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_dt = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        return None
    return calendar.timegm(utc_dt.timetuple())
3031
3032
def date_formats(day_first=True):
    """Return the strptime format table to try, ordered for day-first or
    month-first conventions."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3035
3036
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    upload_date = None
    # Commas carry no meaning here; turn them into spaces.
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # Deliberately no break: when several formats match, the last matching
    # format in the table wins, mirroring the original precedence.
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass

    if upload_date is None:
        # Fall back to RFC 2822-style parsing.
        parsed = email.utils.parsedate_tz(date_str)
        if parsed:
            try:
                upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3063
3064
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string into a UNIX timestamp (or None)."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Add 12 hours later when a PM marker appears anywhere (case-insensitive).
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # First matching format wins here (unlike unified_strdate, where the
    # last matching format takes precedence).
    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to RFC 2822-style parsing.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3096
3097
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    # Take whatever follows the last dot, ignoring any query string.
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3109
3110
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    # Build the subtitle file name by swapping the media extension for
    # "<lang>.<format>" (e.g. video.mp4 -> video.en.vtt). expected_real_ext
    # is forwarded so replace_extension can validate the extension replaced.
    return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
3113
3114
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
        auto|microsecond|second|minute|hour|day.
        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        # 'auto' defers rounding until the offset unit is known (below).
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Resolve the base date recursively, then apply the signed offset.
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years are irregular, so use calendar arithmetic instead
            # of a timedelta; rounding then happens at day granularity.
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                # timedelta has no 'weeks' key in this mapping; convert to days.
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    # Plain absolute date: parse with *format* and round as requested.
    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3155
3156
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    # precision='microsecond' disables rounding inside datetime_from_str;
    # .date() then simply discards the time-of-day portion.
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
3165
3166
def datetime_add_months(dt, months):
    """Shift *dt* by *months* (possibly negative), clamping the day to the
    last valid day of the target month (e.g. Jan 31 + 1 month -> Feb 28)."""
    # Work with a zero-based month index so // and % handle year carries,
    # including negative offsets, in one go.
    zero_based_month = dt.month - 1 + months
    new_year = dt.year + zero_based_month // 12
    new_month = zero_based_month % 12 + 1
    new_day = min(dt.day, calendar.monthrange(new_year, new_month)[1])
    return dt.replace(new_year, new_month, new_day)
3174
3175
def datetime_round(dt, precision='day'):
    """Round *dt* to the nearest day/hour/minute/second; 'microsecond'
    returns it unchanged. Exact halfway points round up."""
    if precision == 'microsecond':
        return dt

    step = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    ts = calendar.timegm(dt.timetuple())
    # Classic round-half-up via floor division (float math, as before).
    rounded = ((ts + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3192
3193
def hyphenate_date(date_str):
    """Convert 'YYYYMMDD' to 'YYYY-MM-DD'; other inputs are returned as-is."""
    mobj = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(mobj.groups()) if mobj else date_str
3202
3203
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # A missing bound defaults to the widest representable range.
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3233
3234
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Some Python 2 environments hand back bytes; normalize to text.
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3243
3244
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C-runtime file descriptors to GetStdHandle IDs
    # (1 = stdout -> -11, 2 = stderr -> -12).
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is not an interactive console (e.g. a pipe
        # or redirected file); such streams must use the normal write path.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character beyond the Basic Multilingual Plane,
        # or len(s) when there is none.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks, handling non-BMP characters (surrogate pairs) one at
    # a time since WriteConsoleW counts UTF-16 code units.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3318
3319
def write_string(s, out=None, encoding=None):
    # Write text *s* to *out* (default: stderr), picking a strategy that
    # copes with Windows consoles, byte streams and Python 2/3 differences.
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Windows console API, which handles Unicode properly.
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream backed by a binary buffer: encode explicitly so
        # unencodable characters are dropped instead of raising.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3340
3341
def bytes_to_intlist(bs):
    """Turn a bytes/str sequence into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: bytes index as ints already
        return list(bs)
    # Python 2 (or a text string): elements are 1-char strings.
    return [ord(ch) for ch in bs]
3349
3350
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values back into a bytes object."""
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
3355
3356
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure required by
        # LockFileEx/UnlockFileEx.
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,  # hFile
        ctypes.wintypes.DWORD,  # dwFlags
        ctypes.wintypes.DWORD,  # dwReserved
        ctypes.wintypes.DWORD,  # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,  # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,  # hFile
        ctypes.wintypes.DWORD,  # dwReserved
        ctypes.wintypes.DWORD,  # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,  # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count to lock: together 2**63 - 1 bytes,
    # i.e. effectively the whole file.
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED pointer alive on the file object so the later
        # unlock call can reuse it.
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 = LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock.
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            # Surface the limitation explicitly rather than silently no-op.
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3430
3431
class locked_file(object):
    """File wrapper that holds an advisory lock for the duration of a
    `with` block: shared for reads, exclusive for writes/appends."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers can share the lock; any writing mode needs exclusivity.
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Always close, even if unlocking raises.
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3461
3462
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to 'utf-8' when the
    interpreter reports none."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3466
3467
def shell_quote(args):
    """Join *args* into a single shell-safe command line string."""
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'; decode first.
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
3477
3478
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL so nothing is lost.
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    payload = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + payload
3487
3488
def unsmuggle_url(smug_url, default=None):
    """Extract data smuggled into a URL fragment by smuggle_url().

    Returns (url, data); when no smuggled payload is present, the URL is
    returned untouched together with *default*.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3496
3497
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. 1536 -> '1.50KiB'.

    Accepts ints, floats, numeric strings and None (rendered as 'N/A').
    The parameter deliberately shadows the builtin name for backward
    compatibility with existing keyword-argument callers.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    # Clamp so absurdly large values saturate at YiB instead of raising
    # IndexError (previously any value >= 1024**9 crashed here).
    exponent = min(exponent, len(suffixes) - 1)
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3510
3511
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number><unit>' from *s* using *unit_table*
    (unit -> multiplier mapping); return the scaled int, or None."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not mobj:
        return None
    # A comma is accepted as a decimal separator as well.
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3521
3522
def parse_filesize(s):
    """Parse a human-readable file size ('5 MiB', '1.2GB', ...) into an
    integer byte count, or None when *s* is None or unparsable."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    # NOTE: lookup_unit_table builds a regex alternation from these keys in
    # insertion order, so the ordering above is part of the behavior.
    return lookup_unit_table(_UNIT_TABLE, s)
3592
3593
def parse_count(s):
    """Parse a human-readable count like '1.2M', '15k' or '1,000' into an int, or None."""
    if s is None:
        return None

    s = s.strip()

    # Plain (possibly digit-grouped) number
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    # Suffix multipliers; 'kk'/'KK' is an informal way of writing millions
    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3613
3614
def parse_resolution(s):
    """Extract {'width': ..., 'height': ...} from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    # WIDTHxHEIGHT (also accepts the Unicode multiplication sign)
    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if mobj is not None:
        return {'width': int(mobj.group('w')), 'height': int(mobj.group('h'))}

    # <height>p / <height>i scan-line notation
    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj is not None:
        return {'height': int(mobj.group(1))}

    # 4k / 8k marketing names correspond to 2160 / 4320 lines
    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj is not None:
        return {'height': int(mobj.group(1)) * 540}

    return {}
3635
3636
def parse_bitrate(s):
    """Extract a bitrate in kbps from a string like '128 kbps'; None for non-strings or no match."""
    if not isinstance(s, compat_str):
        return
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    if mobj:
        return int(mobj.group(1))
3643
3644
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    # Fall back to English when the requested language is unknown
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in month_names:
        return None
    return month_names.index(name) + 1
3654
3655
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
3664
3665
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # A '&' is only escaped when it does not already start a known or numeric entity
    stray_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return stray_ampersand.sub('&amp;', xml_str)
3672
3673
def setproctitle(title):
    """Best-effort: set the process name shown by ps/top via glibc prctl; no-op elsewhere."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME (Linux)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3698
3699
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present; tolerates s=None."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3702
3703
def remove_end(s, end):
    """Strip *end* from the end of *s* when present; tolerates s=None.

    Removing an empty suffix is a no-op: the naive `s[:-len(end)]` slice
    evaluates to `s[:0]` for a zero-length suffix and would wrongly empty
    the whole string.
    """
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
3706
3707
def remove_quotes(s):
    """Drop one matching pair of surrounding single or double quotes, if any."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3715
3716
def get_domain(url):
    """Return the 'domain.tld' part of *url* (scheme and leading 'www.' stripped), or None."""
    mobj = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if mobj is None:
        return None
    return mobj.group('domain')
3720
3721
def url_basename(url):
    """Return the last path component of *url* (query/fragment excluded by urlparse)."""
    path = compat_urlparse.urlparse(url).path
    return path.rstrip('/').rpartition('/')[2]
3725
3726
def base_url(url):
    """Return *url* up to and including the final '/' preceding any '?', '#' or '&'."""
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group(0)
3729
3730
def urljoin(base, path):
    """Join *base* and *path* like a browser would; returns None on unusable input."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    # Absolute (possibly protocol-relative) paths are returned unchanged
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3744
3745
class HEADRequest(compat_urllib_request.Request):
    """A Request subclass that always uses the HEAD HTTP method."""
    def get_method(self):
        return 'HEAD'
3749
3750
class PUTRequest(compat_urllib_request.Request):
    """A Request subclass that always uses the PUT HTTP method."""
    def get_method(self):
        return 'PUT'
3754
3755
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* (optionally an attribute of it) to int, scaled by invscale/scale.

    Returns *default* for None, '' or anything int() cannot handle.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3768
3769
def str_or_none(v, default=None):
    """Stringify *v*, passing None through as *default*."""
    if v is None:
        return default
    return compat_str(v)
3772
3773
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    # Strips ',', '.' and '+' so grouped numbers like '1.000' or '12,345'
    # parse as plain integers
    if isinstance(int_str, compat_integer_types):
        return int_str
    elif isinstance(int_str, compat_str):
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3781
3782
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float scaled by invscale/scale; *default* if conversion fails."""
    if v is None:
        return default
    try:
        return invscale * float(v) / scale
    except (ValueError, TypeError):
        return default
3790
3791
def bool_or_none(v, default=None):
    """Return *v* if it is a real bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3794
3795
def strip_or_none(v, default=None):
    """Return v.strip() for strings, *default* for anything else."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3798
3799
def url_or_none(url):
    """Return the stripped URL if it looks like an http/rtmp/rtsp/mms/ftp
    (possibly protocol-relative) URL, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3805
3806
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or a 'YYYYMMDD' string; *default* on any failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3817
3818
def parse_duration(s):
    """Parse a duration string into seconds (float), or None if unrecognized.

    Handles clock-style ('1:23:45.6'), unit-suffixed / ISO-8601-like
    ('PT1H2M3S', '3 min 5s') and plain 'X hours' / 'X mins' forms.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # [[[DD:]HH:]MM:]SS[.ms] clock-style durations
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Unit-suffixed / ISO-8601-like form; years, months and weeks are
        # matched but deliberately discarded (no capture groups for them)
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Last resort: fractional 'X hours' or 'X mins' expressions
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms still carries its leading '.', so float(ms) is the second fraction
        duration += float(ms)
    return duration
3875
3876
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the file's extension, e.g. ('a.mp4', 'f1') -> 'a.f1.mp4'.

    If *expected_real_ext* is given and the current extension differs,
    *ext* is appended after the whole name instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3883
3884
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file's extension with *ext*.

    If *expected_real_ext* is given and does not match the current
    extension, *ext* is appended instead of replacing it.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3890
3891
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        # Binary not found (or not executable)
        return False
    return exe
3901
3902
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = process_communicate_or_kill(subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    # The actual version-string extraction is delegated to detect_exe_version
    return detect_exe_version(out, version_re, unrecognized)
3920
3921
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version number from a program's --version style output."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
3931
3932
class PagedList(object):
    """Base class for lazily-paged result lists; subclasses implement getslice()."""
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3937
3938
class OnDemandPagedList(PagedList):
    """A PagedList that fetches pages on demand via pagefunc, optionally caching them."""
    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc  # pagenum -> iterable of page entries
        self._pagesize = pagesize  # number of entries per page
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}  # pagenum -> previously fetched results

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list, fetching pages as needed."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offsets of the requested slice within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3989
3990
class InAdvancePagedList(PagedList):
    """A PagedList with a known total page count, fetched page by page."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc    # pagenum -> iterable of page entries
        self._pagecount = pagecount  # total number of pages
        self._pagesize = pagesize    # number of entries per page

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize  # offset into the first page
        only_more = None if end is None else end - start  # entries still wanted
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the requested slice
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
4018
4019
def uppercase_escape(s):
    """Decode '\\UXXXXXXXX' escape sequences embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(mobj):
        return decoder(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _decode, s)
4026
4027
def lowercase_escape(s):
    """Decode '\\uXXXX' escape sequences embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')

    def _decode(mobj):
        return decoder(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _decode, s)
4034
4035
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        # Python 2's quote() requires a byte string
        s = s.encode('utf-8')
    # The safe-list keeps RFC 3986 reserved characters unescaped
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4041
4042
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    url_parsed = compat_urllib_parse_urlparse(url)
    return url_parsed._replace(
        # Hostnames are IDNA-encoded rather than percent-escaped
        netloc=url_parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(url_parsed.path),
        params=escape_rfc3986(url_parsed.params),
        query=escape_rfc3986(url_parsed.query),
        fragment=escape_rfc3986(url_parsed.fragment)
    ).geturl()
4053
4054
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping BOMs, blank lines and comment lines."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Covers both the UTF-8 BOM bytes decoded as individual characters
        # and the real BOM code point
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4072
4073
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4076
4077
def update_url_query(url, query):
    """Return *url* with the parameters in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parsed.query)
    merged.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(merged, True)))
4086
4087
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its URL, body, headers and query."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    # Preserve the HTTP method of the original request
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    # Carry over a custom timeout when one was attached to the original
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4106
4107
def _multipart_encode_impl(data, boundary):
    """Encode *data* as multipart/form-data using *boundary*.

    Returns (encoded_bytes, content_type). Raises ValueError when the
    boundary occurs inside a field, i.e. a different boundary is required.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    for k, v in data.items():
        out += b'--' + boundary.encode('ascii') + b'\r\n'
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary.encode('ascii') in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary.encode('ascii') + b'--\r\n'

    return out, content_type
4128
4129
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # Boundary collided with the payload: retry with a fresh one,
            # unless the caller pinned the boundary explicitly
            if has_specified_boundary:
                raise
            boundary = None
4158
4159
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first usable value among *key_or_keys* from dict *d*.

    A value is skipped when missing, None, or (with skip_false_values) falsy.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4168
4169
def try_get(src, getter, expected_type=None):
    """Apply each getter to *src*, returning the first result that neither
    raises nor (when *expected_type* is given) mismatches; otherwise None."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fn in getters:
        try:
            value = fn(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4181
4182
def merge_dicts(*dicts):
    """Merge dicts left to right; earlier values win, except that a later
    non-empty string may replace an earlier empty one."""
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            if key not in merged:
                merged[key] = value
                continue
            # A non-empty string beats a previously stored empty string
            if (isinstance(value, compat_str) and value
                    and isinstance(merged[key], compat_str)
                    and not merged[key]):
                merged[key] = value
    return merged
4195
4196
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Decode *string* to text unless it already is a compat_str."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4199
4200
# MPAA movie rating -> minimum viewer age
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4208
4209
# US TV Parental Guidelines rating -> minimum viewer age
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4218
4219
def parse_age_limit(s):
    """Normalize an age limit ('18', '18+', 'PG-13', 'TV-MA' or an int) to an int age."""
    # Note: the type() check (unlike isinstance) excludes bool and int subclasses
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4235
4236
def strip_jsonp(code):
    """Strip a JSONP wrapper like 'cb && cb({...});' down to the JSON payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4245
4246
def js_to_json(code, vars={}):
    """Convert a JavaScript object/expression literal into valid JSON text."""
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally used as object keys)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite a single token matched by the big regex below
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!' operators and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to JSON form
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integer object keys must become JSON strings
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Everything else (bare identifiers, strings) gets quoted
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4291
4292
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def rank(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        try:
            return quality_ids.index(qid)
        except ValueError:
            return -1
    return rank
4301
4302
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types
# NOTE(review): the string values appear to be default filename infixes for the
# corresponding auxiliary files (None meaning no dedicated infix) — verify against callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
FORMAT_RE = r'''(?x)
    (?<!%)
    %
    \({0}\)  # mapping key
    (?:[#0\-+ ]+)?  # conversion flags (optional)
    (?:\d+)?  # minimum field width (optional)
    (?:\.\d+)?  # precision (optional)
    [hlL]?  # length modifier (optional)
    (?P<type>[diouxXeEfFgGcrs%])  # conversion type
'''
4331
4332
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4341
4342
def version_tuple(v):
    """Split a version string like '2021.1.1' or '1.2-3' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4345
4346
def is_outdated_version(version, limit, assume_new=True):
    """Return True when *version* is older than *limit*.

    Missing or unparseable versions are treated as new or old
    according to *assume_new*.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4354
4355
def ytdl_is_updateable():
    """Return True if this build of yt-dlp can self-update via -U.

    Self-updating is currently disabled, so this is unconditionally False.
    (The former check — running from a zipimport archive or a frozen
    executable — was unreachable dead code after the bare `return False`
    and has been removed.)
    """
    return False
4363
4364
def args_to_str(args):
    """Get a short shell-quoted string representation of a subprocess command."""
    return ' '.join(map(compat_shlex_quote, args))
4368
4369
def error_to_compat_str(err):
    """Stringify an exception safely on both Python 2 and 3."""
    if sys.version_info[0] >= 3:
        return str(err)
    # On Python 2 the error byte string must be decoded with the proper
    # encoding rather than ASCII
    return str(err).decode(preferredencoding())
4377
4378
def mimetype2ext(mt):
    """Map a MIME type to a conventional file extension."""
    if mt is None:
        return None

    # Full-type special cases are checked first
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    ext = FULL_MAP.get(mt)
    if ext is not None:
        return ext

    # Otherwise map on the subtype, with any parameters stripped
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    # Unknown subtypes pass through unchanged
    return SUBTYPE_MAP.get(subtype, subtype)
4416
4417
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    Returns {} when nothing can be determined.
    """
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                    'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
                    'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec = acodec = None
    for full_codec in split_codecs:
        base = full_codec.split('.')[0]
        if base in VIDEO_CODECS:
            if not vcodec:
                vcodec = full_codec
        elif base in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Neither recognized: a two-entry list is assumed to be video,audio
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4447
4448
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a URL handle's response headers."""
    headers = url_handle.headers

    # Prefer an explicit filename from Content-Disposition
    cd = headers.get('Content-Disposition')
    if cd:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4461
4462
def encode_data_uri(data, mime_type):
    """Build an RFC 2397 'data:' URI from raw bytes and a MIME type."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
4465
4466
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # Either no viewer limit set, or content available for everyone
        return False
    return age_limit < content_limit
4475
4476
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Decode with the encoding implied by a leading BOM, if any
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4495
4496
def determine_protocol(info_dict):
    """Work out the download protocol for a format dict."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Scheme-prefix shortcuts
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    # Extension-based protocols
    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4517
4518
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def get_max_lens(table):
        # Widest cell (stringified) per column
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only the columns whose filter entry is truthy
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns whose every data cell is empty (max width 0)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    if delim:
        # Insert a dashed separator line below the header
        table = [header_row] + [['-' * ml for ml in max_lens]] + data
    # Pad every column but the last, which is left ragged
    format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4539
4540
def _match_one(filter_part, dct):
    """Evaluate one '<key><op><value>' or '[!]<key>' filter expression against *dct*.

    Raises ValueError for malformed filter parts or string comparisons with
    ordering operators.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind inside the quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try to read it as a file size ('500KiB')
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing fields only pass when the operator carries a trailing '?'
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4609
4610
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    # Every '&'-separated requirement must hold
    return all(
        _match_one(part, dct) for part in filter_str.split('&'))
4616
4617
def match_filter_func(filter_str):
    """Build a match-filter callback returning None to accept a video, or a
    skip-message string to reject it."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4626
4627
def parse_dfxp_time_expr(time_expr):
    """Convert a TTML/DFXP time expression into seconds (None if unparseable)."""
    if not time_expr:
        return

    # Plain seconds, optionally suffixed with 's'
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # HH:MM:SS[.ms] clock time (':' is also accepted as the fraction separator)
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes = int(mobj.group(1)), int(mobj.group(2))
        seconds = float(mobj.group(3).replace(':', '.'))
        return 3600 * hours + 60 * minutes + seconds
4639
4640
def srt_subtitles_timecode(seconds):
    """Format a float second count as an SRT timecode 'HH:MM:SS,mmm'."""
    hrs = seconds / 3600
    mins = (seconds % 3600) / 60
    secs = seconds % 60
    msecs = (seconds % 1) * 1000
    # %d truncates the fractional parts of each component
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, msecs)
4643
4644
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitle data to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Map legacy TTML namespaces onto the current ones so that a single set
    # of xpath expressions works for every document variant.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling properties that are translated into SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # Streaming XML parser target that renders a single <p> element
        # (including inline styling) as SRT-flavored markup.

        def __init__(self):
            # BUGFIX: these used to be mutable *class* attributes; the lists
            # were mutated via append()/pop() and therefore shared between
            # all instances.  Per-instance state belongs in __init__.
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip styling that is already in effect
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Render one XML node to text through TTMLPElementParser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements, repeating until styles whose parent style
    # had not been seen yet are resolved.
    # NOTE(review): cyclic or dangling parent-style references would loop
    # forever here; assumed not to occur in real-world documents.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # The style attached to <body> or <div> (if any) becomes the default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4807
4808
def cli_option(params, command_option, param):
    """Return ``[command_option, value]`` for the given param, or [] if unset.

    NOTE: falsy non-None values ('', 0, False) are passed through without
    compat_str conversion — quirk preserved from the original.
    """
    value = params.get(param)
    if value:
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
4814
4815
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Express a boolean param as command-line arguments.

    Returns [] when the param is unset; otherwise either
    ['--opt', 'true'/'false'] or ['--opt=true'/'--opt=false'] when a
    separator is given.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    value = true_value if flag else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4824
4825
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4829
4830
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick configuration args from argdict for the first key (or key group)
    that has any entries.

    argdict may also be a plain list/tuple (old-style config), in which case
    it is returned as-is when use_compat is set.
    NOTE(review): the mutable default list is shared across calls; safe only
    because it is never mutated here.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict if use_compat else default
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        if isinstance(key_list, compat_str):
            key_list = (key_list,)
        # collect the arg lists registered under any of the (lowercased) keys
        arg_list = [argdict.get(key.lower()) for key in key_list]
        arg_list = [args for args in arg_list if args is not None]
        if arg_list:
            # flatten the matched arg lists into a single list
            return [arg for args in arg_list for arg in args]
    return default
4851
4852
class ISO639Utils(object):
    """Convert between ISO 639-1 (two-letter) and ISO 639-2/T (three-letter)
    language codes."""
    # Mapping of ISO 639-1 code -> ISO 639-2/T code.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are considered, so region-qualified
        # codes such as 'en-US' also resolve; returns None if unmapped.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear reverse scan of the map; returns None when no match exists.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5056
5057
class ISO3166Utils(object):
    """Resolve ISO 3166-1 alpha-2 country codes to full country names."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes.
        return cls._country_map.get(code.upper())
5316
5317
class GeoUtils(object):
    """Helpers for picking a plausible IPv4 address inside a country's
    address space (used for geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a str) inside a CIDR block.

        code_or_block is either a two-letter country code (looked up in
        _country_ip_map; returns None when unknown) or an explicit
        'a.b.c.d/prefixlen' block.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # addr_max fills all host bits with ones; assumes addr is the
        # network base address of the block
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5576
5577
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets individual requests override the proxy via a
    'Ytdl-request-proxy' header (consumed and removed here)."""

    def __init__(self, proxies=None):
        # Set default handlers: http_open/https_open delegate to proxy_open
        # with a '__noproxy__' sentinel, so requests without an explicit
        # per-request proxy fall through to the default behaviour below.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (if present) takes precedence over the default
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # SOCKS proxies are only flagged via a header here;
            # yt-dlp's http/https handlers do wrapping the socket with socks
            req.add_header('Ytdl-socks-proxy', proxy)
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5601
5602
5603 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5604 # released into Public Domain
5605 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5606
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If blocksize is given and greater than zero, the result is left-padded
    with NUL bytes so its length is a multiple of blocksize.
    """
    n = int(n)
    # emit 32 bits at a time, least-significant chunk first
    chunks = []
    while n > 0:
        chunks.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    s = b''.join(reversed(chunks))
    # drop leading NUL bytes; n == 0 yields a single NUL
    s = s.lstrip(b'\000') or b'\000'
    # left-pad to a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5635
5636
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # left-pad with NULs to a multiple of 4 so we can consume 32-bit words
    if len(s) % 4:
        s = b'\000' * (4 - len(s) % 4) + s
    acc = 0
    for off in range(0, len(s), 4):
        acc = (acc << 32) | compat_struct_unpack('>I', s[off:off + 4])[0]
    return acc
5652
5653
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # the byte string is interpreted little-endian, hence the reversal
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5669
5670
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data cannot fit (needs 11 bytes of overhead)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (EME-PKCS1-v1_5): the padding string PS must consist of
    # *nonzero* pseudo-random octets — a zero octet would be mistaken for
    # the end-of-padding marker by the decrypting side, so the lower bound
    # is 1, not 0.
    pseudo_random = [random.randint(1, 254) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5684
5685
def encode_base_n(num, n, table=None):
    """Encode a non-negative integer num in base n using the given digit
    table (defaults to 0-9a-zA-Z truncated to n symbols)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if len(table) < n:
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # collect digits least-significant first, then reverse
    digits = []
    while num:
        num, rem = divmod(num, n)
        digits.append(table[rem])
    return ''.join(reversed(digits))
5702
5703
def decode_packed_codes(code):
    """Decode JavaScript obfuscated with Dean Edwards' p.a.c.k.e.r: rebuild
    the symbol table and substitute every word token back in."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # word idx (encoded in the packer's base) -> original symbol;
    # an empty slot means the token stands for itself
    symbol_table = {}
    for idx in reversed(range(count)):
        key = encode_base_n(idx, base)
        symbol_table[key] = symbols[idx] or key

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5720
5721
def caesar(s, alphabet, shift):
    """Apply a Caesar shift to s over the given alphabet; characters not in
    the alphabet are left untouched."""
    if shift == 0:
        return s
    size = len(alphabet)
    out = []
    for ch in s:
        pos = alphabet.find(ch)
        out.append(alphabet[(pos + shift) % size] if pos != -1 else ch)
    return ''.join(out)
5729
5730
def rot47(s):
    """Apply the ROT47 cipher: rotate each printable ASCII character
    (codepoints 33..126) by 47 within that range; other characters are
    left unchanged."""
    return ''.join(
        chr(33 + (ord(c) - 33 + 47) % 94) if 33 <= ord(c) <= 126 else c
        for c in s)
5733
5734
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list (KEY=value pairs, values optionally
    double-quoted) into a dict; surrounding quotes are stripped."""
    info = {}
    for match in re.finditer(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        value = match.group('val')
        if value.startswith('"'):
            value = value[1:-1]
        info[match.group('key')] = value
    return info
5742
5743
def urshift(val, n):
    """Unsigned 32-bit right shift (JavaScript's >>> operator): negative
    values are first mapped into the 32-bit unsigned range."""
    if val >= 0:
        return val >> n
    return (val + 0x100000000) >> n
5746
5747
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG image and return (width, height, pixels).

    pixels is a list of rows; each row is a flat list of decoded byte
    values (the stride is computed as width * 3 bytes, i.e. the decoder
    assumes 3 bytes per pixel — 8-bit RGB, non-interlaced; other layouts
    would decode incorrectly).

    Raises IOError when the data is not a valid PNG or has no IDAT chunks.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the 8-byte PNG signature and that IHDR is the first chunk
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned int of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR was validated to be first; width/height are its first 8 bytes
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Image data may be split over multiple IDAT chunks; concatenate them
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per scanline (3 bytes per pixel assumed)
    stride = width * 3
    pixels = []

    # Look up an already-decoded byte by its flat (pre-filter) index
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each scanline is prefixed with one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the same byte of the previous pixel (3 bytes back),
            # so it only exists from the second pixel of the row onwards
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Reverse the per-scanline filters defined by the PNG spec
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick the neighbour closest to p
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5853
5854
def write_xattr(path, key, value):
    """Set extended attribute `key` to `value` (bytes) on the file at `path`.

    Backends are tried in order: the pyxattr/xattr Python modules; NTFS
    Alternate Data Streams on Windows; the setfattr/xattr command-line
    tools on other platforms.

    Raises XAttrMetadataError when a backend fails to write, and
    XAttrUnavailableError when no usable backend is available.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # pyxattr and xattr expose different APIs under the same module name;
        # the presence of `set` distinguishes them.
        if hasattr(xattr, 'set'): # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else: # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # An ADS is addressed as "<path>:<stream name>", hence the
            # restriction that the key itself contains no ':'.
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as text, not bytes.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5937
5938
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31.

    Returns a dict mapping the three supplied field names to the year,
    month and day as decimal strings.
    """
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    span_days = (last - first).days
    chosen = first + datetime.timedelta(days=random.randint(0, span_days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
5949
5950
# Templates for internet shortcut files, which are plain text files.
# Each is rendered with `template % {'url': ..., 'filename': ...}`.

# Windows ".url" shortcut: an INI-style file with the target URL.
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS ".webloc" shortcut: an Apple XML property list holding the URL.
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org ".desktop" entry (Linux): a Type=Link desktop file.
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5976
5977
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    Raises ValueError for IPv6 (bracketed) hosts, which are not supported.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.

    # BUGFIX: only elide the port when it matches the scheme's well-known
    # default.  The previous check (`port != 80`) dropped an explicit port 80
    # from *every* scheme, silently rewriting e.g. 'https://host:80/' (a
    # non-default port for https) into 'https://host/'.
    default_port = {'http': 80, 'https': 443}.get(iri_parts.scheme)
    if iri_parts.port is not None and iri_parts.port != default_port:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6020
6021
def to_high_limit_path(path):
    """Return `path` in a form that avoids Windows' MAX_PATH limit.

    On win32/cygwin, the absolute path is prefixed with the literal
    `\\\\?\\` marker, which tells the Windows API to skip the 260-character
    MAX_PATH restriction (individual path components may still be
    length-limited).  On all other platforms the path is returned as-is.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
6028
6029
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Look up `field` on `obj` and render it through `template`.

    A value listed in `ignore` -- whether before or after `func` is
    applied -- yields `default` instead of being formatted.
    """
    value = obj.get(field, default)
    if value not in ignore and func:
        value = func(value)
    return default if value in ignore else template % value
6035
6036
def clean_podcast_url(url):
    """Strip known podcast analytics/redirect prefixes embedded in `url`."""
    tracking_prefix = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix, '', url)
6052
6053
_HEX_TABLE = '0123456789abcdef'  # NOTE(review): kept for other helpers that may rely on it


def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID as a string.

    BUGFIX: the previous hand-rolled template filled the variant position
    ('y') with any hex digit, producing invalid UUIDs in 12 of 16 cases
    (RFC 4122 requires the variant nibble to be 8, 9, a or b).  The
    stdlib generator is always spec-compliant.
    """
    import uuid  # local import keeps this a drop-in fix without touching the module's import block
    return str(uuid.uuid4())
6059
6060
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not already exist.

    Returns True on success (including when nothing needed creating) and
    False on failure.  On failure, the error is reported through
    `to_screen` when a callable was supplied.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # BUGFIX: the original tested `callable(to_screen) is not None`,
        # which is always True (callable() returns a bool), so any failure
        # with the default to_screen=None crashed with
        # "TypeError: 'NoneType' object is not callable" instead of
        # returning False.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6071
6072
def get_executable_path():
    """Return the absolute base directory the program is running from.

    Handles three launch modes: a PyInstaller-frozen executable, a
    zipimport (running from a zip archive, one extra level up), and a
    regular package checkout.
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))
    # Running from ZIP needs to step out of both the package and the archive.
    relative = '../..' if isinstance(globals().get('__loader__'), zipimporter) else '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), relative))
6082
6083
def load_plugins(name, type, namespace):
    """Load classes whose names end with `type` from `ytdlp_plugins/<name>`.

    Each matching class is registered into `namespace` and collected into
    the returned list.  A missing plugin module is silently ignored.

    NOTE(review): relies on the deprecated `imp` module; `imp` was removed
    in Python 3.12 — confirm before targeting newer interpreters.
    """
    plugin_info = [None]
    found = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        module = imp.load_module(name, *plugin_info)
        for attr in dir(module):
            if not attr.endswith(type):
                continue
            klass = getattr(module, attr)
            found.append(klass)
            namespace[attr] = klass
    except ImportError:
        # No plugin package present — not an error.
        pass
    finally:
        # find_module() returns an open file handle as its first element;
        # close it whether or not loading succeeded.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return found
6103
6104
def traverse_dict(dictn, keys, casesense=True):
    """Walk a nested structure, following `keys` in order.

    Dict levels are looked up by key (case-insensitively when `casesense`
    is False); list/tuple/string levels are indexed by an integer key or
    a 'start:stop' slice string.  Returns None as soon as a level cannot
    be descended into.
    """
    node = dictn
    for key in list(keys):
        if isinstance(node, dict):
            if not casesense:
                # Rebuild with lower-cased keys for a case-insensitive lookup.
                node = {k.lower(): v for k, v in node.items()}
                key = key.lower()
            node = node.get(key)
        elif isinstance(node, (list, tuple, compat_str)):
            if ':' in key:
                key = slice(*map(int_or_none, key.split(':')))
            else:
                key = int_or_none(key)
            node = try_get(node, lambda x: x[key])
        else:
            return None
    return node