]> jfr.im git - yt-dlp.git/blob - youtube_dlc/utils.py
Update to ytdl-2021.01.03
[yt-dlp.git] / youtube_dlc / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import xml.etree.ElementTree
37 import zlib
38
39 from .compat import (
40 compat_HTMLParseError,
41 compat_HTMLParser,
42 compat_basestring,
43 compat_chr,
44 compat_cookiejar,
45 compat_ctypes_WINFUNCTYPE,
46 compat_etree_fromstring,
47 compat_expanduser,
48 compat_html_entities,
49 compat_html_entities_html5,
50 compat_http_client,
51 compat_integer_types,
52 compat_kwargs,
53 compat_os_name,
54 compat_parse_qs,
55 compat_shlex_quote,
56 compat_str,
57 compat_struct_pack,
58 compat_struct_unpack,
59 compat_urllib_error,
60 compat_urllib_parse,
61 compat_urllib_parse_urlencode,
62 compat_urllib_parse_urlparse,
63 compat_urllib_parse_urlunparse,
64 compat_urllib_parse_quote,
65 compat_urllib_parse_quote_plus,
66 compat_urllib_parse_unquote_plus,
67 compat_urllib_request,
68 compat_urlparse,
69 compat_xpath,
70 )
71
72 from .socks import (
73 ProxyType,
74 sockssocket,
75 )
76
77
def register_socks_protocols():
    """Teach urlsplit()/urlparse() that SOCKS URLs carry a netloc.

    In Python < 2.6.5 urlsplit() suffers from
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended to that registry (idempotently) here.
    """
    known_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme not in known_schemes:
            known_schemes.append(socks_scheme)
85
86
# Type of a compiled regular expression object. The stdlib does not expose
# this as a stable public name across Python 2/3, so derive it from an
# actual compiled pattern; used for isinstance() checks elsewhere.
compiled_regex_type = type(re.compile(''))
89
90
def random_user_agent():
    """Return a random, plausible Chrome-on-Windows-10 User-Agent string.

    The result always has the shape::

        Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
        (KHTML, like Gecko) Chrome/MAJOR.0.BUILD.PATCH Safari/537.36

    Chrome version numbers follow the scheme MAJOR.0.BUILD.PATCH, where
    BUILD is fixed per stable major release. Instead of shipping a
    ~1600-entry hardcoded table of historical version strings (a pure
    maintenance and module-size burden), synthesize a version from the
    stable (major -> build, highest-seen-patch) pairs that the old table
    covered (Chrome 68-74). The exact candidate pool is not part of the
    function's contract — callers only ever see one random UA string.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # major version -> (stable build number, maximum patch level observed)
    _CHROME_VERSIONS = {
        68: (3440, 134),
        69: (3497, 128),
        70: (3538, 124),
        71: (3578, 141),
        72: (3626, 122),
        73: (3683, 121),
        74: (3729, 129),
    }
    major = random.choice(sorted(_CHROME_VERSIONS))
    build, max_patch = _CHROME_VERSIONS[major]
    version = '%d.0.%d.%d' % (major, build, random.randint(0, max_patch))
    return _USER_AGENT_TPL % version
1672
1673
# Default HTTP headers sent with every request; the User-Agent is
# randomized once per process via random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings that callers can look up by key.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1686
1687
# Unique sentinel distinguishing "no default supplied" from an explicit
# None default in the xpath_*/helper functions below.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code; used when parsing localized dates.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1700
# Media file extensions recognized by this project.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration (single char or
# multi-char like 'AE'/'ss'); positions in the two zipped sequences line up.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1720
# strftime/strptime-style patterns tried in order when parsing dates.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# The shared patterns plus numeric formats interpreted day-first.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# The shared patterns plus numeric formats interpreted month-first (US style).
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of p.a.c.k.e.d JavaScript obfuscated code.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the payload of <script type="application/ld+json"> blocks.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1781
1782
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: some platforms report encodings Python cannot use.
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1796
1797
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # The temp file lives in the same directory as the target so the final
    # os.rename() stays on one filesystem (atomic on POSIX).
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Give the file the permissions a plain open() would have
            # produced: 0666 minus the current process umask.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best effort: do not leave the temporary file behind on failure.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1856
1857
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # The attribute name is interpolated into the expression, so make
        # sure it cannot break out of the predicate syntax.
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # Fallback for Python 2.6: its ElementTree does not support
        # attribute predicates, so filter the candidates manually.
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1872
1873 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1874 # the namespace parameter
1875
1876
def xpath_with_ns(path, ns_map):
    """Qualify 'prefix:tag' steps of an xpath with the URI mapped in ns_map,
    producing ElementTree's '{uri}tag' notation; unprefixed steps pass through."""
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1887
1888
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (a string or an iterable of
    candidate xpaths tried in order). Falls back to default, raises
    ExtractorError when fatal, or returns None."""
    def _search(xp):
        return node.find(compat_xpath(xp))

    if isinstance(xpath, (str, compat_str)):
        result = _search(xpath)
    else:
        for xp in xpath:
            result = _search(xp)
            if result is not None:
                break

    if result is not None:
        return result
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
1910
1911
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element() but return the matched element's text content."""
    elem = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if elem is None or elem == default:
        return elem
    if elem.text is not None:
        return elem.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1925
1926
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element at xpath, with the usual
    default/fatal fallback behavior of the xpath_* helpers."""
    elem = find_xpath_attr(node, xpath, key)
    if elem is not None:
        return elem.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML attribute %s' % ('%s[@%s]' % (xpath, key) if name is None else name))
    return None
1938
1939
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an id is just an attribute match.
    return get_element_by_attribute('id', id, html)
1943
1944
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1949
1950
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying attribute=value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1954
1955
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute is space-separated, so match the name as a whole
    # word anywhere inside the quoted value.
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1961
1962
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    if escape_value:
        value = re.escape(value)

    results = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # Strip a surrounding quote pair if the captured content has one.
        if content[:1] in ('"', "'"):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1986
1987
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        # Attributes of the last start tag seen; empty until feed() runs.
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        # Remember the attributes of whatever start tag we encounter.
        self.attrs = dict(attrs)
1997
1998
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        # Best effort: return whatever was gathered before the error.
        pass
    return parser.attrs
2023
2024
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Turn <br> and paragraph boundaries into newlines, then drop all tags
    # and decode the remaining entities.
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
2040
2041
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Switch stdout to binary mode so media data is not mangled
                # by newline translation.
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error cannot be fixed by renaming; re-raise as-is.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2072
2073
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Unparseable input yields no timestamp.
        return None
    return email.utils.mktime_tz(parsed)
2081
2082
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def transform_char(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and codepoint > 127:
            return '_'
        return char

    # Handle timestamps: turn colons inside digit groups into underscores
    # before the per-character pass.
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(transform_char(c) for c in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2122
2123
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform != 'win32':
        # Nothing to do on other platforms.
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    sanitized = []
    for part in parts:
        if part in ('.', '..'):
            sanitized.append(part)
        else:
            # Replace characters forbidden on Windows plus trailing
            # whitespace/dots in each component.
            sanitized.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))
    if drive_or_unc:
        sanitized.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized)
2140
2141
def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
            # https://github.com/ytdl-org/youtube-dl/issues/15649
            (r'^httpss://', r'https://'),
            # https://bx1.be/lives/direct-tv/
            (r'^rmtp([es]?)://', r'rtmp\1://'),
    ):
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
2158
2159
def sanitized_Request(url, *args, **kwargs):
    # Like compat_urllib_request.Request, but the URL is passed through
    # sanitize_url() first.
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2162
2163
def expand_path(s):
    """Expand shell variables and ~"""
    # '~' is expanded first, then environment variables.
    return os.path.expandvars(compat_expanduser(s))
2167
2168
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # Deliberately uses list membership (not a set) so unhashable
    # elements are supported; order of first occurrence is preserved.
    result = []
    for element in iterable:
        if element in result:
            continue
        result.append(element)
    return result
2176
2177
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference: decimal ('#160') or hex ('#xA0').
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            # Out-of-range codepoint; fall through to the literal form.
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2207
2208
def unescapeHTML(s):
    """Replace HTML entities in s with the characters they stand for."""
    if s is None:
        return None
    assert type(s) == compat_str

    # Each '&name;'/'&#nnn;' occurrence is decoded by _htmlentity_transform().
    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
2216
2217
def get_subprocess_encoding():
    """Return the encoding used when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere use the filesystem encoding, defaulting to UTF-8.
    return sys.getfilesystemencoding() or 'utf-8'
2228
2229
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2252
2253
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; pass through otherwise."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2263
2264
def encodeArgument(s):
    # Encode a subprocess argument via encodeFilename(for_subprocess=True).
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2272
2273
def decodeArgument(b):
    # Inverse of encodeArgument().
    return decodeFilename(b, True)
2276
2277
def decodeOption(optval):
    """Decode a command-line option value to text using the locale encoding."""
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2286
2287
def formatSeconds(secs):
    """Format a duration in seconds as H:MM:SS, M:SS or plain seconds.

    Uses >= at the unit boundaries: the original `>` comparisons made
    exactly 3600 s render as '60:00' and exactly 60 s as '60'.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2295
2296
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honoring the 'nocheckcertificate' option,
    picking the SSL context API available on the running Python version."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2320
2321
def bug_reports_message():
    """Return the boilerplate appended to unexpected-error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dlc -U to update'
    else:
        update_cmd = 'see https://github.com/blackjack4494/yt-dlc on how to update'
    return (
        '; please report this issue on https://github.com/blackjack4494/yt-dlc .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
        % update_cmd)
2331
2332
class YoutubeDLError(Exception):
    """Root of the exception hierarchy for YoutubeDL errors.

    Catching this type catches every error deliberately raised here.
    """
2336
2337
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        """

        # Network failures and unavailable videos are never treated as bugs.
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report boilerplate appended.
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Render the stored traceback as text, or None if none was given.
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2365
2366
class UnsupportedError(ExtractorError):
    """Raised for URLs no extractor can handle; always an expected error."""

    def __init__(self, url):
        self.url = url
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
2372
2373
class RegexNotFoundError(ExtractorError):
    """Raised when a regex search produced no match."""
2377
2378
class GeoRestrictedError(ExtractorError):
    """Raised when a video is unavailable from the current geographic
    location due to restrictions imposed by the website; always treated
    as an expected error."""

    def __init__(self, msg, countries=None):
        # Keep the raw message and the allowed-country list for callers.
        self.msg = msg
        self.countries = countries
        super(GeoRestrictedError, self).__init__(msg, expected=True)
2390
2391
class DownloadError(YoutubeDLError):
    """Raised by FileDownloader objects not configured to continue on
    errors; carries the appropriate error message."""

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
2404
2405
class SameFileError(YoutubeDLError):
    """Raised by FileDownloader objects when multiple downloads would be
    written to one and the same file on disk."""
2413
2414
class PostProcessingError(YoutubeDLError):
    """Raised by a PostProcessor's .run() method to signal a failure in
    the postprocessing task."""

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2425
2426
class MaxDownloadsReached(YoutubeDLError):
    """Signals that the --max-downloads limit has been reached."""
2430
2431
class UnavailableVideoError(YoutubeDLError):
    """Raised when a video is requested in a format that is not available
    for that video."""
2439
2440
class ContentTooShortError(YoutubeDLError):
    """Raised by FileDownloader objects when a downloaded file is smaller
    than the server announced, indicating an interrupted connection."""

    def __init__(self, downloaded, expected):
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
2456
2457
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing xattr metadata fails; classifies the failure
    into a coarse machine-readable .reason from the code/message."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        no_space = (self.code in (errno.ENOSPC, errno.EDQUOT)
                    or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg)
        too_long = self.code == errno.E2BIG or 'Argument list too long' in self.msg
        if no_space:
            self.reason = 'NO_SPACE'
        elif too_long:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2472
2473
class XAttrUnavailableError(YoutubeDLError):
    """Error raised when xattr support is unavailable."""
2476
2477
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class, optionally pinning the connection to the
    'source_address' from the handler's params.

    Also works around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    expected HTTP responses to meet HTTP/1.0 or later (see also
    https://github.com/ytdl-org/youtube-dl/issues/6727)
    """
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only destinations in the same address family as the
            # requested source address ('.' in it implies IPv4).
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2541
2542
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header,
    removing any Accept-Encoding header along with it."""
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2551
2552
2553 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2554 """Handler for HTTP requests and responses.
2555
2556 This class, when installed with an OpenerDirector, automatically adds
2557 the standard headers to every HTTP request and handles gzipped and
2558 deflated responses from web servers. If compression is to be avoided in
2559 a particular request, the original request in the program code only has
2560 to include the HTTP header "Youtubedl-no-compression", which will be
2561 removed before making the real request.
2562
2563 Part of this code was copied from:
2564
2565 http://techknack.net/python-urllib2-handlers/
2566
2567 Andrew Rowls, the author of that code, agreed to release it to the
2568 public domain.
2569 """
2570
    def __init__(self, params, *args, **kwargs):
        # Keep the YoutubeDL params dict; _create_http_connection reads
        # e.g. 'source_address' from it.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2574
    def http_open(self, req):
        # Open the request, routing through a SOCKS-wrapped connection class
        # when the internal 'Ytdl-socks-proxy' header is present.
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            # Internal header only; never send it over the wire.
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)
2586
2587 @staticmethod
2588 def deflate(data):
2589 try:
2590 return zlib.decompress(data, -zlib.MAX_WBITS)
2591 except zlib.error:
2592 return zlib.decompress(data)
2593
    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Fill in any std_headers the caller did not set explicitly.
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Strip internal pseudo-headers (e.g. Youtubedl-no-compression).
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req
2624
    def http_response(self, req, resp):
        """Postprocess a response: transparently decompress gzip/deflate
        bodies and percent-encode non-ASCII redirect targets."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with progressively shorter payloads until one parses.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp
2671
2672 https_request = http_request
2673 https_response = http_response
2674
2675
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of *base_class* whose connect() tunnels through the
    SOCKS proxy described by the *socks_proxy* URL."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    parsed = compat_urlparse.urlparse(socks_proxy)
    scheme = parsed.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        # Empty user/password components are passed through untouched.
        return compat_urllib_parse_unquote_plus(s) if s else s

    proxy_args = (
        socks_type,
        parsed.hostname, parsed.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(parsed.username),
        unquote_if_non_empty(parsed.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2717
2718
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting a custom connection class and SOCKS proxies."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open *req* over TLS, honoring the internal Ytdl-socks-proxy header."""
        conn_kwargs = {}
        connection_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            conn_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            conn_kwargs['check_hostname'] = self._check_hostname

        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(
            functools.partial(_create_http_connection, self, connection_class, True),
            req, **conn_kwargs)
2742
2743
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in Netscape cookie files.
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry.
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc.  Do not edit.

'''
    # Structured view of one cookie file line, field-for-field.
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly prefix so the stock parser accepts the line.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2860
2861
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies the cookie jar to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # (Workaround kept below for reference; currently disabled.)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2884
2885
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler working around Python 2 returning byte-string URLs."""
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2893
2894
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (utc_offset, remainder): *utc_offset* is a datetime.timedelta
    (zero when no explicit numeric offset is present), *remainder* is the
    date string with the designator removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        return datetime.timedelta(), date_str
    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        # A bare 'Z' designator simply means UTC.
        return datetime.timedelta(), date_str
    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
2911
2912
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # strptime's %S cannot take fractional seconds; drop them up front.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        parsed = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        # Unparseable date: mirror the "no result" contract of the callers.
        return None
    return calendar.timegm(parsed.timetuple())
2930
2931
def date_formats(day_first=True):
    """Return the strptime format list matching the expected day/month order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2934
2935
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    upload_date = None
    # Commas only get in the way of the formats tried below.
    date_str = date_str.replace(',', ' ')
    # Strip AM/PM plus any trailing timezone abbreviation.
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates.
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
2962
2963
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from *date_str* using any known format;
    *day_first* disambiguates DD/MM vs MM/DD ordering."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Note the PM marker before it gets stripped below.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fallback for RFC 2822 style dates.
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
2995
2996
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension of *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    if candidate.rstrip('/') in KNOWN_EXTENSIONS:
        return candidate.rstrip('/')
    return default_ext
3008
3009
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename for *filename*: '<name>.<lang>.<format>'."""
    suffix = sub_lang + '.' + sub_format
    return replace_extension(filename, suffix, expected_real_ext)
3012
3013
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Raises ValueError for strings matching none of the accepted forms.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        # Renamed from `time`: the old local shadowed the imported time module.
        amount = int(match.group('time'))
        if match.group('sign') == '-':
            amount = -amount
        unit = match.group('unit')
        # A bad approximation? Months and years become fixed day counts.
        if unit == 'month':
            unit = 'day'
            amount *= 30
        elif unit == 'year':
            unit = 'day'
            amount *= 365
        unit += 's'
        return today + datetime.timedelta(**{unit: amount})
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
3041
3042
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        # Anything that is not a compact date is passed through untouched.
        return date_str
    return '-'.join(match.groups())
3051
3052
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the widest representable interval.
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3082
3083
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # platform.platform() may yield bytes on some Python 2 setups.
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3092
3093
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C stdio file descriptors (stdout/stderr) to Win32 std handle IDs.
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on real console handles, not files/pipes.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3167
3168
def write_string(s, out=None, encoding=None):
    """Write the unicode string *s* to *out* (stderr by default), working
    around console/encoding quirks on Windows and Python 2."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream wrapping a binary buffer: encode explicitly.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3189
3190
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    return [ord(c) for c in bs]
3198
3199
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a byte string."""
    if not xs:
        return b''
    # '%dB' packs each value as one unsigned byte.
    return compat_struct_pack('%dB' % len(xs), *xs)
3204
3205
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED struct expected by Lock/UnlockFileEx.
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering the maximum possible file size (lock everything).
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the whole file via the Win32 API (fcntl is unavailable here).
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock.
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3279
3280
class locked_file(object):
    """Context manager wrapping a text file with an advisory lock."""

    def __init__(self, filename, mode, encoding=None):
        # Only plain read/write/append modes make sense for locking.
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers may share the lock; writers need exclusivity.
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3310
3311
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to UTF-8 when unknown."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3315
3316
def shell_quote(args):
    """Join *args* into one shell-safe command line string."""
    encoding = get_filesystem_encoding()

    def as_text(a):
        # We may get a filename encoded with 'encodeFilename'
        return a.decode(encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(as_text(a)) for a in args)
3326
3327
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL.
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
3336
3337
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url(); returns (url, data) or
    (smug_url, default) when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
3345
3346
def format_bytes(bytes):
    """Render a byte count as a human readable string, e.g. '1.00MiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # Zero has no logarithm; pin it to the plain-bytes bucket.
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3359
3360
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' from *s* using *unit_table* (unit -> multiplier);
    returns the resulting integer or None when *s* does not match."""
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # A comma is accepted as the decimal separator too.
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3370
3371
def parse_filesize(s):
    """Parse a human readable file size like '5.5 MiB' into a byte count,
    or return None when *s* is None or unparseable."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3441
3442
def parse_count(s):
    """Parse a human readable count like '1.5M' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers (possibly with separators) need no unit table.
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3462
3463
def parse_resolution(s):
    """Extract video width/height from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {
            'width': int(m.group('w')),
            'height': int(m.group('h')),
        }

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(m.group(1)) * 540}

    return {}
3484
3485
def parse_bitrate(s):
    """Extract an integer bitrate in kbps from strings like '128 kbps'."""
    if not isinstance(s, compat_str):
        return
    m = re.search(r'\b(\d+)\s*kbps', s)
    if m:
        return int(m.group(1))
3492
3493
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        return names.index(name) + 1
    except ValueError:
        return None
3503
3504
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [s[:3] for s in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3513
3514
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave already-escaped entities and character references untouched.
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
3521
3522
def setproctitle(title):
    """Set the process name (as shown by e.g. `ps`) via libc prctl()."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME on Linux.
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3547
3548
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present (None-safe)."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3551
3552
def remove_end(s, end):
    """Strip *end* from the end of *s* when present (None-safe).

    Fixed: the previous ``s[:-len(end)]`` returned '' when *end* was empty,
    because ``s[:-0]`` is the empty slice; an empty suffix now leaves *s*
    unchanged.
    """
    if s is None or not s.endswith(end):
        return s
    return s[:len(s) - len(end)]
3555
3556
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3564
3565
def get_domain(url):
    """Return the bare domain of *url* (scheme and leading 'www.' dropped),
    or None when no domain-like part is found."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return m.group('domain') if m else None
3569
3570
def url_basename(url):
    """Return the last path component of *url* (query/fragment ignored)."""
    url_path = compat_urlparse.urlparse(url).path
    return url_path.strip('/').split('/')[-1]
3574
3575
def base_url(url):
    """Return *url* truncated after the last '/' preceding any query/fragment."""
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3578
3579
def urljoin(base, path):
    """Join *base* and *path* like urlparse.urljoin, but return None for
    unusable inputs instead of raising."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # *path* is already absolute (possibly scheme-relative).
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3593
3594
class HEADRequest(compat_urllib_request.Request):
    """A Request whose HTTP method is HEAD instead of GET."""
    def get_method(self):
        return 'HEAD'
3598
3599
class PUTRequest(compat_urllib_request.Request):
    """A Request whose HTTP method is PUT instead of GET."""
    def get_method(self):
        return 'PUT'
3603
3604
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Best-effort int conversion with optional attribute lookup and
    `* invscale // scale` rescaling; returns *default* on failure."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    # Treat the empty string the same as a missing value.
    if v in (None, ''):
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3617
3618
def str_or_none(v, default=None):
    """Stringify *v*, or return *default* when it is None."""
    if v is None:
        return default
    return compat_str(v)
3621
3622
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and '+' markers before parsing
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3630
3631
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or *default* when conversion fails."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
3639
3640
def bool_or_none(v, default=None):
    """Return v only when it is a real bool, else *default*."""
    if isinstance(v, bool):
        return v
    return default
3643
3644
def strip_or_none(v, default=None):
    """Return v.strip() for strings, *default* for anything else."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3647
3648
def url_or_none(url):
    """Return the stripped URL when it looks like http(s)/rtmp-family/mms/ftp, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3654
3655
def parse_duration(s):
    """Parse a human-readable duration string into seconds (float), or None.

    Accepts "[[[DD:]HH:]MM:]SS[.ms]" clock notation, ISO 8601-style
    "P...T..." durations, and free-form "X hours" / "X mins" phrases.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1) Colon-separated clock format [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2) ISO 8601-like durations; years/months/weeks are matched so the
        # string still parses, but their values are deliberately discarded
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3) Loose "1.5 hours" / "90 mins" phrasing
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whatever components were captured; 'ms' still carries its leading
    # dot, so float('.25') adds the fractional part directly
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3712
3713
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension ('a.mp4' -> 'a.ext.mp4').

    When *expected_real_ext* is given and does not match, *ext* is appended
    after the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3720
3721
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the filename's extension with *ext*.

    When *expected_real_ext* is given and the current extension differs,
    *ext* is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = name
    return '{0}.{1}'.format(base, ext)
3727
3728
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        return False
    return exe
3737
3738
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = proc.communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3756
3757
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from an executable's *output* via *version_re*."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3767
3768
class PagedList(object):
    """Base class for lazily paged lists; subclasses must implement getslice()."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3773
3774
3775 class OnDemandPagedList(PagedList):
3776 def __init__(self, pagefunc, pagesize, use_cache=True):
3777 self._pagefunc = pagefunc
3778 self._pagesize = pagesize
3779 self._use_cache = use_cache
3780 if use_cache:
3781 self._cache = {}
3782
3783 def getslice(self, start=0, end=None):
3784 res = []
3785 for pagenum in itertools.count(start // self._pagesize):
3786 firstid = pagenum * self._pagesize
3787 nextfirstid = pagenum * self._pagesize + self._pagesize
3788 if start >= nextfirstid:
3789 continue
3790
3791 page_results = None
3792 if self._use_cache:
3793 page_results = self._cache.get(pagenum)
3794 if page_results is None:
3795 page_results = list(self._pagefunc(pagenum))
3796 if self._use_cache:
3797 self._cache[pagenum] = page_results
3798
3799 startv = (
3800 start % self._pagesize
3801 if firstid <= start < nextfirstid
3802 else 0)
3803
3804 endv = (
3805 ((end - 1) % self._pagesize) + 1
3806 if (end is not None and firstid <= end <= nextfirstid)
3807 else None)
3808
3809 if startv != 0 or endv is not None:
3810 page_results = page_results[startv:endv]
3811 res.extend(page_results)
3812
3813 # A little optimization - if current page is not "full", ie. does
3814 # not contain page_size videos then we can assume that this page
3815 # is the last one - there are no more ids on further pages -
3816 # i.e. no need to query again.
3817 if len(page_results) + startv < self._pagesize:
3818 break
3819
3820 # If we got the whole page, but the next page is not interesting,
3821 # break out early as well
3822 if end == nextfirstid:
3823 break
3824 return res
3825
3826
3827 class InAdvancePagedList(PagedList):
3828 def __init__(self, pagefunc, pagecount, pagesize):
3829 self._pagefunc = pagefunc
3830 self._pagecount = pagecount
3831 self._pagesize = pagesize
3832
3833 def getslice(self, start=0, end=None):
3834 res = []
3835 start_page = start // self._pagesize
3836 end_page = (
3837 self._pagecount if end is None else (end // self._pagesize + 1))
3838 skip_elems = start - start_page * self._pagesize
3839 only_more = None if end is None else end - start
3840 for pagenum in range(start_page, end_page):
3841 page = list(self._pagefunc(pagenum))
3842 if skip_elems:
3843 page = page[skip_elems:]
3844 skip_elems = None
3845 if only_more is not None:
3846 if len(page) < only_more:
3847 only_more -= len(page)
3848 else:
3849 page = page[:only_more]
3850 res.extend(page)
3851 break
3852 res.extend(page)
3853 return res
3854
3855
def uppercase_escape(s):
    """Replace \\UXXXXXXXX escape sequences in *s* with the characters they denote."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
3862
3863
def lowercase_escape(s):
    """Replace \\uXXXX escape sequences in *s* with the characters they denote."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
3870
3871
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2 needs the text encoded to UTF-8 bytes before quoting
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # All RFC 3986 reserved/unreserved characters are kept intact
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3877
3878
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    return parts._replace(
        # IDNA-encode the host, percent-escape every other component
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    ).geturl()
3889
3890
def read_batch_urls(batch_fd):
    """Read URLs from a file-like object, dropping BOMs, comments and blanks."""
    def _sanitize(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        # Lines starting with '#', ';' or ']' are comments
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in (_sanitize(line) for line in fd) if url]
3905
3906
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3909
3910
def update_url_query(url, query):
    """Merge *query* parameters into *url*'s query string."""
    if not query:
        return url
    parsed_url = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parsed_url.query)
    merged.update(query)
    new_query = compat_urllib_parse_urlencode(merged, True)
    return compat_urlparse.urlunparse(parsed_url._replace(query=new_query))
3919
3920
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its URL, data, headers and query."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    # Preserve the HTTP verb of the original request
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        new_url, data=new_data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3939
3940
3941 def _multipart_encode_impl(data, boundary):
3942 content_type = 'multipart/form-data; boundary=%s' % boundary
3943
3944 out = b''
3945 for k, v in data.items():
3946 out += b'--' + boundary.encode('ascii') + b'\r\n'
3947 if isinstance(k, compat_str):
3948 k = k.encode('utf-8')
3949 if isinstance(v, compat_str):
3950 v = v.encode('utf-8')
3951 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
3952 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
3953 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
3954 if boundary.encode('ascii') in content:
3955 raise ValueError('Boundary overlaps with data')
3956 out += content
3957
3958 out += b'--' + boundary.encode('ascii') + b'--\r\n'
3959
3960 return out, content_type
3961
3962
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    user_supplied_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            # Retry with a fresh random boundary if it collides with the data
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if user_supplied_boundary:
                raise
            boundary = None
3991
3992
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return d[key] for the first usable key; *key_or_keys* may be a single
    key or a list/tuple of candidates."""
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key in d and d[key] is not None and (not skip_false_values or d[key]):
            return d[key]
    return default
4001
4002
def try_get(src, getter, expected_type=None):
    """Apply getter callables to *src*, returning the first result that is of
    *expected_type* (any type when None); swallows common lookup errors."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for g in getters:
        try:
            value = g(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4014
4015
def merge_dicts(*dicts):
    """Merge dicts left-to-right: earlier non-None values win, except that a
    non-empty string may replace an earlier empty string."""
    merged = {}
    for d in dicts:
        for key, value in d.items():
            if value is None:
                continue
            if key not in merged:
                merged[key] = value
                continue
            if (isinstance(value, compat_str) and value
                    and isinstance(merged[key], compat_str)
                    and not merged[key]):
                merged[key] = value
    return merged
4028
4029
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Coerce *string* to compat_str, decoding with *encoding* when necessary."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4032
4033
# MPAA-style US movie ratings mapped to the minimum viewer age they imply
# ('NC' covers NC-17); used by parse_age_limit().
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4041
4042
# US TV Parental Guidelines ratings mapped to the minimum viewer age they
# imply; used by parse_age_limit().
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4051
4052
def parse_age_limit(s):
    """Parse an age limit from an int or a rating string; returns int or None."""
    # type() check (not isinstance) deliberately rejects bools
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4067
4068
def strip_jsonp(code):
    """Remove a JSONP callback wrapper, leaving only the JSON payload."""
    JSONP_RE = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(JSONP_RE, r'\g<callback_data>', code)
4077
4078
def js_to_json(code):
    """Convert a JavaScript object/value literal into valid JSON text.

    Handles single-quoted strings, unquoted keys, comments, trailing commas
    and hex/octal integer literals.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally used as object keys)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!' prefixes and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to JSON form
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # Anything else (e.g. a bare identifier key) gets double-quoted
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4119
4120
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # Unknown qualities sort below every known one
            return -1
    return q
4129
4130
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4132
4133
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4142
4143
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4146
4147
def is_outdated_version(version, limit, assume_new=True):
    """Compare version strings; unparsable input yields `not assume_new`."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4155
4156
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    from zipimport import zipimporter

    running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return running_from_zip or hasattr(sys, 'frozen')
4162
4163
def args_to_str(args):
    """Shell-quote a subprocess argument list into a single display string."""
    return ' '.join(map(compat_shlex_quote, args))
4167
4168
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    msg = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        msg = msg.decode(preferredencoding())
    return msg
4176
4177
def mimetype2ext(mt):
    """Map a MIME type to its conventional file extension."""
    if mt is None:
        return None

    # Full-type overrides checked before falling back to the subtype
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    ext = FULL_MAP.get(mt)
    if ext is not None:
        return ext

    # Otherwise match on the (lowercased) subtype, stripped of parameters
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4215
4216
def parse_codecs(codecs_str):
    # http://tools.ietf.org/html/rfc6381
    """Split an RFC 6381 codecs string into 'vcodec'/'acodec' fields."""
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    VIDEO_PREFIXES = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_PREFIXES = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        prefix = full_codec.split('.')[0]
        if prefix in VIDEO_PREFIXES:
            if not vcodec:
                vcodec = full_codec
        elif prefix in AUDIO_PREFIXES:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        # Two unrecognized entries are assumed to be video, audio - in order
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
4246
4247
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers.

    Prefers the Content-Disposition filename, falls back to Content-Type.
    """
    headers = url_handle.headers

    cd = headers.get('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4260
4261
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI for *data* with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4264
4265
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None:
        # No viewer limit set
        return False
    if content_limit is None:
        # Content available for everyone
        return False
    return age_limit < content_limit
4274
4275
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Recognized byte-order marks and the encoding each implies
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4294
4295
def determine_protocol(info_dict):
    """Infer the download protocol for an info dict from its URL and extension."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    # Streaming schemes are identified by their URL prefix
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4316
4317
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(table):
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def keep_columns(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # Drop columns whose data cells are all empty strings
        widths = column_widths(data)
        header_row = keep_columns(header_row, widths)
        data = [keep_columns(row, widths) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        # Insert a dashed separator line below the header
        table = [header_row] + [['-' * w for w in widths]] + data
    fmt = ' '.join('%-' + compat_str(w + extraGap) + 's' for w in widths[:-1]) + ' %s'
    return '\n'.join(fmt % tuple(row) for row in table)
4338
4339
def _match_one(filter_part, dct):
    """Evaluate one filter expression (e.g. "height >= 720" or "!is_live")
    against *dct*; raises ValueError on malformed filters."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key OP value, where value is an integer (with optional size suffix),
    # a quoted string, or a bare string; '?' after OP makes missing keys pass
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quote characters inside the quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try file-size notation, with and
                # without an explicit 'B' suffix
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: passes only when the '?' suffix was used
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary presence filters: "key" (truthy/present) and "!key" (absent)
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4408
4409
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    return all(
        _match_one(part, dct)
        for part in filter_str.split('&'))
4415
4416
def match_filter_func(filter_str):
    """Build a match-filter callable: returns None to accept an info dict,
    or a skip message string to reject it."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4425
4426
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset: "12", "12.5" or "12.5s"
    m = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if m:
        return float(m.group('time_offset'))

    # Clock format "HH:MM:SS[.frac]" (a trailing ":frames" part is folded
    # into the fraction)
    m = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if m:
        hours, mins, secs = m.groups()
        return 3600 * int(hours) + 60 * int(mins) + float(secs.replace(':', '.'))
4438
4439
def srt_subtitles_timecode(seconds):
    """Format a seconds value as an SRT timecode (HH:MM:SS,mmm)."""
    hrs = seconds / 3600
    mins = (seconds % 3600) / 60
    secs = seconds % 60
    msecs = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hrs, mins, secs, msecs)
4442
4443
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Legacy TTML namespaces rewritten to their modern equivalents before
    # parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The subset of TTML styling attributes that is converted to SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> resolved style property dict
    default_style = {}  # style inherited from the body/div element

    class TTMLPElementParser(object):
        # NOTE(review): these are class-level attributes shared between
        # instances; each paragraph parse is balanced (start/end pairs), so
        # the lists are empty again when a parse finishes.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an ancestor
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize one <p> subtree into SRT-flavoured markup text
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Rewrite legacy namespaces on the raw bytes before XML parsing
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; loop again while parent styles are still
    # forward references
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced by body/div becomes the document default
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4606
4607
def cli_option(params, command_option, param):
    """Map a params entry to [option, value]; empty list when unset."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    return [command_option, value] if value is not None else []
4613
4614
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Map a boolean params entry to CLI args; 'opt=value' style when a
    *separator* is given, empty list when unset."""
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    chosen = true_value if value else false_value
    if separator:
        return [command_option + separator + chosen]
    return [command_option, chosen]
4623
4624
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit [option] when the params entry equals *expected_value*, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4628
4629
def cli_configuration_args(params, param, default=[]):
    """Return the extra CLI argument list stored under *param*, or *default*."""
    extra_args = params.get(param)
    if extra_args is None:
        return default
    assert isinstance(extra_args, list)
    return extra_args
4636
4637
class ISO639Utils(object):
    """Static helpers for converting between ISO 639-1 (two-letter) and
    ISO 639-2/T (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Maps ISO 639-1 two-letter codes to ISO 639-2/T three-letter codes.
    # A few deprecated 639-1 codes ('iw', 'in', 'ji') map to the same
    # three-letter code as their modern replacements.
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are significant, so region-tagged
        # inputs such as 'en-US' are accepted too.  Returns None when the
        # code is unknown.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear scan of the map.  For three-letter codes that have several
        # two-letter aliases (e.g. 'heb' -> 'he'/'iw'), the winner depends
        # on dict iteration order — presumably insertion order (the
        # non-deprecated alias) on Python 3.7+; TODO(review): confirm for
        # older interpreters.  Returns None implicitly when not found.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4842
class ISO3166Utils(object):
    """Static helper mapping ISO 3166-1 alpha-2 country codes to their
    English country names."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Lookup is case-insensitive on the input; returns None for
        # unknown codes.
        return cls._country_map.get(code.upper())
5101
5102
class GeoUtils(object):
    """Helpers for picking a plausible IPv4 address inside a country's
    address space (used for geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (str) inside the block for the given
        two-letter country code, or inside an explicit 'a.b.c.d/prefixlen'
        block.  Returns None for an unknown country code.
        """
        if len(code_or_block) == 2:
            # Two characters: treat the argument as a country code.
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Randomise the host part only; the network address is assumed to
        # be aligned to the prefix (addr_min is not masked here).
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5361
5362
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets each request override the proxy via a
    'Ytdl-request-proxy' header, and hands SOCKS proxies off to the
    HTTP(S) handlers via a 'Ytdl-socks-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        # Default-argument trick binds the current values of `type` and
        # `proxy_open` per iteration (avoids late-binding closure bugs).
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy header takes precedence over the handler's
        # configured proxy; the header is stripped before sending.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers wrap the socket with SOCKS
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5386
5387
5388 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5389 # released into Public Domain
5390 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5391
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Emit the integer 32 bits at a time, least-significant word first,
    # then reassemble big-endian.
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    stripped = b''.join(reversed(words)).lstrip(b'\000')
    if not stripped:
        # n was 0: represent it as a single zero byte.
        stripped = b'\000'
    # Front-pad with zero bytes up to a multiple of blocksize, if requested.
    if blocksize > 0 and len(stripped) % blocksize:
        stripped = (blocksize - len(stripped) % blocksize) * b'\000' + stripped
    return stripped
5420
5421
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Front-pad to a multiple of 4 bytes so it can be consumed as
    # big-endian 32-bit words.
    if len(s) % 4:
        s = b'\000' * (4 - len(s) % 4) + s
    acc = 0
    for offset in range(0, len(s), 4):
        acc = (acc << 32) | compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return acc
5437
5438
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is interpreted little-endian (hence the reversal before
    # hexlify), then raw RSA (modular exponentiation) is applied.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5454
5455
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data

    @raises ValueError when data does not fit in length with at least
            11 bytes of padding overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # PKCS#1 v1.5 (RFC 2313 8.1): block is 00 || 02 || PS || 00 || data,
    # where PS consists of pseudo-random *nonzero* octets.  The previous
    # randint(0, 254) could emit a zero byte, which would be read as a
    # premature end-of-padding delimiter by the decrypter.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5469
5470
def encode_base_n(num, n, table=None):
    """Render the non-negative integer *num* in base *n*.

    Digits are taken from *table*; when no table is given, the first *n*
    characters of 0-9a-zA-Z are used.  Raises ValueError if the table is
    too short for the base.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5487
5488
def decode_packed_codes(code):
    """Unpack JavaScript code obfuscated with Dean Edwards' p.a.c.k.e.r.

    The packed payload matched by PACKED_CODES_RE contains the obfuscated
    source, a base, a symbol count and a '|'-separated symbol list; every
    word token in the source is replaced via its base-n index.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Build the token -> symbol table; an empty symbol means the token
    # stands for itself.
    symbol_table = {}
    for index in reversed(range(count)):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5505
5506
def caesar(s, alphabet, shift):
    """Rotate every character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); characters outside the alphabet pass
    through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        if ch not in alphabet:
            return ch
        return alphabet[(alphabet.index(ch) + shift) % size]

    return ''.join(rotate(ch) for ch in s)
5514
5515
def rot47(s):
    """Apply the ROT47 substitution cipher (rotate the 94 printable ASCII
    characters '!'..'~' by 47 positions)."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5518
5519
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=value,KEY="quoted value",...')
    into a dict, stripping surrounding double quotes from quoted values."""
    attributes = {}
    for key, value in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        if value.startswith('"'):
            # Quoted value: drop the enclosing quotes (commas inside are kept).
            value = value[1:-1]
        attributes[key] = value
    return attributes
5527
5528
def urshift(val, n):
    """Unsigned 32-bit right shift: negative *val* is first mapped to its
    unsigned 32-bit representation (JavaScript '>>>' semantics)."""
    if val < 0:
        val += 0x100000000
    return val >> n
5531
5532
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into (width, height, pixels), where pixels
    is a list of rows of raw byte values (3 bytes per pixel assumed).

    Raises IOError for invalid PNG signatures or missing IDAT data.
    NOTE(review): the code assumes 8-bit RGB without interlacing — the
    IHDR bit depth/color type fields are never checked.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # 8-byte PNG signature must be followed immediately by the IHDR chunk.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian integer readers keyed by byte width.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk: 4-byte length, 4-byte type, payload, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed to be the first chunk by the signature check above.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split across multiple IDAT chunks; concatenate them.
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel (RGB); each scanline is prefixed by 1 filter byte.
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Fetch an already-reconstructed byte by flat index into the image.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        # Undo the per-scanline filter (PNG spec section 9: filter types).
        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the corresponding byte of the previous pixel
            # (3 bytes back); first pixel of a row has no left neighbour.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # 'c' is the upper-left neighbour byte.
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the predictor closest to p (ties favour a, then b).
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5638
5639
def write_xattr(path, key, value):
    """Set the extended attribute *key* (str) to *value* (bytes) on *path*.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate Data
    Streams on Windows, and the setfattr/xattr command-line tools.

    Raises XAttrMetadataError when setting fails and XAttrUnavailableError
    when no usable backend exists.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Both the 'pyxattr' and 'xattr' PyPI packages import as 'xattr';
        # they are distinguished by which setter function they export.
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # CLI tools take the value as a text argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5722
5723
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the string year, month
    and day of a uniformly random date between 1950-01-01 and 1995-12-31
    (inclusive)."""
    first = datetime.date(1950, 1, 1)
    last = datetime.date(1995, 12, 31)
    chosen = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
5734
# Templates for internet shortcut files, which are plain text files.

# Windows '.url' internet shortcut format.
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS '.webloc' format (an XML property list).
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Freedesktop '.desktop' link format (Linux desktop environments).
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5760
5761
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    # Rebuild the authority component: userinfo, IDNA-encoded host, port.
    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): port 80 is dropped regardless of scheme — for https
    # URLs an explicit ':80' would be lost; presumably harmless in
    # practice, but verify before reuse.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5804
5805
def to_high_limit_path(path):
    """On Windows/Cygwin, return *path* as an absolute extended-length path
    ('\\\\?\\' prefix) to work around the MAX_PATH limit; elsewhere return
    it unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # The rstrip trick yields the '\\?\' prefix from a raw string literal
    # (a raw string cannot end in a lone backslash).
    return r'\\?\ '.rstrip() + os.path.abspath(path)
5812
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch obj[field] and render it through *template*.

    Values in *ignore* (checked both before and after applying *func*)
    yield *default* instead; missing fields fall back to *default* first.
    """
    value = obj.get(field, default)
    if func and value not in ignore:
        value = func(value)
    if value in ignore:
        return default
    return template % value