]> jfr.im git - yt-dlp.git/blob - yt_dlp/utils.py
Split video by chapters (#158)
[yt-dlp.git] / yt_dlp / utils.py
1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import imp
20 import io
21 import itertools
22 import json
23 import locale
24 import math
25 import operator
26 import os
27 import platform
28 import random
29 import re
30 import socket
31 import ssl
32 import subprocess
33 import sys
34 import tempfile
35 import time
36 import traceback
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_basestring,
44 compat_chr,
45 compat_cookiejar,
46 compat_ctypes_WINFUNCTYPE,
47 compat_etree_fromstring,
48 compat_expanduser,
49 compat_html_entities,
50 compat_html_entities_html5,
51 compat_http_client,
52 compat_integer_types,
53 compat_numeric_types,
54 compat_kwargs,
55 compat_os_name,
56 compat_parse_qs,
57 compat_shlex_quote,
58 compat_str,
59 compat_struct_pack,
60 compat_struct_unpack,
61 compat_urllib_error,
62 compat_urllib_parse,
63 compat_urllib_parse_urlencode,
64 compat_urllib_parse_urlparse,
65 compat_urllib_parse_urlunparse,
66 compat_urllib_parse_quote,
67 compat_urllib_parse_quote_plus,
68 compat_urllib_parse_unquote_plus,
69 compat_urllib_request,
70 compat_urlparse,
71 compat_xpath,
72 )
73
74 from .socks import (
75 ProxyType,
76 sockssocket,
77 )
78
79
def register_socks_protocols():
    """Teach urlparse to treat SOCKS schemes as netloc-carrying URLs.

    In Python < 2.6.5, urlsplit() suffers from the bug tracked at
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not handled correctly.  Each SOCKS scheme is
    therefore appended exactly once to the registry.
    """
    registered = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in registered:
            registered.append(proto)
87
88
# Type object of a compiled regular-expression pattern.  There is no public,
# version-stable name for this type across all interpreters this file
# supports, so it is derived by compiling an empty pattern once at import.
compiled_regex_type = type(re.compile(''))
91
92
def random_user_agent():
    """Return a randomized but realistic Chrome-on-Windows User-Agent string.

    A version is chosen at random on every call so that repeated runs do not
    present a single constant fingerprint.

    Only stable-channel Chrome builds are listed.  The previous list also
    contained hundreds of dev/canary snapshot builds (recognizable by their
    tiny patch components, e.g. '76.0.3780.3'); those versions are never
    reported by real end-user browsers, so advertising them made the
    User-Agent easy to flag as synthetic — and bloated this file by roughly
    1,500 lines.  Every version below appeared in the original list, so no
    previously-impossible output can be produced.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Stable-channel releases only (large patch numbers), a few per milestone
    # to keep some variety between calls.
    _CHROME_VERSIONS = (
        '68.0.3440.134',
        '68.0.3440.128',
        '68.0.3440.118',
        '69.0.3497.128',
        '69.0.3497.120',
        '69.0.3497.105',
        '69.0.3497.100',
        '70.0.3538.124',
        '70.0.3538.110',
        '70.0.3538.102',
        '70.0.3538.77',
        '71.0.3578.141',
        '71.0.3578.137',
        '71.0.3578.99',
        '71.0.3578.98',
        '72.0.3626.122',
        '72.0.3626.121',
        '72.0.3626.119',
        '72.0.3626.109',
        '72.0.3626.105',
        '72.0.3626.96',
        '73.0.3683.121',
        '73.0.3683.120',
        '73.0.3683.119',
        '73.0.3683.103',
        '73.0.3683.100',
        '73.0.3683.86',
        '73.0.3683.75',
        '74.0.3729.169' if False else '74.0.3729.129',  # keep to versions present in the original list
        '74.0.3729.131' if False else '74.0.3729.128',
        '74.0.3729.127',
        '74.0.3729.125',
        '74.0.3729.108',
        '74.0.3729.100',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# Default headers sent with every HTTP request; the User-Agent is picked
# once per process by random_user_agent() above.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings for extractors that need a specific browser
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1688
1689
# Sentinel distinguishing "caller gave no default" from an explicit None default
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, used when parsing free-form date strings
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Media file extensions recognized when guessing a format from a URL/path
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# Target containers accepted for remuxing
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')

# needed for sanitizing filenames in restricted mode
# (maps each accented character to its ASCII transliteration)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1724
# strptime() formats tried in order when parsing dates; the DAY_FIRST /
# MONTH_FIRST variants below extend this base list for ambiguous inputs
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Additional formats where the day precedes the month (e.g. 31/12/2020)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Additional formats where the month precedes the day (e.g. 12/31/2020)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of the p,a,c,k,e,d JavaScript obfuscator
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the payload of an application/ld+json <script> tag
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1785
1786
def preferredencoding():
    """Return the system's preferred text encoding.

    Based on locale.getpreferredencoding(), falling back to UTF-8 when
    the reported codec cannot actually encode a simple ASCII string.
    """
    try:
        encoding = locale.getpreferredencoding()
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1800
1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temp file next to the target so the final os.rename stays
    # on one filesystem (atomic on POSIX)
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Widen the tempfile's restrictive 0600 mode to the usual
            # umask-filtered 0666 before putting it in place
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file before re-raising
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # NOTE(review): `val` is interpolated into the expression unescaped;
        # callers are expected not to pass values containing quotes
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    # Python 2.6 ElementTree does not support attribute predicates in
    # find(), so filter the candidates manually
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1876
1877 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1878 # the namespace parameter
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into '{uri}tag' form via ns_map."""
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1891
1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element matching `xpath` (a string or an iterable of
    candidate xpaths) under `node`.

    On no match: return `default` when given, raise ExtractorError when
    `fatal`, otherwise return None.
    """
    if isinstance(xpath, (str, compat_str)):
        candidates = [xpath]
    else:
        candidates = xpath

    for xp in candidates:
        n = node.find(compat_xpath(xp))
        if n is not None:
            break

    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element %s' % name)
    return None
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but returns the matched element's text content."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute `key` of the element matching `xpath` under `node`.

    On no match: return `default` when given, raise ExtractorError when
    `fatal`, otherwise return None.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals `id`, or None."""
    return get_element_by_attribute('id', id, html)
1947
1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying the given class, or None."""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose `attribute` matches `value`, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1958
1959
def get_elements_by_class(class_name, html):
    """Return the contents of all tags carrying the given class, as a list."""
    # Match the class name as a whole word anywhere inside the attribute value
    class_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_re, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the contents of all tags whose `attribute` matches `value`.

    `value` is treated as a regular expression when escape_value is False.
    """
    if escape_value:
        value = re.escape(value)

    tag_re = re.compile(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value))

    results = []
    for m in tag_re.finditer(html):
        content = m.group('content')
        # Strip a single layer of surrounding quotes, if present
        if content.startswith(('"', "'")):
            content = content[1:-1]
        results.append(unescapeHTML(content))
    return results
1990
1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the last start tag seen (empty until one is parsed)
        self.attrs = {}
        # Explicit base-class call: compat_HTMLParser may be an old-style
        # class on Python 2, so super() cannot be used
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Each start tag overwrites the previous result; intended for
        # input containing exactly one element
        self.attrs = dict(attrs)
2001
2002
def extract_attributes(html_element):
    """Decode the attributes of a single HTML start tag into a dict.

    Given a string such as
        <el
             a="foo" B="bar" c="&#98;az" d=boz
             empty= noval entity="&amp;"
             sq='"' dq="'"
        >
    returns
        {
            'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
            'empty': '', 'noval': None, 'entity': '&',
            'sq': '"', 'dq': '\''
        }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Pythons raise HTMLParseError on malformed input; whatever
        # attributes were gathered before the failure are still usable
        pass
    return parser.attrs
2027
2028
def clean_html(html):
    """Clean an HTML snippet into a readable plain-text string."""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Turn <br> and paragraph boundaries into newlines; drop raw newlines
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Remove remaining tags, then decode entities
    html = re.sub('<.*?>', '', html)
    return unescapeHTML(html).strip()
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Avoid CRLF translation when piping binary data to stdout
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming; re-raise immediately
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2076
2077
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a Unix timestamp (None if unparseable)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2085
2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    # Keep timestamps like 12:34:56 readable by joining them with '_'
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(c) for c in s)
    if not is_id:
        # Collapse runs of underscores and trim decorative ones
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2126
2127
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        # `force` only has meaning off-Windows (sanitizing Windows-style
        # paths on other systems); on Windows sanitizing always happens
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        # Nothing to do on POSIX unless explicitly forced
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters Windows forbids in path components (plus any
    # trailing dot/space, which Windows also rejects) with '#'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve a leading separator for forced absolute paths
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2151
2152
def sanitize_url(url):
    """Fix protocol-relative URLs and commonly mistyped URL schemes."""
    if url.startswith('//'):
        # Prepend `http:` scheme to mitigate failures due to a missing protocol
        return 'http:%s' % url
    # Scheme typos seen in the wild
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for bad_scheme, good_scheme in typo_fixes:
        fixed, count = re.subn(bad_scheme, good_scheme, url)
        if count:
            return fixed
    return url
2169
2170
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after passing the URL through sanitize_url()."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2173
2174
def expand_path(s):
    """Expand shell variables and ~ in a path string."""
    expanded = compat_expanduser(s)
    return os.path.expandvars(expanded)
2178
2179
def orderedSet(iterable):
    """Return a list of the iterable's items with duplicates removed,
    preserving first-seen order.

    Uses membership tests on the result list (not a set) so unhashable
    items work too.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
2187
2188
def _htmlentity_transform(entity_with_semicolon):
    """Return the character denoted by one HTML entity (with trailing ';')."""
    entity = entity_with_semicolon[:-1]

    # Named HTML 4 entity
    codepoint = compat_html_entities.name2codepoint.get(entity)
    if codepoint is not None:
        return compat_chr(codepoint)

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric entity: decimal '#160' or hexadecimal '#xA0'
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # Invalid codepoints occur in the wild,
        # see https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity: hand back its literal representation
    return '&%s;' % entity
2218
2219
def unescapeHTML(s):
    """Decode all HTML entities in s; None passes through unchanged."""
    if s is None:
        return None
    assert type(s) == compat_str

    def transform(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', transform, s)
2227
2228
def process_communicate_or_kill(p, *args, **kwargs):
    """communicate() with the subprocess, killing it on any exception.

    Guarantees the child does not outlive an exception — including
    KeyboardInterrupt — raised while waiting for it.
    """
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:
        p.kill()
        p.wait()
        raise
2236
2237
def get_subprocess_encoding():
    """Return the text encoding to use when talking to subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere the filesystem encoding is the best available guess
    return sys.getfilesystemencoding() or 'utf-8'
2248
2249
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """
    assert type(s) == compat_str

    if sys.version_info >= (3, 0):
        # Python 3 has a Unicode API
        return s

    # Unicode may be passed straight through on Windows 2000+ (Unicode
    # file APIs are available; NT 4 detection via the major version is
    # unreliable and the platform is obsolete) — except for subprocess
    # arguments — and on Jython, which expects Unicode filenames even
    # though it reports itself as Python 2.x.
    passthrough = (
        (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        or sys.platform.startswith('java'))
    if passthrough:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2272
2273
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename back to text on Python 2; no-op on Python 3
    or for values that are not bytes."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2283
2284
def encodeArgument(s):
    """Encode a command-line argument (always via the subprocess path)."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, for_subprocess=True)
2292
2293
def decodeArgument(b):
    """Inverse of encodeArgument()."""
    return decodeFilename(b, for_subprocess=True)
2296
2297
def decodeOption(optval):
    """Decode a command-line option value to text; None passes through."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())
    assert isinstance(optval, compat_str)
    return optval
2306
2307
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as S, M<delim>SS or H<delim>MM<delim>SS.

    Uses >= at the unit boundaries so that exactly 3600 seconds renders
    as '1:00:00' (previously '60:00') and exactly 60 seconds as '1:00'
    (previously '60'), consistent with neighbouring values.
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2315
2316
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honouring the 'nocheckcertificate'
    option, using the best ssl API available on this Python version."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # Old Pythons: no SSLContext support in HTTPSHandler at all
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        # Build a context by hand; certificate checking on unless disabled
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2340
2341
def bug_reports_message():
    """Return the standard 'please report this issue' blurb appended to
    unexpected-error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    return (
        '; please report this issue on https://github.com/yt-dlp/yt-dlp .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
        % update_cmd)
2351
2352
class YoutubeDLError(Exception):
    """Base class for all YoutubeDL exceptions."""
2356
2357
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """

        # Network-level failures are always treated as expected (not a bug)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report blurb appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Render the stored traceback as a string, or None when absent
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2385
2386
class UnsupportedError(ExtractorError):
    """Raised for URLs that no extractor can handle."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2392
2393
class RegexNotFoundError(ExtractorError):
    """Raised when an expected regex fails to match."""
2397
2398
class GeoRestrictedError(ExtractorError):
    """Raised when a video is unavailable from the caller's geographic
    location due to restrictions imposed by the website.

    `countries` optionally carries the list of country codes where the
    video is available.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        self.countries = countries
2410
2411
class DownloadError(YoutubeDLError):
    """Raised by FileDownloader objects that are not configured to continue
    on errors; carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2424
2425
class SameFileError(YoutubeDLError):
    """Raised when multiple downloads would target the same file on disk."""
2433
2434
class PostProcessingError(YoutubeDLError):
    """Raised by a PostProcessor's run() method to signal an error in the
    postprocessing task."""

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
2445
2446
class ExistingVideoReached(YoutubeDLError):
    """ An already-downloaded video was encountered (--break-on-existing).
    NOTE(review): the previous docstring ("--max-downloads limit") was a
    copy-paste from MaxDownloadsReached. """
    pass
2450
2451
class RejectedVideoReached(YoutubeDLError):
    """ A video rejected by the match filters was encountered (--break-on-reject).
    NOTE(review): the previous docstring ("--max-downloads limit") was a
    copy-paste from MaxDownloadsReached. """
    pass
2455
2456
class MaxDownloadsReached(YoutubeDLError):
    """Raised once the --max-downloads limit has been reached."""
2460
2461
class UnavailableVideoError(YoutubeDLError):
    """Raised when a video is requested in a format that does not exist
    for that video."""
2469
2470
class ContentTooShortError(YoutubeDLError):
    """Raised when a download delivers fewer bytes than the server first
    announced, which usually indicates an interrupted connection."""

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both sizes are in bytes
        self.downloaded = downloaded
        self.expected = expected
2486
2487
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails.

    `reason` classifies the failure as 'NO_SPACE', 'VALUE_TOO_LONG' or
    'NOT_SUPPORTED' based on the errno / message text.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Derive the coarse failure category from errno and message text
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        if out_of_space:
            self.reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2502
2503
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no usable xattr implementation is available."""
2506
2507
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection, applying the 'source_address' param
    and working around several stdlib bugs on older Pythons."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only remote addresses of the same family as the bind address
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support; replace connect() wholesale
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2571
2572
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' pseudo-header.

    When present, it is removed together with any Accept-Encoding header;
    otherwise the original mapping is returned unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2581
2582
2583 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2584 """Handler for HTTP requests and responses.
2585
2586 This class, when installed with an OpenerDirector, automatically adds
2587 the standard headers to every HTTP request and handles gzipped and
2588 deflated responses from web servers. If compression is to be avoided in
2589 a particular request, the original request in the program code only has
2590 to include the HTTP header "Youtubedl-no-compression", which will be
2591 removed before making the real request.
2592
2593 Part of this code was copied from:
2594
2595 http://techknack.net/python-urllib2-handlers/
2596
2597 Andrew Rowls, the author of that code, agreed to release it to the
2598 public domain.
2599 """
2600
    def __init__(self, params, *args, **kwargs):
        # Keep the YoutubeDL params dict for per-request options
        # (e.g. source_address in _create_http_connection)
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2604
2605 def http_open(self, req):
2606 conn_class = compat_http_client.HTTPConnection
2607
2608 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2609 if socks_proxy:
2610 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2611 del req.headers['Ytdl-socks-proxy']
2612
2613 return self.do_open(functools.partial(
2614 _create_http_connection, self, conn_class, False),
2615 req)
2616
2617 @staticmethod
2618 def deflate(data):
2619 if not data:
2620 return data
2621 try:
2622 return zlib.decompress(data, -zlib.MAX_WBITS)
2623 except zlib.error:
2624 return zlib.decompress(data)
2625
2626 def http_request(self, req):
2627 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2628 # always respected by websites, some tend to give out URLs with non percent-encoded
2629 # non-ASCII characters (see telemb.py, ard.py [#3412])
2630 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2631 # To work around aforementioned issue we will replace request's original URL with
2632 # percent-encoded one
2633 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2634 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2635 url = req.get_full_url()
2636 url_escaped = escape_url(url)
2637
2638 # Substitute URL if any change after escaping
2639 if url != url_escaped:
2640 req = update_Request(req, url=url_escaped)
2641
2642 for h, v in std_headers.items():
2643 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2644 # The dict keys are capitalized because of this bug by urllib
2645 if h.capitalize() not in req.headers:
2646 req.add_header(h, v)
2647
2648 req.headers = handle_youtubedl_headers(req.headers)
2649
2650 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2651 # Python 2.6 is brain-dead when it comes to fragments
2652 req._Request__original = req._Request__original.partition('#')[0]
2653 req._Request__r_type = req._Request__r_type.partition('#')[0]
2654
2655 return req
2656
2657 def http_response(self, req, resp):
2658 old_resp = resp
2659 # gzip
2660 if resp.headers.get('Content-encoding', '') == 'gzip':
2661 content = resp.read()
2662 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2663 try:
2664 uncompressed = io.BytesIO(gz.read())
2665 except IOError as original_ioerror:
2666 # There may be junk add the end of the file
2667 # See http://stackoverflow.com/q/4928560/35070 for details
2668 for i in range(1, 1024):
2669 try:
2670 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2671 uncompressed = io.BytesIO(gz.read())
2672 except IOError:
2673 continue
2674 break
2675 else:
2676 raise original_ioerror
2677 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2678 resp.msg = old_resp.msg
2679 del resp.headers['Content-encoding']
2680 # deflate
2681 if resp.headers.get('Content-encoding', '') == 'deflate':
2682 gz = io.BytesIO(self.deflate(resp.read()))
2683 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2684 resp.msg = old_resp.msg
2685 del resp.headers['Content-encoding']
2686 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2687 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2688 if 300 <= resp.code < 400:
2689 location = resp.headers.get('Location')
2690 if location:
2691 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2692 if sys.version_info >= (3, 0):
2693 location = location.encode('iso-8859-1').decode('utf-8')
2694 else:
2695 location = location.decode('utf-8')
2696 location_escaped = escape_url(location)
2697 if location != location_escaped:
2698 del resp.headers['Location']
2699 if sys.version_info < (3, 0):
2700 location_escaped = location_escaped.encode('utf-8')
2701 resp.headers['Location'] = location_escaped
2702 return resp
2703
2704 https_request = http_request
2705 https_response = http_response
2706
2707
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of *base_class* whose connect() tunnels through the
    SOCKS proxy described by the *socks_proxy* URL."""
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    parsed = compat_urlparse.urlparse(socks_proxy)
    scheme = parsed.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    # NOTE(review): an unrecognized scheme leaves socks_type unset and raises
    # NameError below — callers are expected to pass socks* URLs only.

    def _unquote(value):
        return compat_urllib_parse_unquote_plus(value) if value else value

    proxy_args = (
        socks_type,
        parsed.hostname, parsed.port or 1080,
        True,  # Remote DNS
        _unquote(parsed.username),
        _unquote(parsed.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunneled socket in TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2749
2750
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS counterpart of YoutubeDLHandler: builds HTTPS connections via
    _create_http_connection, optionally through a SOCKS proxy."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        # Allow callers to inject a custom connection class (defaults to stdlib)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open an HTTPS connection, forwarding SSL context/hostname-check options."""
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        # Internal marker set when a socks proxy is configured; strip it
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2774
2775
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix curl/Netscape files use to mark HttpOnly cookies
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie line
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Named view over the 7 tab-separated fields of a cookie line
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly prefix so the base parser accepts the line
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries instead of failing the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2892
2893
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same jar handling to HTTP and HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # The workaround below is kept (disabled) for reference:
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2916
2917
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler; only needs a workaround on Python 2."""
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2925
2926
def extract_timezone(date_str):
    """Split a trailing timezone designator ('Z' or '+HH:MM'/'-HHMM') off
    *date_str* and return (timedelta, remaining_date_str)."""
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]
        if m.group('sign'):
            direction = 1 if m.group('sign') == '+' else -1
            tz_delta = datetime.timedelta(
                hours=direction * int(m.group('hours')),
                minutes=direction * int(m.group('minutes')))
        else:
            # A bare 'Z' means UTC: zero offset
            tz_delta = datetime.timedelta()
    else:
        tz_delta = datetime.timedelta()
    return tz_delta, date_str
2943
2944
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # Fractional seconds are dropped entirely
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        parsed = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        return None
    return calendar.timegm(parsed.timetuple())
2962
2963
def date_formats(day_first=True):
    """Return the format-string table ordered by day-first or month-first preference."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2966
2967
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    result = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # NOTE: every matching format overwrites the result, so the LAST format
    # in the table that parses wins (historical behavior — no break here).
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass
    if result is None:
        # Fall back to RFC 2822-style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                result = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if result is not None:
        return compat_str(result)
2994
2995
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date string into a UNIX timestamp, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A trailing "PM" shifts the parsed hour by 12
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for fmt in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(date_str, fmt) - timezone + datetime.timedelta(hours=pm_delta)
        except ValueError:
            continue
        return calendar.timegm(parsed.timetuple())

    # Fall back to RFC 2822-style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3027
3028
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
3040
3041
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename: original name with '<lang>.<format>' extension."""
    sub_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, sub_ext, expected_real_ext)
3044
3045
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        # Plain absolute date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # A bad approximation? (months/years converted to fixed day counts)
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3073
3074
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything that does not look like YYYYMMDD passes through unchanged
    return '-'.join(match.groups()) if match else date_str
3083
3084
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the extremes of the representable range
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3114
3115
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # On Python 2 this may come back as bytes; normalize to text
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3124
3125
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # fd (1=stdout, 2=stderr) -> argument for GetStdHandle
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on an actual console; redirected
        # pipes/files must fall back to the regular write path
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write BMP characters in chunks of up to 1024; a non-BMP character
        # is written on its own (as a surrogate pair, 2 UTF-16 units)
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3199
3200
def write_string(s, out=None, encoding=None):
    """Write text *s* to *out* (default: stderr), handling Windows consoles,
    byte-mode streams and encoding fallbacks."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows, try the console-specific wide-character path first
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode explicitly
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3221
3222
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    # Python 2: indexing yields 1-char strings
    return [ord(ch) for ch in bs]
3230
3231
def intlist_to_bytes(xs):
    """Pack a list of integer byte values back into a byte string."""
    if not xs:
        return b''
    # struct packs each value as an unsigned byte ('B')
    return compat_struct_pack('%dB' % len(xs), *xs)
3236
3237
# Cross-platform file locking: defines _lock_file/_unlock_file for the
# current platform (Win32 LockFileEx, POSIX fcntl, or a stub).
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure passed to (Un)LockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the whole file (low/high words of the byte count)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3311
3312
class locked_file(object):
    """Context manager wrapping an open file with an OS-level advisory lock."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        # Readers share the lock; writers ('a'/'w') need exclusivity
        want_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, want_exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3342
3343
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to utf-8 when undetectable."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3347
3348
def shell_quote(args):
    """Return a single shell-escaped string for the given argument list."""
    encoding = get_filesystem_encoding()

    def _to_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_to_text(arg)) for arg in args)
3358
3359
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata
3368
3369
def unsmuggle_url(smug_url, default=None):
    """Extract (url, data) previously packed by smuggle_url; (url, default)
    when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(jsond)
3377
3378
def format_bytes(bytes):
    """Return a human-readable size string (e.g. '1.00KiB') for a byte count.

    Accepts ints, floats or numeric strings; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so absurdly large values (>= 1024**9) do not index past the
        # suffix list (previously an IndexError)
        exponent = min(int(math.log(bytes, 1024.0)), len(suffixes) - 1)
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
3391
3392
def lookup_unit_table(unit_table, s):
    """Parse a '<number><unit>' string using *unit_table* (unit -> multiplier).

    Returns the integer product, or None when *s* does not match.
    """
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # Treat ',' as a decimal separator too
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3402
3403
def parse_filesize(s):
    """Parse a human-readable file size (e.g. '5.6 MiB', '120KB') into a byte
    count, or None when the string does not parse."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3473
3474
def parse_count(s):
    """Parse an abbreviated count like '1.2M' or '15k' into an int, or None."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers (possibly with separators) need no unit multiplier
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }

    return lookup_unit_table(multipliers, s)
3494
3495
def parse_resolution(s):
    """Extract width/height from strings like '1920x1080', '720p' or '4k'."""
    if s is None:
        return {}

    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
3516
3517
def parse_bitrate(s):
    """Extract an integer kbps value from strings like '128 kbps'; None otherwise."""
    if not isinstance(s, compat_str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
3524
3525
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    # Fall back to English when the requested language table is absent
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in names:
        return names.index(name) + 1
    return None
3535
3536
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbreviations = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3545
3546
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Negative lookahead skips ampersands that already start a valid entity
    bare_amp = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_amp, '&amp;', xml_str)
3553
3554
def setproctitle(title):
    """Best-effort: set the process title (as shown by ps/top) via libc prctl."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME (see prctl(2))
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3579
3580
def remove_start(s, start):
    """Strip *start* from the beginning of *s* when present; None-safe."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3583
3584
def remove_end(s, end):
    """Strip *end* from the end of *s* when present; None-safe.

    An empty *end* is a no-op: the naive `s[:-len(end)]` slice would be
    `s[:-0]` == `s[:0]` == '' and silently destroy the string.
    """
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
3587
3588
def remove_quotes(s):
    """Strip one matching pair of surrounding quotes (single or double) from *s*."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3596
3597
def get_domain(url):
    """Return the domain part of *url* (leading 'www.' stripped), or None."""
    m = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return m.group('domain') if m else None
3601
3602
def url_basename(url):
    """Return the last path component of *url*."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
3606
3607
def base_url(url):
    """Return *url* truncated after the last '/' before any query/fragment.

    Raises AttributeError when the URL has no path slash (same as the
    original implementation, which called .group() on a None match).
    """
    m = re.match(r'https?://[^?#&]+/', url)
    return m.group()
3610
3611
def urljoin(base, path):
    """Join *path* onto *base*, tolerating bytes input; None when not joinable."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Path is already absolute (scheme-relative or fully qualified)
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3625
3626
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that issues HEAD instead of GET."""

    def get_method(self):
        return 'HEAD'
3630
3631
class PUTRequest(compat_urllib_request.Request):
    """A urllib Request that issues PUT instead of GET."""

    def get_method(self):
        return 'PUT'
3635
3636
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* to int, scaled by invscale/scale; return *default* on failure.

    When *get_attr* is given, the value is first read from that attribute of *v*.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3649
3650
def str_or_none(v, default=None):
    """Stringify *v*, passing None through as *default*."""
    if v is None:
        return default
    return compat_str(v)
3653
3654
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and stray '+'/'.' before conversion
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3662
3663
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to float, scaled by invscale/scale; return *default* on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (TypeError, ValueError):
        return default
3671
3672
def bool_or_none(v, default=None):
    """Return *v* only if it is a genuine bool; otherwise *default*."""
    if isinstance(v, bool):
        return v
    return default
3675
3676
def strip_or_none(v, default=None):
    """Return v.strip() for string input, else *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3679
3680
def url_or_none(url):
    """Return *url* stripped if it looks like a supported (possibly protocol-relative) URL."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3686
3687
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string with *date_format*; *default* on failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None  # unsupported type -> AttributeError below -> default
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3698
3699
def parse_duration(s):
    """Parse a duration string into seconds (float), or None if unparseable.

    Accepts clock form ('[[DD:]HH:]MM:SS[.ms]'), ISO-8601-ish/verbose form
    ('P1DT2H3M4S', '2 hours 30 min'), and bare 'N hours' / 'N mins'.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Clock-style [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # ISO-8601-like / verbose forms; years/months/weeks are matched but ignored
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Bare 'N hours' or 'N mins' with possible decimals
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # 'ms' group includes the leading dot, e.g. '.5' -> 0.5 seconds
        duration += float(ms)
    return duration
3756
3757
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension ('a.mp4' -> 'a.ext.mp4'); if the
    real extension does not match *expected_real_ext*, append instead."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3764
3765
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*; if the current extension
    does not match *expected_real_ext*, append *ext* instead."""
    name, real_ext = os.path.splitext(filename)
    base = name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename
    return '{0}.{1}'.format(base, ext)
3771
3772
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # Fix: *args* used a mutable default ([]); use None sentinel instead.
    # Behavior is unchanged (the list was never mutated).
    if args is None:
        args = []
    try:
        process_communicate_or_kill(subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
    except OSError:
        # Binary not found / not executable
        return False
    return exe
3782
3783
def get_exe_version(exe, args=None,
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    # Fix: *args* used a mutable default (['--version']); use None sentinel.
    # Behavior is unchanged (the list was never mutated).
    if args is None:
        args = ['--version']
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = process_communicate_or_kill(subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3801
3802
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string (regex group 1) from an executable's output."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
3812
3813
class PagedList(object):
    """Base class for lazily paged result lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3818
3819
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via *pagefunc(pagenum)*,
    optionally caching whole pages."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return items in [start, end), fetching only the pages needed."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # First/next absolute item ids covered by this page
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                # Cache the full page before slicing it below
                self._cache[pagenum] = page_results

            # Offset of the first wanted item within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted item within this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3870
3871
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return items in [start, end), fetching only the pages needed."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading items to discard from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Items still wanted overall (None means "everything remaining")
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page contains the last wanted item; truncate and stop
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3899
3900
def uppercase_escape(s):
    """Expand \\UXXXXXXXX escape sequences in *s* to their characters."""
    decoder = codecs.getdecoder('unicode_escape')

    def expand(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
3907
3908
def lowercase_escape(s):
    """Expand \\uXXXX escape sequences in *s* to their characters."""
    decoder = codecs.getdecoder('unicode_escape')

    def expand(m):
        return decoder(m.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
3915
3916
def escape_rfc3986(s):
    """Percent-escape non-ASCII characters as suggested by RFC 3986."""
    # On Python 2, encode unicode to UTF-8 bytes before quoting
    if sys.version_info < (3, 0):
        if isinstance(s, compat_str):
            s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
3922
3923
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment),
    ).geturl()
3934
3935
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping BOMs, blanks and comment lines."""
    def sanitize(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(sanitize, fd) if url]
3953
3954
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
3957
3958
def update_url_query(url, query):
    """Return *url* with the key/value pairs in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    updated = parsed._replace(query=compat_urllib_parse_urlencode(params, True))
    return compat_urlparse.urlunparse(updated)
3967
3968
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req* with optionally replaced url/data and merged headers/query,
    preserving the original HTTP verb (HEAD/PUT/GET) and timeout."""
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    # Re-create the request with the subclass matching the original verb
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3987
3988
def _multipart_encode_impl(data, boundary):
    """Encode dict *data* as multipart/form-data bytes using *boundary*.

    Returns (body_bytes, content_type). Raises ValueError if the boundary
    occurs inside any encoded part.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    # Perf: collect parts and join once instead of quadratic bytes +=
    boundary_bytes = boundary.encode('ascii')
    parts = []
    for k, v in data.items():
        parts.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(content)

    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
4009
4010
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    # Retry with fresh random boundaries until one does not collide with the data
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None
4039
4040
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Return the first usable value among *key_or_keys* from dict *d*."""
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4049
4050
def try_get(src, getter, expected_type=None):
    """Apply getters in turn; return the first result that doesn't raise a
    common access error (and matches *expected_type*, if given)."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fn in getters:
        try:
            result = fn(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
4062
4063
def merge_dicts(*dicts):
    """Merge dicts left-to-right; earlier non-None values win, except that a
    non-empty string may replace an earlier empty string."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k not in merged:
                merged[k] = v
                continue
            # Allow a later non-empty string to override an earlier empty one
            if (isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k]):
                merged[k] = v
    return merged
4076
4077
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    # Pass unicode strings through unchanged; decode byte strings with *encoding*.
    # NOTE: the default *encoding* is evaluated once, at import time.
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4080
4081
# US MPAA-style ratings mapped to the age-limit values used by parse_age_limit()
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to the same age-limit scale
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4099
4100
def parse_age_limit(s):
    """Parse an age limit from an int, 'NN+' string, US rating or TV guideline."""
    # Deliberately exact int check (excludes bool, which is an int subclass)
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4115
4116
def strip_jsonp(code):
    """Strip a JSONP callback wrapper, leaving only the payload."""
    callback_rex = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return callback_rex.sub(r'\g<callback_data>', code)
4125
4126
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal in *code* to valid JSON."""
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (regex, base) pairs recognizing hex and octal integer literals,
    # optionally followed by ':' when used as object keys
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token (string, comment, identifier, number, ...)
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!' coercions and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Normalize JS string escapes to JSON form
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integers used as object keys must be quoted in JSON
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Anything else (bare identifier / processed string) gets quoted
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4171
4172
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4181
4182
# Built-in output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Recognized output-template types; NOTE(review): the string values appear to
# be default filename infixes for the generated files -- confirm against callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4197
4198
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4207
4208
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4211
4212
def is_outdated_version(version, limit, assume_new=True):
    """Compare dotted version strings; on parse failure, fall back per *assume_new*."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4220
4221
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Self-update is deliberately disabled; the zipimporter-based detection
    # inherited from youtube-dl was unreachable dead code and has been removed.
    return False
4229
4230
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4234
4235
def error_to_compat_str(err):
    """Stringify an exception, decoding Python 2 byte messages properly."""
    message = str(err)
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    if sys.version_info[0] < 3:
        message = message.decode(preferredencoding())
    return message
4243
4244
def mimetype2ext(mt):
    """Map a MIME type string to a file extension; None passes through."""
    if mt is None:
        return None

    # Exact full-type overrides checked first
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in FULL_MAP:
        return FULL_MAP[mt]

    # Otherwise match on the subtype, minus any ';'-separated parameters
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4282
4283
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    KNOWN_VCODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    KNOWN_ACODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec = None
    acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in KNOWN_VCODECS:
            if vcodec is None:
                vcodec = full_codec
        elif family in KNOWN_ACODECS:
            if acodec is None:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Nothing recognized: assume "video, audio" ordering when exactly two given
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4313
4314
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's Content-Disposition filename,
    falling back to its Content-Type."""
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
4327
4328
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI from bytes *data* and *mime_type*."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
4331
4332
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No limit configured, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4341
4342
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Recognized byte-order marks and their encodings (longest first)
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4361
4362
def determine_protocol(info_dict):
    """Work out the download protocol: explicit 'protocol' field, well-known
    URL prefix, extension, or finally the URL scheme."""
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4383
4384
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(table):
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def keep_columns(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # Drop columns that are empty in every data row
        mask = column_widths(data)
        header_row = keep_columns(header_row, mask)
        data = [keep_columns(row, mask) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        table = [header_row] + [['-' * w for w in widths]] + data
    format_str = ' '.join('%-' + compat_str(w + extraGap) + 's' for w in widths[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4405
4406
def _match_one(filter_part, dct):
    """Evaluate one --match-filter clause (e.g. 'duration>60', 'title="x"',
    '!is_live') against dict *dct*."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key OP value, where value is a number (with optional size suffix),
    # a quoted string, or a bare word; '?' after OP means "pass if missing"
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside quoted values
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try file-size suffixes ('500K', '1.2MiB')
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field passes only with the '?' (none-inclusive) marker
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary presence/absence tests: 'key' or '!key'
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4475
4476
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4482
4483
def match_filter_func(filter_str):
    """Build a match-filter callback: returns None to accept a video, or a
    human-readable skip message otherwise."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4492
4493
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('5.3s' or 'HH:MM:SS[.f]') into seconds."""
    if not time_expr:
        return

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, mins, secs = mobj.groups()
        # A trailing ':frames' component is treated as a decimal fraction
        return 3600 * int(hours) + 60 * int(mins) + float(secs.replace(':', '.'))
4505
4506
def srt_subtitles_timecode(seconds):
    """Format a seconds value as an SRT timecode (HH:MM:SS,mmm)."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4509
4510
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Map legacy TTML namespaces onto the modern ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes translated into SRT font/b/i/u markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> resolved style properties
    default_style = {}  # style inherited from body/div elements

    class TTMLPElementParser(object):
        """XML parser target that renders one <p> element as SRT markup."""
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close tags opened in start(), innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-parse the serialized <p> element through TTMLPElementParser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; re-scan until all parent styles were seen
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # Styles attached to body/div become the document-wide default style
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4673
4674
def cli_option(params, command_option, param):
    """Render an option/value pair for an external command line, or [] if unset."""
    value = params.get(param)
    if value:
        value = compat_str(value)
    return [command_option, value] if value is not None else []
4680
4681
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean option for an external command line; [] if the param is unset."""
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    chosen = true_value if value else false_value
    if separator:
        return [command_option + separator + chosen]
    return [command_option, chosen]
4690
4691
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value,
    otherwise []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4695
4696
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Select extra command-line arguments from *argdict*.

    *argdict* maps lower-case executable names to lists of args; *keys* is
    a list of names (or tuples of names) tried in order, and the args of
    the first group that has any match are returned concatenated.  A plain
    list/tuple *argdict* is the legacy form and is returned as-is when
    use_compat is set.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_group in keys:
        # A bare string counts as a one-element group
        if isinstance(key_group, compat_str):
            key_group = (key_group,)
        matches = [argdict.get(key.lower()) for key in key_group]
        matches = [m for m in matches if m is not None]
        if matches:
            # Flatten the matched argument lists in order
            return [arg for group in matches for arg in group]
    return default
4717
4718
class ISO639Utils(object):
    """Conversion between two-letter (ISO 639-1) and three-letter
    (ISO 639-2/T) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are consulted, so longer inputs
        # such as 'en-US' are truncated to their primary subtag.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear scan over the forward map; the first matching short code
        # wins, and None is returned implicitly when nothing matches.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
4922
4923
class ISO3166Utils(object):
    """Mapping from ISO 3166 two-letter country codes to full names."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes.
        return cls._country_map.get(code.upper())
5182
5183
class GeoUtils(object):
    """Helper for picking a random IPv4 address inside a country's major
    address block (used for geo-restriction bypass)."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (string) inside the given CIDR
        block, or inside the known block for a two-letter country code.
        Returns None for an unknown country code."""
        # A 2-character argument is treated as a country code; anything
        # else is assumed to already be a CIDR block 'a.b.c.d/prefix'.
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Lowest address in the block as a 32-bit integer...
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # ...and the highest, obtained by setting all host bits
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5442
5443
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honors a per-request 'Ytdl-request-proxy' header,
    supports the '__noproxy__' sentinel to disable proxying, and defers
    SOCKS proxies to the http/https handlers via 'Ytdl-socks-proxy'."""

    def __init__(self, proxies=None):
        # Set default handlers
        # NOTE: loop variables are bound via lambda default arguments to
        # avoid Python's late-binding closure pitfall.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy set on the request itself overrides the default one;
        # the internal header is stripped before the request goes out.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # SOCKS is not handled here; tag the request so the
            # http/https openers wrap the socket themselves.
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5467
5468
5469 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5470 # released into Public Domain
5471 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5472
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of
    the byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Emit the integer 32 bits at a time, most-significant word first
    words = []
    while n > 0:
        words.insert(0, compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    s = b''.join(words)
    # Strip leading zero bytes (the top word is usually only partly used)
    zero_byte = b'\000'[0]
    start = 0
    while start < len(s) and s[start] == zero_byte:
        start += 1
    if start == len(s):
        # only happens when n == 0
        s = b'\000'
    else:
        s = s[start:]
    # Left-pad with zeros up to a multiple of blocksize, if requested
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5501
5502
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad to a multiple of 4 bytes so we can consume 32-bit words
    pad = (4 - len(s) % 4) % 4
    if pad:
        s = b'\000' * pad + s
    result = 0
    for offset in range(0, len(s), 4):
        # Shifting first leaves the low 32 bits clear, so OR == ADD here
        result = (result << 32) | compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return result
5518
5519
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The input bytes are reversed (little-endian) before being read as
    # one big hex integer, matching OHDave's JavaScript implementation
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5535
5536
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data

    The result has the EME-PKCS1-v1_5 layout [0x00, 0x02, PS..., 0x00, data...].
    RFC 8017 (section 7.2.1) requires the padding string PS to consist of
    *nonzero* octets -- a zero octet would prematurely terminate the padding
    on decryption -- so the random range is 1..255 (the previous 0..254
    could produce zeros).
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5550
5551
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*, using *table*
    as the digit alphabet (defaults to 0-9, a-z, A-Z)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5568
5569
def decode_packed_codes(code):
    """Decode P.A.C.K.E.R.-style obfuscated JavaScript by substituting
    each base-N token with its entry from the symbol table."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    # Token i (encoded in base `base`) maps to symbols[i]; an empty
    # symbol means the token stands for itself.
    symbol_table = {}
    for idx in range(count - 1, -1, -1):
        token = encode_base_n(idx, base)
        symbol_table[token] = symbols[idx] or token

    return re.sub(
        r'\b(\w+)\b', lambda m: symbol_table[m.group(0)],
        obfuscated_code)
5586
5587
def caesar(s, alphabet, shift):
    """Shift every character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); other characters pass through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)
    out = []
    for ch in s:
        if ch in alphabet:
            out.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            out.append(ch)
    return ''.join(out)
5595
5596
def rot47(s):
    # ROT47: rotate the 94 printable ASCII characters (0x21-0x7E) by 47
    # places -- a self-inverse substitution cipher.
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5599
5600
def parse_m3u8_attributes(attrib):
    """Parse an M3U8/HLS attribute list ('KEY=VALUE,KEY="VAL,UE",...')
    into a dict, stripping quotes from quoted values.

    Quoted values may contain commas.  Empty quoted values (KEY="") are
    accepted and yield an empty string (the previous pattern '"[^"]+"'
    required at least one character and silently dropped such attributes).
    """
    info = {}
    for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]*"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
5608
5609
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, treating negative
    inputs as their two's-complement 32-bit representation."""
    if val < 0:
        val += 0x100000000
    return val >> n
5612
5613
5614 # Based on png2str() written by @gdkchan and improved by @yokrysty
5615 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG byte string into (width, height, pixels), where pixels
    is a list of rows and each row is a flat list of byte values
    (3 per pixel: R, G, B).

    NOTE(review): the stride is computed as width * 3, i.e. this assumes
    8-bit-per-channel RGB without alpha or interlacing -- other color
    types are not handled; confirm against the callers' inputs.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the fixed 8-byte PNG signature and that IHDR comes first
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Unpack a big-endian unsigned integer of 1, 2 or 4 bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, data, CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk; width and height are its first two fields
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Concatenate every IDAT chunk to obtain the full compressed image data
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 bytes per RGB pixel)
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by its flat index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo the per-scanline PNG filters (PNG spec, section 9 "Filtering")
    for y in range(height):
        # Each scanline is prefixed with a single filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the corresponding byte of the previous pixel
            # (3 bytes back, hence x > 2); 'up' is one scanline above
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # 'c' is the upper-left neighbour byte
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick whichever neighbour is closest to the predictor
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5719
5720
def write_xattr(path, key, value):
    """Set the extended attribute *key* to the bytes *value* on *path*.

    Tries, in order: the pyxattr/xattr Python modules; NTFS Alternate
    Data Streams on Windows; the setfattr/xattr command-line tools
    elsewhere.  Raises XAttrUnavailableError when no implementation is
    usable and XAttrMetadataError when the chosen one fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5803
5804
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the (string) year,
    month and day of a random date between 1950-01-01 and 1995-12-31."""
    epoch = datetime.date(1950, 1, 1)
    span = (datetime.date(1995, 12, 31) - epoch).days
    picked = epoch + datetime.timedelta(days=random.randint(0, span))
    return {
        year_field: str(picked.year),
        month_field: str(picked.month),
        day_field: str(picked.day),
    }
5815
5816
# Templates for internet shortcut files, which are plain text files.
# Windows '.url' internet-shortcut format
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS Finder '.webloc' format (an XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org '.desktop' entry (Linux desktop shortcut)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5842
5843
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): an explicit port 80 is dropped for *every* scheme, not
    # just http -- e.g. 'https://host:80/' loses its port here; confirm
    # this is intended.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5886
5887
def to_high_limit_path(path):
    """Work around the Windows MAX_PATH limitation by returning the absolute
    path with the extended-length prefix; non-Windows paths come back as-is.
    The maximum allowed length for individual path segments may still be
    quite limited.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return '\\\\?\\' + os.path.abspath(path)
5894
5895
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Look up obj[field] and interpolate it into *template*.

    Values contained in *ignore* yield *default* instead. When *func* is
    given it transforms the value before formatting; its result is checked
    against *ignore* as well.
    """
    raw = obj.get(field, default)
    if func and raw not in ignore:
        raw = func(raw)
    if raw in ignore:
        return default
    return template % raw
5901
5902
def clean_podcast_url(url):
    """Strip known podcast analytics/tracking redirect prefixes from *url*,
    returning the direct media URL. The matched prefixes (Chartable, Blubrry,
    Podtrac, Acast, Podcorn, Podsights) are documented by the links embedded
    in the pattern below.
    """
    return re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/''', '', url)
5918
5919
5920 _HEX_TABLE = '0123456789abcdef'
5921
5922
def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID as a lowercase hyphenated string.

    Uses the stdlib uuid module, which sets the version and variant bits
    correctly; the previous hand-rolled template filled the variant position
    ('y') with any hex digit instead of one of [89ab], producing strings that
    were not valid v4 UUIDs.
    """
    import uuid
    return str(uuid.uuid4())
5925
5926
def make_dir(path, to_screen=None):
    """Create the parent directory of *path* if it does not already exist.

    Returns True on success (including when there is nothing to create) and
    False when creation fails; the failure is reported through the optional
    *to_screen* callable.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Was `if callable(to_screen) is not None:` — always True, since
        # callable() returns a bool; it then crashed with a TypeError when
        # to_screen was None and a real error had to be reported.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
5937
5938
def get_executable_path():
    """Return the absolute directory the program is effectively running from:
    the executable's directory for PyInstaller builds, otherwise a path
    relative to this module (two levels up inside a zip, one level up for a
    plain source checkout).
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))
    # Zipped package vs. ordinary source tree
    levels_up = '../..' if isinstance(globals().get('__loader__'), zipimporter) else '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), levels_up))
5948
5949
def load_plugins(name, type, namespace):
    """Load plugin classes from the 'ytdlp_plugins/<name>' module next to the
    executable.

    Every attribute of that module whose name ends with *type* is collected
    into the returned list and registered into *namespace* under its own
    name. A missing plugin module is silently ignored.

    NOTE(review): built on the 'imp' module, deprecated since Python 3.4 in
    favour of importlib. Also, *type* shadows the builtin and the loop below
    rebinds the *name* parameter — kept as-is here.
    """
    # Sentinel shape matching imp.find_module()'s (file, pathname, description)
    # tuple, so the finally-block close is safe even if find_module raises.
    plugin_info = [None]
    classes = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        for name in dir(plugins):  # rebinds the 'name' parameter
            if not name.endswith(type):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        # No such plugin package — not an error.
        pass
    finally:
        # find_module returns an open file object (or None, e.g. for packages)
        # as the first tuple element; close it to avoid leaking the handle.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
5969
5970
def traverse_dict(dictn, keys, casesense=True):
    """Follow the chain of *keys* through nested dicts, returning the value
    at the end or None when any step is missing or not a dict. With
    casesense=False, keys are matched case-insensitively (lowercased).
    """
    if not isinstance(dictn, dict):
        return None
    head, tail = keys[0], keys[1:]
    if not casesense:
        dictn = {k.lower(): v for k, v in dictn.items()}
        head = head.lower()
    value = dictn.get(head)
    return traverse_dict(value, tail, casesense) if tail else value