]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/utils.py
[youtube:tab] Support channel search
[yt-dlp.git] / yt_dlp / utils.py
... / ...
CommitLineData
1#!/usr/bin/env python
2# coding: utf-8
3
4from __future__ import unicode_literals
5
6import base64
7import binascii
8import calendar
9import codecs
10import collections
11import contextlib
12import ctypes
13import datetime
14import email.utils
15import email.header
16import errno
17import functools
18import gzip
19import imp
20import io
21import itertools
22import json
23import locale
24import math
25import operator
26import os
27import platform
28import random
29import re
30import socket
31import ssl
32import subprocess
33import sys
34import tempfile
35import time
36import traceback
37import xml.etree.ElementTree
38import zlib
39
40from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_cookiejar,
47 compat_ctypes_WINFUNCTYPE,
48 compat_etree_fromstring,
49 compat_expanduser,
50 compat_html_entities,
51 compat_html_entities_html5,
52 compat_http_client,
53 compat_integer_types,
54 compat_numeric_types,
55 compat_kwargs,
56 compat_os_name,
57 compat_parse_qs,
58 compat_shlex_quote,
59 compat_str,
60 compat_struct_pack,
61 compat_struct_unpack,
62 compat_urllib_error,
63 compat_urllib_parse,
64 compat_urllib_parse_urlencode,
65 compat_urllib_parse_urlparse,
66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
69 compat_urllib_parse_unquote_plus,
70 compat_urllib_request,
71 compat_urlparse,
72 compat_xpath,
73)
74
75from .socks import (
76 ProxyType,
77 sockssocket,
78)
79
80
def register_socks_protocols():
    """Teach urlsplit()/urlparse() that SOCKS URLs carry a netloc.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended to that registry (idempotently) before use.
    """
    registered = compat_urlparse.uses_netloc
    for proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proxy_scheme not in registered:
            registered.append(proxy_scheme)
88
89
# This is not clearly defined otherwise
# (the type of a compiled regex has no stable public name across the
# Python 2/3 versions this file supports, so derive it from an actual
# re.compile() call; used elsewhere for isinstance() checks)
compiled_regex_type = type(re.compile(''))
92
93
def random_user_agent():
    """Return a desktop-Chrome User-Agent string with a randomly chosen
    Chrome version (used as the default HTTP User-Agent)."""
    # Windows 10 x64 Chrome UA template; only the Chrome version varies.
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Historical Chrome/Chromium build numbers, roughly newest first;
    # one entry is picked uniformly at random on every call.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Default HTTP headers attached to every request (see YoutubeDLHandler.http_request).
# The User-Agent is picked once, at import time, by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings that callers can opt into.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
# Sentinel distinguishing "no default supplied" from an explicit None default.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, for parsing non-English textual dates.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# File extensions recognised as audio/video media.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# Container formats remuxing may target.
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII replacement (built positionally
# by zipping the key string with the chained replacement sequences).
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# strptime() formats tried in order when parsing free-form date strings.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Variant preferring day-first interpretation of ambiguous numeric dates.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Variant preferring month-first (US style) interpretation.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches packed ("p.a.c.k.e.d") JavaScript; four capture groups (payload,
# radix, count, symbol table) — presumably P.A.C.K.E.R. output; confirm at call sites.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the contents of a <script type="application/ld+json"> block.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1786
1787
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Verify the reported codec actually works before trusting it.
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'
    return encoding
1801
1802
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Write to a temp file in the target directory so the final os.rename
    # stays on the same filesystem (atomic where the OS supports it).
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            # default=repr keeps serialization best-effort for exotic values.
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with mode 0600; widen to the
            # process default (0666 minus the current umask) like open() would.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file before re-raising.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1861
1862
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # Keys are restricted to plain names so they can be safely
        # interpolated into the XPath expression below.
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (manual scan for Pythons whose
        ElementTree lacks attribute predicates) """
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter
1880
1881
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an XPath into '{uri}tag' (Clark) notation
    using the prefix->URI mapping in ns_map."""
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1892
1893
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath` (a string or a sequence of
    candidate XPaths tried in order).

    Returns `default` when given and nothing matches; otherwise raises
    ExtractorError if `fatal` is set, or returns None.
    """
    if isinstance(xpath, (str, compat_str)):
        n = node.find(compat_xpath(xpath))
    else:
        # Sequence of candidates: first hit wins.
        for xp in xpath:
            n = node.find(compat_xpath(xp))
            if n is not None:
                break

    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element %s' % (xpath if name is None else name))
    return None
1915
1916
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    # Element found but it carries no text.
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
1930
1931
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the value of attribute `key` on the match."""
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
1943
1944
def get_element_by_id(id, html):
    """Return the inner content of the element whose id attribute equals `id`."""
    # An id lookup is just an attribute lookup with a fixed attribute name.
    return get_element_by_attribute('id', id, html)
1948
1949
def get_element_by_class(class_name, html):
    """Return the inner content of the first element carrying the given class,
    or None when the class is not present."""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1954
1955
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the inner content of the first element whose `attribute` equals
    `value`, or None when there is no match."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1959
1960
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class as a whole word anywhere in the (possibly multi-class)
    # attribute value; the pattern itself must not be re-escaped.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1966
1967
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    if escape_value:
        value = re.escape(value)

    results = []
    for match in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        content = match.group('content')

        # Strip a stray surrounding quote pair, if any.
        if content[:1] in ('"', "'"):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
1991
1992
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attribute name -> value of the last start tag seen; values may be
        # None for bare attributes (standard HTMLParser convention).
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Each start tag overwrites the previous attributes — callers feed
        # exactly one element (see extract_attributes).
        self.attrs = dict(attrs)
2002
2003
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Pythons raise on malformed HTML; return whatever was
        # gathered before the error.
        pass
    return parser.attrs
2028
2029
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # Collapse literal newlines, then turn <br> and paragraph boundaries
    # into newlines of our own.
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop any remaining tags, then decode HTML entities.
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
2045
2046
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means stdout; on Windows the fd must be switched to binary
            # mode first, and the binary buffer is preferred when available.
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission problems will not be solved by renaming; re-raise.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2077
2078
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable RFC 2822 date.
        return None
    return email.utils.mktime_tz(parsed)
2086
2087
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        # Control characters and '?' are always dropped.
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and codepoint > 127:
            return '_'
        return char

    # Handle timestamps: turn 12:34:56-style runs into 12_34_56 up front.
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2127
2128
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows.

    On other platforms this is a no-op unless `force` is set, in which case
    the Windows character rules are applied anyway."""
    if sys.platform == 'win32':
        # `force` only matters off Windows; reset it so the tail logic below
        # takes the drive/UNC-aware path.
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            # splitdrive() only learned UNC paths in 2.7.
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters forbidden on Windows, and trailing dots/spaces,
    # in every component except the '.'/'..' pseudo-components.
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        # Preserve absoluteness of the original path in forced mode.
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2152
2153
def sanitize_url(url):
    """Give protocol-less URLs a scheme, fix common scheme typos, and escape."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:' + url
    # Fix some common typos seen so far
    scheme_typos = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo, correction in scheme_typos:
        if re.match(typo, url):
            return re.sub(typo, correction, url)
    return escape_url(url)
2170
2171
def sanitized_Request(url, *args, **kwargs):
    """A compat_urllib_request.Request whose URL has been passed through sanitize_url()."""
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2174
2175
def expand_path(s):
    """Expand shell variables and ~"""
    home_expanded = compat_expanduser(s)
    return os.path.expandvars(home_expanded)
2179
2180
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable, keeping first-seen order """
    # Membership is checked against a list (not a set) so unhashable
    # elements keep working, at the cost of O(n^2) in the worst case.
    unique = []
    for element in iterable:
        if element not in unique:
            unique.append(element)
    return unique
2188
2189
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2219
2220
def unescapeHTML(s):
    """Replace HTML entities (named and numeric) in `s` with their characters."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2228
2229
def process_communicate_or_kill(p, *args, **kwargs):
    """p.communicate(...), but kill and reap the child on any interruption
    (including KeyboardInterrupt) so it cannot outlive us."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:
        p.kill()
        p.wait()
        raise
2237
2238
def get_subprocess_encoding():
    """Pick the encoding used when exchanging data with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # Elsewhere the filesystem encoding applies; it may be unset, in which
    # case UTF-8 is the fallback.
    return sys.getfilesystemencoding() or 'utf-8'
2249
2250
def encodeFilename(s, for_subprocess=False):
    """Encode a filename for the running platform/interpreter.

    @param s The name of the file
    """
    assert type(s) == compat_str

    # Python 3 handles Unicode filenames natively.
    if sys.version_info >= (3, 0):
        return s

    # On Windows NT 5+ (2000 and up) the Unicode file APIs accept text
    # directly — except when the name is destined for a subprocess.
    # (Detecting NT 4 is tricky because 'major >= 4' would match the 9x
    # series as well; besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2273
2274
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): decode bytes on Python 2, pass through on 3."""
    if sys.version_info >= (3, 0):
        return b
    if not isinstance(b, bytes):
        # Already text.
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2284
2285
def encodeArgument(s):
    """Encode a command-line argument (tolerating legacy byte-string input)."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2293
2294
def decodeArgument(b):
    """Counterpart of encodeArgument(): decode a command-line argument."""
    return decodeFilename(b, for_subprocess=True)
2297
2298
def decodeOption(optval):
    """Decode an option value to text using the preferred encoding; None passes through."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2307
2308
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as H<delim>MM<delim>SS, M<delim>SS or plain seconds.

    Boundaries are inclusive: exactly one hour renders as '1:00:00' (the
    previous `secs > 3600` comparison produced '60:00') and exactly one
    minute as '1:00' (previously '60').
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2316
2317
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler honoring the 'nocheckcertificate' option,
    adapting to the SSL APIs of the running Python version."""
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # No SSLContext support at all; use the handler's defaults.
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2341
2342
def bug_reports_message(before=';'):
    """Standard "please report this issue" blurb, appended after `before`."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
           % update_cmd)

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        # Starting a new sentence: capitalize the first word.
        msg = msg[0].title() + msg[1:]

    if before:
        return before + ' ' + msg
    return msg
2357
2358
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    All custom exceptions in this module derive from it so callers can
    catch the whole family at once."""
    pass
2362
2363
# Exception types that indicate a network problem rather than a programming
# bug; ExtractorError treats errors raised while handling them as "expected".
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    # Not available in some older Python versions.
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2368
2369
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """

        # An error raised while a network exception is being handled is
        # treated as expected (i.e. not a bug worth reporting).
        if sys.exc_info()[0] in network_exceptions:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the standard bug-report blurb appended.
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Render the stored traceback as a printable string, or None.
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2397
2398
class UnsupportedError(ExtractorError):
    """Raised for URLs no extractor can handle; always an 'expected' error."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        # Keep the offending URL for programmatic access.
        self.url = url
2404
2405
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2409
2410
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Geo-restriction is a property of the site, not a bug: expected=True.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # Optional list of country codes from which the video is available.
        self.countries = countries
2422
2423
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Original (type, value, traceback) triple, when available.
        self.exc_info = exc_info
2436
2437
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2445
2446
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2454
2455
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the raw message accessible separately from str(self).
        self.msg = msg
2466
2467
class ExistingVideoReached(YoutubeDLError):
    # NOTE(review): the previous docstring was copy-pasted from
    # MaxDownloadsReached; by its name this signals that an already-downloaded
    # video was encountered (--break-on-existing) — confirm against callers.
    """ An already-downloaded video was reached; stop downloading. """
    pass
2471
2472
class RejectedVideoReached(YoutubeDLError):
    # NOTE(review): the previous docstring was copy-pasted from
    # MaxDownloadsReached; by its name this signals that a rejected/filtered
    # video was encountered (--break-on-reject) — confirm against callers.
    """ A rejected video was reached; stop downloading. """
    pass
2476
2477
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2481
2482
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2490
2491
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised by FileDownloader objects when a downloaded file is smaller than
    what the server announced first, indicating the connection was probably
    interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both counts are in bytes.
        self.downloaded = downloaded
        self.expected = expected
2507
2508
class XAttrMetadataError(YoutubeDLError):
    """Error writing extended attributes; classifies the cause in `reason`."""

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        # OS error code, if any.
        self.code = code
        self.msg = msg

        # Parsing code and msg into a coarse reason: NO_SPACE when the disk
        # or quota is full, VALUE_TOO_LONG for oversized values, otherwise
        # NOT_SUPPORTED.
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2523
2524
class XAttrUnavailableError(YoutubeDLError):
    """Extended-attribute (xattr) support is not available."""
    pass
2527
2528
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Construct an HTTP(S) connection, honoring the handler's
    'source_address' option by forcing the matching address family."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Infer the address family from the shape of the source address
            # ('.' implies a dotted IPv4 literal) and keep only matching results.
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last error.
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() wholesale.
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2592
2593
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker from `headers`.

    When the marker is present, a copy of the headers without it and without
    any Accept-Encoding header (matched case-insensitively) is returned;
    otherwise the mapping is returned unchanged.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2602
2603
2604class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2605 """Handler for HTTP requests and responses.
2606
2607 This class, when installed with an OpenerDirector, automatically adds
2608 the standard headers to every HTTP request and handles gzipped and
2609 deflated responses from web servers. If compression is to be avoided in
2610 a particular request, the original request in the program code only has
2611 to include the HTTP header "Youtubedl-no-compression", which will be
2612 removed before making the real request.
2613
2614 Part of this code was copied from:
2615
2616 http://techknack.net/python-urllib2-handlers/
2617
2618 Andrew Rowls, the author of that code, agreed to release it to the
2619 public domain.
2620 """
2621
    def __init__(self, params, *args, **kwargs):
        # `params` is the options dict; it is consulted later for
        # per-connection settings such as 'source_address'
        # (read in _create_http_connection via self._params).
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2625
2626 def http_open(self, req):
2627 conn_class = compat_http_client.HTTPConnection
2628
2629 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2630 if socks_proxy:
2631 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2632 del req.headers['Ytdl-socks-proxy']
2633
2634 return self.do_open(functools.partial(
2635 _create_http_connection, self, conn_class, False),
2636 req)
2637
2638 @staticmethod
2639 def deflate(data):
2640 if not data:
2641 return data
2642 try:
2643 return zlib.decompress(data, -zlib.MAX_WBITS)
2644 except zlib.error:
2645 return zlib.decompress(data)
2646
2647 def http_request(self, req):
2648 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2649 # always respected by websites, some tend to give out URLs with non percent-encoded
2650 # non-ASCII characters (see telemb.py, ard.py [#3412])
2651 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2652 # To work around aforementioned issue we will replace request's original URL with
2653 # percent-encoded one
2654 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2655 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2656 url = req.get_full_url()
2657 url_escaped = escape_url(url)
2658
2659 # Substitute URL if any change after escaping
2660 if url != url_escaped:
2661 req = update_Request(req, url=url_escaped)
2662
2663 for h, v in std_headers.items():
2664 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2665 # The dict keys are capitalized because of this bug by urllib
2666 if h.capitalize() not in req.headers:
2667 req.add_header(h, v)
2668
2669 req.headers = handle_youtubedl_headers(req.headers)
2670
2671 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2672 # Python 2.6 is brain-dead when it comes to fragments
2673 req._Request__original = req._Request__original.partition('#')[0]
2674 req._Request__r_type = req._Request__r_type.partition('#')[0]
2675
2676 return req
2677
2678 def http_response(self, req, resp):
2679 old_resp = resp
2680 # gzip
2681 if resp.headers.get('Content-encoding', '') == 'gzip':
2682 content = resp.read()
2683 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2684 try:
2685 uncompressed = io.BytesIO(gz.read())
2686 except IOError as original_ioerror:
2687 # There may be junk add the end of the file
2688 # See http://stackoverflow.com/q/4928560/35070 for details
2689 for i in range(1, 1024):
2690 try:
2691 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2692 uncompressed = io.BytesIO(gz.read())
2693 except IOError:
2694 continue
2695 break
2696 else:
2697 raise original_ioerror
2698 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2699 resp.msg = old_resp.msg
2700 del resp.headers['Content-encoding']
2701 # deflate
2702 if resp.headers.get('Content-encoding', '') == 'deflate':
2703 gz = io.BytesIO(self.deflate(resp.read()))
2704 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2705 resp.msg = old_resp.msg
2706 del resp.headers['Content-encoding']
2707 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2708 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2709 if 300 <= resp.code < 400:
2710 location = resp.headers.get('Location')
2711 if location:
2712 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2713 if sys.version_info >= (3, 0):
2714 location = location.encode('iso-8859-1').decode('utf-8')
2715 else:
2716 location = location.decode('utf-8')
2717 location_escaped = escape_url(location)
2718 if location != location_escaped:
2719 del resp.headers['Location']
2720 if sys.version_info < (3, 0):
2721 location_escaped = location_escaped.encode('utf-8')
2722 resp.headers['Location'] = location_escaped
2723 return resp
2724
2725 https_request = http_request
2726 https_response = http_response
2727
2728
def make_socks_conn_class(base_class, socks_proxy):
    """Derive a connection class from *base_class* that tunnels through a
    SOCKS proxy.

    *socks_proxy* is a URL like 'socks5://user:pass@host:port'. Supported
    schemes: socks5, socks4a, and socks/socks4 (plain SOCKS4). Credentials
    may be percent-encoded. Raises ValueError for an unsupported scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously an unrecognized scheme left socks_type unbound and
        # surfaced later as an obscure UnboundLocalError; fail early instead.
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # username/password are optional and may be percent-encoded
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunneled socket in TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2770
2771
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS counterpart of YoutubeDLHandler.

    Allows injecting a custom HTTPS connection class (e.g. one with relaxed
    certificate handling) and supports SOCKS proxying via the internal
    Ytdl-socks-proxy header.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        # Fall back to the stock HTTPSConnection when no custom class is given
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Pass through the handler's SSL context / hostname checking when the
        # running Python version supports them.
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        # Same internal header protocol as YoutubeDLHandler.http_open
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)
2795
2796
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Cookie jar reading/writing Netscape-format cookies.txt files with UTF-8
    support on both Python 2 and 3. See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix used by curl/Netscape tools to mark HttpOnly cookies
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookies.txt entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Field layout of one cookies.txt line, used for validation in load()
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file, skipping (with a warning) invalid lines."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Normalize/validate one cookies.txt line before handing the
            # whole buffer to MozillaCookieJar; raises LoadError when the
            # line is malformed so the caller can skip it.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2913
2914
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same cookie handling to both HTTP
    and HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is currently disabled; kept for reference.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP implementations for HTTPS as well
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2937
2938
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Route 301/303/307/308 through the stock 302 handler (302 is inherited);
    # this is what adds 308 support on Pythons whose urllib lacks it.
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only redirect for the method/status combinations browsers honor
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request carries no body
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
2994
2995
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Returns (offset, date_str) where offset is a datetime.timedelta (zero
    when no explicit numeric offset is present, e.g. for 'Z' or no match)
    and date_str has the matched designator removed.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if m is None:
        return datetime.timedelta(), date_str

    date_str = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        # 'Z' (Zulu time) carries no numeric offset
        return datetime.timedelta(), date_str

    direction = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3012
3013
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime's %S cannot represent fractional seconds, so drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)) - timezone
    except ValueError:
        return None
    return calendar.timegm(dt.timetuple())
3031
3032
def date_formats(day_first=True):
    """Return the candidate strptime formats, day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3035
3036
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None

    # Normalize: commas act as plain separators; AM/PM plus trailing
    # timezone names confuse strptime; numeric offsets are split off
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    upload_date = None
    for expression in date_formats(day_first):
        try:
            # NB: intentionally no break -- the last matching format wins
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to the RFC 2822 parser
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is None:
        return None
    return compat_str(upload_date)
3063
3064
def unified_timestamp(date_str, day_first=True):
    """Parse a fairly free-form date string into a UNIX timestamp (or None)."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # Record a 12 hour shift for PM times before stripping the marker
    hour_shift = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression)
        except ValueError:
            continue
        dt = dt - timezone + datetime.timedelta(hours=hour_shift)
        return calendar.timegm(dt.timetuple())

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + hour_shift * 3600
    return None
3096
3097
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from *url*, falling back to *default_ext*."""
    if url is None or '.' not in url:
        return default_ext

    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed

    return default_ext
3109
3110
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename: <base>.<sub_lang>.<sub_format>."""
    new_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, new_ext, expected_real_ext)
3113
3114
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
               auto|microsecond|second|minute|hour|day.
               auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is None:
        # Plain absolute date
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # Relative date: resolve the base recursively, then apply the offset
    base = datetime_from_str(match.group('start'), precision, format)
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    if unit in ('month', 'year'):
        # timedelta cannot express calendar months/years
        result = datetime_add_months(base, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        result = base + datetime.timedelta(**{unit + 's': amount})
    if auto_precision:
        return datetime_round(result, unit)
    return result
3155
3156
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    dt = datetime_from_str(date_str, precision='microsecond', format=format)
    return dt.date()
3165
3166
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months (clamping the day)."""
    # Work with a zero-based month index so // and % handle negatives too
    total = dt.month - 1 + months
    year = dt.year + total // 12
    month = total % 12 + 1
    # Clamp e.g. Jan 31 + 1 month to the last day of February
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3174
3175
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    ('microsecond' returns *dt* unchanged; halves round up).
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    timestamp = calendar.timegm(dt.timetuple())
    rounded = ((timestamp + seconds_per_unit / 2) // seconds_per_unit) * seconds_per_unit
    return datetime.datetime.utcfromtimestamp(rounded)
3192
3193
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    any other string is returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '%s-%s-%s' % match.groups()
3202
3203
class DateRange(object):
    """Represents a time interval between two dates (both inclusive)."""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = (
            date_from_str(start) if start is not None
            else datetime.datetime.min.date())
        self.end = (
            date_from_str(end) if end is not None
            else datetime.datetime.max.date())
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        query = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= query <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3233
3234
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Some Python 2 setups hand back a byte string here
        name = name.decode(preferredencoding())
    assert isinstance(name, compat_str)
    return name
3243
3244
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map C-level file descriptors to the Win32 STD_*_HANDLE constants
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    # WriteConsoleW writes UTF-16 text directly, bypassing codepage issues
    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on a real console; redirected output
        # (files, pipes) must go through the normal write path instead
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        # (these occupy two UTF-16 code units and need special handling)
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3318
3319
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (stderr by default), coping with byte
    streams, Windows consoles and Python 2/3 differences, then flush."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows, prefer writing straight to the console (Unicode-safe)
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer (Python 3)
        chosen = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(chosen, 'ignore'))
    else:
        out.write(s)
    out.flush()
3340
3341
def bytes_to_intlist(bs):
    """Convert a byte string (or any byte sequence) to a list of ints."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    # Python 2: indexing a str yields 1-character strings
    return [ord(ch) for ch in bs]
3349
3350
def intlist_to_bytes(xs):
    """Pack a list of byte values (0-255) back into a byte string."""
    # NB: struct packing (rather than bytes(xs)) keeps Python 2 behavior
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
3355
3356
# Cross-platform file locking: define _lock_file/_unlock_file for the
# current platform (Win32 LockFileEx, POSIX fcntl, or stubs that raise).
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED struct required by Lock/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte range to lock: effectively the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK; 0x0 requests a shared lock
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Requires the OVERLAPPED pointer stashed by _lock_file
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            # Advisory lock: exclusive for writers, shared for readers
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3430
3431
class locked_file(object):
    """File wrapper holding an advisory lock for the duration of a `with`
    block: shared for reads ('r'), exclusive for writes ('a'/'w')."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Locking failed: do not leak the open file handle
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3461
3462
def get_filesystem_encoding():
    """Name of the filesystem encoding, defaulting to UTF-8 when unknown."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3466
3467
def shell_quote(args):
    """Join *args* into a single shell-safe command line string."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
3477
3478
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge with any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    encoded = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, encoded)
3487
3488
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): return (url, data), or (url, default)
    when no data was smuggled in."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    data = json.loads(compat_parse_qs(fragment)['__youtubedl_smuggle'][0])
    return url, data
3496
3497
def format_bytes(bytes):
    """Format a byte count as a human-readable string, e.g. '1.00MiB'.

    Accepts numeric strings too; returns 'N/A' when *bytes* is None.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp so absurdly large values cannot index past 'YiB'
        # (previously values >= 1024**9 raised IndexError)
        exponent = min(int(math.log(bytes, 1024.0)), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3510
3511
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of *s* using *unit_table*
    (a mapping unit -> multiplier); return the value as an int or None.
    A decimal comma is accepted in place of a point."""
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3521
3522
def parse_filesize(s):
    """Parse a human-readable file size like '5.4 MiB' into bytes (int),
    or return None when *s* is None or unparseable.

    Understands IEC units (KiB = 1024), SI units (KB = 1000), full words
    ('megabytes', 'mebibytes'), and the unofficial lower/mixed-case forms
    historically accepted here (note the quirk: 'kB'/'mB'/... are binary).
    """
    if s is None:
        return None

    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    _SI_WORDS = ('kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zetta', 'yotta')
    _IEC_WORDS = ('kibi', 'mebi', 'gibi', 'tebi', 'pebi', 'exbi', 'zebi', 'yobi')
    for index, letter in enumerate('kmgtpezy'):
        decimal = 1000 ** (index + 1)
        binary = 1024 ** (index + 1)
        upper = letter.upper()
        _UNIT_TABLE.update({
            upper + 'iB': binary,   # KiB, MiB, ... (IEC)
            upper + 'B': decimal,   # KB, MB, ...   (SI)
            letter + 'B': binary,   # kB, mB, ...   (legacy quirk: binary!)
            upper + 'b': decimal,   # Kb, Mb, ...
            letter + 'b': decimal,  # kb, mb, ...
            _SI_WORDS[index] + 'bytes': decimal,
            _IEC_WORDS[index] + 'bytes': binary,
        })

    return lookup_unit_table(_UNIT_TABLE, s)
3592
3593
def parse_count(s):
    """Parse an abbreviated count such as '1.2M' or '15k' into an int."""
    if s is None:
        return None

    s = s.strip()
    # Plain numbers (possibly with thousands separators) need no unit table
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, s)
3613
3614
def parse_resolution(s):
    """Extract {'width': ..., 'height': ...} from a resolution string.

    Understands '1920x1080', scanline notation ('720p', '1080i') and
    '4k'/'8k'; returns an empty dict when nothing is recognized.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    scanlines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scanlines:
        return {'height': int(scanlines.group(1))}

    uhd = re.search(r'\b([48])[kK]\b', s)
    if uhd:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(uhd.group(1)) * 540}

    return {}
3635
3636
def parse_bitrate(s):
    """Return the bitrate parsed from a string like '128 kbps' as an int
    (in kbps), or None when s is not a string or has no such value."""
    if not isinstance(s, compat_str):
        return
    match = re.search(r'\b(\d+)\s*kbps', s)
    return int(match.group(1)) if match else None
3643
3644
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    for number, month_name in enumerate(names, start=1):
        if month_name == name:
            return number
    return None
3654
3655
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    for number, month_name in enumerate(ENGLISH_MONTH_NAMES, start=1):
        if month_name[:3] == abbrev:
            return number
    return None
3664
3665
def fix_xml_ampersands(xml_str):
    """Escape every bare '&' in XML as '&amp;', leaving existing named
    and numeric entities untouched."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;', xml_str)
3672
3673
def setproctitle(title):
    """Set the process name shown by tools like ps, via glibc
    prctl(PR_SET_NAME); silently does nothing where unsupported."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        # Linux/glibc only; other platforms fail the load and we bail out
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 == PR_SET_NAME (see prctl(2))
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3698
3699
def remove_start(s, start):
    """Strip the prefix `start` from `s` when present; tolerates s=None."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3702
3703
def remove_end(s, end):
    """Strip the suffix `end` from `s` when present; tolerates s=None."""
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
3706
3707
def remove_quotes(s):
    """Strip one matching pair of surrounding quotes ('...' or \"...\")."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3715
3716
def get_domain(url):
    """Return the 'domain.tld' part of a URL with scheme and leading
    'www.' stripped, or None when none can be found."""
    match = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?',
        url)
    return match.group('domain') if match else None
3720
3721
def url_basename(url):
    """Return the last path component of a URL ('' for an empty path)."""
    path = compat_urlparse.urlparse(url).path
    return path.rstrip('/').rpartition('/')[2]
3725
3726
def base_url(url):
    """Return the URL up to and including the final '/' before any
    query/fragment; raises AttributeError when there is none."""
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3729
3730
def urljoin(base, path):
    """Join base and path like urlparse.urljoin, but defensively:
    returns None unless path is a non-empty string and base is an
    http(s)/protocol-relative URL; absolute paths pass through as-is."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        # Already absolute (or protocol-relative)
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3744
3745
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that issues HEAD instead of GET."""
    def get_method(self):
        return 'HEAD'
3749
3750
class PUTRequest(compat_urllib_request.Request):
    """A urllib Request that issues PUT instead of GET."""
    def get_method(self):
        return 'PUT'
3754
3755
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int scaled by invscale/scale, or return default.

    When get_attr is given, the named attribute of v is converted
    instead.  The empty string and unconvertible values yield default.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
3768
3769
def str_or_none(v, default=None):
    """Return v as a string, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3772
3773
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    # Integers pass through; strings may carry ',', '.' or '+' separators
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3781
3782
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale/scale, or return default."""
    try:
        return default if v is None else float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
3790
3791
def bool_or_none(v, default=None):
    """Return v when it is a bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3794
3795
def strip_or_none(v, default=None):
    """Return v.strip() for strings, otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3798
3799
def url_or_none(url):
    """Return the stripped URL when it looks like a supported absolute
    or protocol-relative URL (http(s), rtmp family, mms, rtsp, ftp(s)),
    otherwise None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3805
3806
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp (numeric) or a 'YYYYMMDD' string using
    date_format, returning default on any parse/format failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None  # .strftime below raises AttributeError -> default
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3817
3818
def parse_duration(s):
    """Parse a duration string into seconds (float), or return None.

    Accepts clock notation ([[[days:]hours:]mins:]secs[.ms]), a
    verbose/ISO-8601-like form ('1d 2h 3m 4.5s', 'PT2H10M') and a loose
    fallback ('3.5 hours', '15 mins').
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1) Clock-style notation, optionally ending in 'Z'
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2) Verbose / ISO-8601-like notation; note that years, months
        #    and weeks are matched but deliberately not captured
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3) Loose fallback: '<float> hours' or '<float> mins'
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whichever components were captured; `ms` still carries its
    # leading '.', so float() yields the fractional part directly
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3875
3876
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the real extension ('a.mp4' + 'temp' ->
    'a.temp.mp4').  When expected_real_ext is given and does not match
    the actual extension, `ext` is appended instead."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3883
3884
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the filename's extension with `ext`; when
    expected_real_ext is given and does not match, append instead."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3890
3891
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    cmd = [exe] + args
    try:
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3901
3902
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        # stderr is merged into stdout so version banners printed there
        # are captured as well
        out, _ = process_communicate_or_kill(subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3920
3921
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program output using version_re
    (default: 'version <ver>'); return `unrecognized` when not found."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3931
3932
class PagedList(object):
    """Base class for lazily-paginated lists; subclasses provide
    getslice(start, end)."""
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3937
3938
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc(pagenum),
    optionally caching each fetched page."""
    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the items in [start, end) by walking pages starting
        from the page containing `start`."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of `start` within this page (0 for later pages)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # One-past-the-last offset of `end` within this page, if it
            # falls inside it
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3989
3990
class InAdvancePagedList(PagedList):
    """PagedList for sources where the total page count is known in
    advance; getslice fetches exactly the pages covering the slice."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Items of the first page that precede `start`
        skip_elems = start - start_page * self._pagesize
        # Total number of items still wanted (None = all remaining)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the request; trim and stop
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
4018
4019
def uppercase_escape(s):
    """Decode \\UXXXXXXXX (8 hex digit) escapes embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda match: decode(match.group(0))[0],
        s)
4026
4027
def lowercase_escape(s):
    """Decode \\uXXXX (4 hex digit) escapes embedded in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda match: decode(match.group(0))[0],
        s)
4034
4035
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        # Python 2's quote() needs bytes; pre-encode unicode to UTF-8
        s = s.encode('utf-8')
    # The safe set keeps RFC 3986 reserved/unreserved punctuation and
    # '%' of already-escaped sequences untouched
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4041
4042
def escape_url(url):
    """Escape a full URL as suggested by RFC 3986: IDNA-encode the host
    and percent-escape each remaining component separately."""
    parsed = compat_urllib_parse_urlparse(url)
    replacements = {
        'netloc': parsed.netloc.encode('idna').decode('ascii'),
        'path': escape_rfc3986(parsed.path),
        'params': escape_rfc3986(parsed.params),
        'query': escape_rfc3986(parsed.query),
        'fragment': escape_rfc3986(parsed.fragment),
    }
    return parsed._replace(**replacements).geturl()
4053
4054
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, skipping BOMs, blank lines
    and comment lines (prefixed with '#', ';' or ']')."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Drop a leading BOM (raw UTF-8 bytes or decoded U+FEFF)
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI;
        # however, it can safely be stripped if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return list(filter(None, map(fixup, fd)))
4072
4073
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4076
4077
def update_url_query(url, query):
    """Return url with the given query parameters merged in; existing
    keys are overridden."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parsed.query)
    merged.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(merged, True)))
4086
4087
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone a urllib Request, optionally overriding URL and data and
    merging in extra headers and query parameters; the HTTP method and
    timeout (if any) are preserved."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        update_url_query(url or req.get_full_url(), query),
        data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4106
4107
4108def _multipart_encode_impl(data, boundary):
4109 content_type = 'multipart/form-data; boundary=%s' % boundary
4110
4111 out = b''
4112 for k, v in data.items():
4113 out += b'--' + boundary.encode('ascii') + b'\r\n'
4114 if isinstance(k, compat_str):
4115 k = k.encode('utf-8')
4116 if isinstance(v, compat_str):
4117 v = v.encode('utf-8')
4118 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4119 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
4120 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
4121 if boundary.encode('ascii') in content:
4122 raise ValueError('Boundary overlaps with data')
4123 out += content
4124
4125 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4126
4127 return out, content_type
4128
4129
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    user_supplied = boundary is not None
    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # Collision between boundary and payload: retry with a fresh
            # random boundary, unless the caller pinned it
            if user_supplied:
                raise
            boundary = None
4158
4159
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable value in d for key_or_keys (a single key
    or a list/tuple of keys).  None values — and, unless
    skip_false_values is False, all falsy values — are skipped."""
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4168
4169
def try_get(src, getter, expected_type=None):
    """Apply each getter to src, returning the first result that does
    not raise a lookup error and (when expected_type is given) matches
    that type; returns None otherwise."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4181
4182
def merge_dicts(*dicts):
    """Merge dicts left to right; earlier dicts win, except that an
    empty string value may be replaced by a non-empty string from a
    later dict.  None values are ignored entirely."""
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            take = k not in merged or (
                isinstance(v, compat_str) and v
                and isinstance(merged[k], compat_str)
                and not merged[k])
            if take:
                merged[k] = v
    return merged
4195
4196
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as compat_str, decoding byte strings with the
    given encoding when necessary."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4199
4200
# US MPAA movie ratings mapped to the age_limit values used throughout
# the extractors
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4208
4209
# US TV Parental Guidelines ratings mapped to age_limit values
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4218
4219
def parse_age_limit(s):
    """Normalize an age limit (an int 0-21, 'NN+', a US MPAA rating or a
    TV parental guideline) to an int, or None when unrecognized."""
    # Exact `type` check (not isinstance) so that bools fall through
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if match:
        return int(match.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    match = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if match:
        return TV_PARENTAL_GUIDELINES['TV-' + match.group(1)]
    return None
4235
4236
def strip_jsonp(code):
    """Strip a JSONP wrapper such as 'cb({...});' (also handles the
    'window.cb && cb(...)' form and trailing // comments), returning
    only the JSON callback payload."""
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
4245
4246
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal into valid JSON text.

    vars is a dict of variable name -> replacement (JSON text) pairs to
    substitute for bare identifiers found in the input.
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integers, optionally used as object keys (':' suffix)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite a single matched token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!' coercions and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escapes to JSON-legal ones
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integer object keys must become quoted strings
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Bare identifiers / unquoted keys get quoted
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4291
4292
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Unknown ids rank below everything in the list
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4301
4302
# Default output filename templates per template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types; the value is a default filename infix
# (None appears to mean no fixed default — TODO confirm with callers)
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4318
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# NOTE(review): the '{0}' placeholder is presumably filled in (via
# str.format) with the allowed mapping-key pattern before use — confirm
# against callers
FORMAT_RE = r'''(?x)
    (?<!%)
    %
    \({0}\) # mapping key
    (?:[#0\-+ ]+)? # conversion flags (optional)
    (?:\d+)? # minimum field width (optional)
    (?:\.\d+)? # precision (optional)
    [hlL]? # length modifier (optional)
    (?P<type>[diouxXeEfFgGcrs%]) # conversion type
'''
4332
4333
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4342
4343
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4346
4347
def is_outdated_version(version, limit, assume_new=True):
    """Compare two version strings; for missing or unparsable input,
    fall back to `assume_new` (True -> report 'not outdated')."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4355
4356
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Self-update is currently disabled: always report not updateable
    return False

    # Dead code kept from youtube-dl: would report updateable only for
    # zipimport-based or frozen (exe) installs
    from zipimport import zipimporter

    return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
4364
4365
def args_to_str(args):
    """Get a short shell-quoted string representation of a subprocess
    command."""
    quoted = (compat_shlex_quote(a) for a in args)
    return ' '.join(quoted)
4369
4370
def error_to_compat_str(err):
    """Stringify an exception; on Python 2, decode the resulting byte
    string using the preferred locale encoding rather than ASCII."""
    if sys.version_info[0] >= 3:
        return str(err)
    return str(err).decode(preferredencoding())
4378
4379
def mimetype2ext(mt):
    """Map a MIME type to a filename extension, falling back to the MIME
    subtype itself when unknown.  Returns None for None input."""
    if mt is None:
        return None

    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in FULL_MAP:
        return FULL_MAP[mt]

    # Otherwise match on the (parameter-stripped, lowercased) subtype
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4417
4418
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into {'vcodec': ..., 'acodec': ...}.

    Unknown codecs trigger a warning; when exactly two unrecognized
    entries are present they are assumed to be (video, audio) in that
    order.  Returns {} for empty input.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    VIDEO_PREFIXES = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1',
                      'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_PREFIXES = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3',
                      'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec = acodec = None
    for full_codec in split_codecs:
        prefix = full_codec.split('.')[0]
        if prefix in VIDEO_PREFIXES:
            if not vcodec:
                vcodec = full_codec
        elif prefix in AUDIO_PREFIXES:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
4448
4449
def urlhandle_detect_ext(url_handle):
    """Guess the file extension for a response: prefer the filename from
    Content-Disposition, else derive it from Content-Type."""
    headers = url_handle.headers

    cd = headers.get('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
4462
4463
def encode_data_uri(data, mime_type):
    """Build a base64 data: URI for the given bytes and MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4466
4467
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone
        return False
    return age_limit < content_limit
4476
4477
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Longer BOMs first so e.g. utf-32-le is not mistaken for utf-16-le
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break

    decoded = first_bytes[offset:].decode(encoding, 'replace')
    return re.match(r'^\s*<', decoded)
4496
4497
def determine_protocol(info_dict):
    """Work out the download protocol for a format dict: an explicit
    'protocol' key wins, then URL-prefix heuristics (rtmp/mms/rtsp),
    then extension (m3u8/f4m), finally the raw URL scheme."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4518
4519
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def get_max_lens(table):
        # Widest stringified cell per column
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns whose every data cell is empty (width 0)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    if delim:
        # Insert a '---' separator row between header and data
        table = [header_row] + [['-' * ml for ml in max_lens]] + data
    # Left-align all columns; the last one is unpadded
    format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
4540
4541
def _match_one(filter_part, dct):
    """Evaluate one --match-filter clause against dct.

    Supports comparison clauses ('duration>60', 'uploader=foo', with an
    optional '?' making missing fields pass) and unary clauses
    ('is_live', '!is_live').  Raises ValueError on syntax errors.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quotes of the same kind inside the quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try parsing as a file size
                # ('500K', '1.2MiB'), with and without a 'B' suffix
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field passes only when the '?' suffix was given
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4610
4611
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4617
4618
def match_filter_func(filter_str):
    """Build a --match-filter callback: it returns None when the video
    passes the filter, otherwise a human-readable skip message."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4627
4628
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.3s' or 'HH:MM:SS.mmm',
    also with ':' as the fraction separator) into seconds; returns None
    for empty or unrecognized input."""
    if not time_expr:
        return None

    match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if match:
        return float(match.group('time_offset'))

    match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if match:
        return (3600 * int(match.group(1))
                + 60 * int(match.group(2))
                + float(match.group(3).replace(':', '.')))
4640
4641
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode ('HH:MM:SS,mmm')."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4644
4645
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> paragraphs
    '''
    # Older TTAF namespaces are rewritten (as raw bytes, before parsing) to
    # the current TTML ones so one set of XPath queries works below.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that can be expressed in SRT markup.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Helper to qualify tag/attribute names with the TTML namespaces.
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> dict of resolved styling properties
    default_style = {}  # style inherited from <body>/<div>, filled in below

    class TTMLPElementParser(object):
        # Streaming parser target: converts one <p> element into SRT text.
        # NOTE(review): these are class-level mutables shared between
        # instances; safe in practice because parse_node() uses a fresh
        # parser per paragraph and start/end calls stay balanced.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            # <br> becomes a newline; any other element has its effective
            # style translated into SRT <font>/<b>/<i>/<u> markup.
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            # Close whatever markup this element opened, innermost first.
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the <p> element and re-parse it with the SRT-emitting
        # target above.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces before parsing (see LEGACY_NAMESPACES).
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements into `styles`; repeat the pass while a parent
    # style is referenced before it has been defined (forward reference).
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced by <body> or <div> becomes the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Emit one SRT cue per <p>; paragraphs without usable timing are skipped.
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4808
4809
def cli_option(params, command_option, param):
    """Build a two-element argv fragment for an option with a value.

    Looks up `param` in the `params` dict; returns [] when the value is
    missing or None, otherwise [command_option, str(value)].
    """
    param = params.get(param)
    if param is None:
        return []
    # Convert unconditionally: previously falsy-but-set values (e.g. 0)
    # skipped the compat_str conversion and leaked a non-string into argv.
    return [command_option, compat_str(param)]
4815
4816
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build argv fragment(s) for a boolean option.

    Returns [] when the param is unset; otherwise either
    ['--opt', 'true'/'false'] or ['--opt<separator>true'/'false'] when a
    separator is given. The stored value must be a bool.
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4825
4826
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4830
4831
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Resolve per-key CLI argument lists from a configuration mapping.

    argdict may be a plain list/tuple (legacy form: returned unchanged when
    use_compat is set), a dict mapping lowercase keys to argument lists, or
    None. Each entry of `keys` is a single key or a tuple of alternative
    keys; the first entry with any present value wins and its argument
    lists are flattened. Falls back to `default` when nothing matches.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        if isinstance(key_list, compat_str):
            key_list = (key_list,)
        matches = [argdict.get(key.lower()) for key in key_list]
        matches = [args for args in matches if args is not None]
        if matches:
            return [arg for args in matches for arg in args]
    return default
4852
4853
class ISO639Utils(object):
    """Conversion between two-letter (ISO 639-1) and three-letter
    (ISO 639-2/T) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Maps ISO 639-1 -> ISO 639-2/T; a few deprecated 639-1 codes map to
    # the same three-letter code as their replacement (see inline notes).
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are considered, so region-qualified
        # codes such as 'en-US' also resolve. Returns None when unknown.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear reverse lookup; returns None implicitly when no 639-1
        # code maps to the given three-letter code.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5057
5058
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes.
        return cls._country_map.get(code.upper())
5317
5318
class GeoUtils(object):
    """Helpers for picking a random IPv4 address inside a per-country
    address block."""
    # Major IPv4 address blocks per country (CIDR notation, keyed by
    # ISO 3166-1 alpha-2 code; a few non-country registry codes such as
    # 'AP'/'EU' are also present).
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as str) inside a block.

        code_or_block is either a two-letter country code (looked up in
        _country_ip_map; None is returned for unknown codes) or a CIDR
        block string such as '1.2.3.0/24'.
        """
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        # Pick uniformly between the network address and the highest
        # address of the block (inclusive).
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5577
5578
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets each request override the proxy via the
    internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers. proxy/type/meth are bound as lambda
        # defaults so each scheme's handler captures the current loop
        # values instead of the late-bound closure variables.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request 'Ytdl-request-proxy' header overrides the
        # configured proxy; the header is stripped before sending.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers wrap the socket with SOCKS themselves
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5602
5603
# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Emit the integer 32 bits at a time, least-significant word first,
    # then reverse so the result is big-endian.
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n = n >> 32
    words.reverse()
    s = b''.join(words)
    # Strip leading NUL bytes; zero encodes as a single NUL byte.
    s = s.lstrip(b'\000') or b'\000'
    # Front-pad with NULs up to a multiple of blocksize, if requested.
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5636
5637
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad to a multiple of 4 so we can consume 32 bits per step.
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s
    acc = 0
    for offset in range(0, len(s), 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return acc
5653
5654
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # Interpret the input bytes as a little-endian big integer.
    payload = int(binascii.hexlify(data[::-1]), 16)
    # Textbook RSA (c = m^e mod N), rendered as lowercase hex, no prefix.
    return '%x' % pow(payload, exponent, modulus)
5670
5671
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    Layout: 0x00 0x02 <random non-zero-ish filler> 0x00 <data>,
    which requires at least 11 bytes of overhead.

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    filler = []
    while len(filler) < length - len(data) - 3:
        filler.append(random.randint(0, 254))
    return [0, 2] + filler + [0] + data
5685
5686
def encode_base_n(num, n, table=None):
    """Encode a non-negative integer `num` in base `n`.

    `table` supplies the digit characters (defaults to 0-9a-zA-Z truncated
    to `n`); it must be at least `n` characters long.

    Raises ValueError for an oversized base or a negative number.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))
    if num < 0:
        # Previously a negative num looped forever: floor division of a
        # negative value never reaches 0.
        raise ValueError('num must be non-negative')

    if num == 0:
        return table[0]

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
5703
5704
def decode_packed_codes(code):
    """Expand packed/obfuscated JavaScript by substituting base-N symbol
    references back into the payload captured by PACKED_CODES_RE."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    symbols = symbols.split('|')

    # Build the replacement table: index encoded in base-N -> symbol,
    # falling back to the encoded index itself for empty entries.
    symbol_table = {}
    for index in reversed(range(int(count))):
        encoded = encode_base_n(index, base)
        symbol_table[encoded] = symbols[index] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5721
5722
def caesar(s, alphabet, shift):
    """Shift each character of `s` that occurs in `alphabet` by `shift`
    positions (wrapping); characters outside the alphabet pass through."""
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        if ch in alphabet:
            shifted.append(alphabet[(alphabet.index(ch) + shift) % size])
        else:
            shifted.append(ch)
    return ''.join(shifted)
5730
5731
def rot47(s):
    """Apply the ROT47 substitution cipher (printable ASCII 33-126,
    rotated by 47 positions)."""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
5734
5735
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY2="quoted,val"') into a
    dict, stripping surrounding double quotes from quoted values."""
    info = {}
    for match in re.finditer(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        value = match.group('val')
        if value.startswith('"'):
            value = value[1:-1]
        info[match.group('key')] = value
    return info
5743
5744
def urshift(val, n):
    """Unsigned 32-bit right shift: negative `val` is reinterpreted as its
    two's-complement unsigned value before shifting."""
    if val < 0:
        val += 0x100000000
    return val >> n
5747
5748
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into a (width, height, pixels) tuple.

    pixels is a list of rows; each row is a flat list of channel byte
    values. NOTE(review): stride = width * 3 assumes 3 bytes per pixel
    (8-bit RGB) and sequential scanlines (no interlacing) -- confirm
    before reusing for other PNG flavours.

    Raises IOError if the data is not a PNG or contains no IDAT chunks.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer reader, keyed on byte length (1, 2 or 4).
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, payload, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is always the first chunk; width and height are its first fields.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Concatenate all IDAT chunks into a single zlib stream.
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 channels per pixel)
    pixels = []

    def _get_pixel(idx):
        # Look up an already-reconstructed byte by absolute byte index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo the per-scanline filters (https://www.w3.org/TR/PNG/#9Filters).
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]  # first byte of each scanline

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Filter neighbours, in bytes: one pixel (3 bytes) to the left
            # and the corresponding byte one scanline up.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick the neighbour closest to p.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5854
5855
def write_xattr(path, key, value):
    """Set the extended attribute `key` to `value` (bytes) on the file `path`.

    Backends are tried in order of preference:
      1. the `pyxattr` or `xattr` Python modules,
      2. NTFS Alternate Data Streams on Windows,
      3. the `setfattr` / `xattr` command-line utilities.

    Raises XAttrUnavailableError when no usable backend exists (or pyxattr is
    too old) and XAttrMetadataError when the chosen backend fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'): # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else: # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            # Re-raise as the project's xattr error type so callers can catch it.
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # An ADS is addressed by appending ':<stream-name>' to the path.
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Probe for the CLI tools; check_executable returns a falsy value
            # when the binary is missing.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as a text argument, not bytes.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5938
5939
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the string year,
    month and day of a uniformly random date in [1950-01-01, 1995-12-31]
    (e.g. for filling in age-gate forms)."""
    first, last = datetime.date(1950, 1, 1), datetime.date(1995, 12, 31)
    birthday = first + datetime.timedelta(random.randint(0, (last - first).days))
    return {
        field: str(part)
        for field, part in zip(
            (year_field, month_field, day_field),
            (birthday.year, birthday.month, birthday.day))
    }
5950
5951
# Templates for internet shortcut files, which are plain text files.
# Each template's leading newline is stripped by .lstrip(); the %(url)s
# (and %(filename)s) placeholders are filled in with %-formatting.

# Windows .url shortcut (INI-style syntax).
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc shortcut (XML property list).
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Freedesktop .desktop entry of Type=Link (Linux desktop environments).
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5977
5978
def iri_to_uri(iri):
    """
    Convert an IRI (Internationalized Resource Identifier, which may contain
    Unicode characters) into an ASCII-only URI.

    No double-escaping is performed: already percent-escaped bytes (e.g. `%3C`)
    are left untouched; only characters outside each component's safe set are
    percent-encoded using their UTF-8 representation.
    """
    parts = compat_urllib_parse_urlparse(iri)

    # Bracketed (IPv6) hosts are not handled; note that merely reading
    # `.netloc` with a single unmatched bracket raises ValueError by itself.
    if '[' in parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # Each `safe` set below lists characters that must NOT be percent-encoded
    # for that component; letters, digits and '_.-' are always kept as-is.
    # Safe sets follow https://url.spec.whatwg.org/#percent-encoded-bytes.
    authority = ''
    if parts.username:
        authority += compat_urllib_parse_quote(parts.username, safe=r"!$%&'()*+,~")
        if parts.password is not None:
            authority += ':' + compat_urllib_parse_quote(parts.password, safe=r"!$%&'()*+,~")
        authority += '@'

    # Unicode hostnames become Punycode; the 'idna' codec outputs pure ASCII.
    authority += parts.hostname.encode('idna').decode('utf-8')
    if parts.port is not None and parts.port != 80:
        authority += ':' + str(parts.port)

    return compat_urllib_parse_urlunparse((
        parts.scheme,
        authority,
        compat_urllib_parse_quote_plus(parts.path, safe=r"!$%&'()*+,/:;=@|~"),
        # Legacy ';parameters' component; quoted with the same safe set as the path.
        compat_urllib_parse_quote_plus(parts.params, safe=r"!$%&'()*+,/:;=@|~"),
        compat_urllib_parse_quote_plus(parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
        compat_urllib_parse_quote_plus(parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"),
    ))
6021
6022
def to_high_limit_path(path):
    """Return `path` in a form that sidesteps Windows' MAX_PATH limit.

    On win32/cygwin the absolute path is prefixed with the extended-length
    marker; the maximum length of individual path segments may still be
    limited. On other platforms the path is returned unchanged.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path
    return r'\\?\ '.rstrip() + os.path.abspath(path)
6029
6030
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Fetch `obj[field]` and render it through `template`.

    Values in `ignore` (before OR after applying `func`) yield `default`
    instead; `func`, when given, transforms the value before rendering.
    """
    val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
    # `func` may have mapped the value into the ignored set, so re-check.
    return default if val in ignore else template % val
6036
6037
def clean_podcast_url(url):
    """Strip known podcast tracking/analytics redirect prefixes from `url`.

    Tracker services embed the real media URL as
    `https://<tracker>/<...>/real-host/path`; removing the matched prefix
    leaves the scheme and the original host/path intact.
    """
    tracker_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracker_prefix_re, '', url)
6053
6054
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string.

    Each 'x' placeholder becomes a random hex digit. The version digit is
    fixed at '4'. BUG FIX: the variant digit (the 'y' placeholder) was
    previously drawn from all 16 hex digits, but RFC 4122 requires it to be
    one of 8, 9, a or b; it is now constrained accordingly.
    """
    return re.sub(
        r'[xy]',
        lambda m: _HEX_TABLE[random.randint(8, 11)] if m.group(0) == 'y'
        else _HEX_TABLE[random.randint(0, 15)],
        'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6060
6061
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not already exist.

    Returns True on success (including when nothing needed to be created),
    False on failure. Failures are reported through `to_screen` when a
    callable is supplied, instead of raising.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # BUG FIX: the original tested `callable(to_screen) is not None`,
        # which is always True (callable() returns a bool), so an OS error
        # with to_screen=None crashed with TypeError instead of returning False.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6072
6073
def get_executable_path():
    """Return the absolute base directory yt-dlp is running from, accounting
    for PyInstaller bundles and zip imports."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):
        # Running from a PyInstaller bundle: use the executable's directory.
        base = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):
        # Running out of a zip archive: step above the archive itself.
        base = os.path.join(os.path.dirname(__file__), '../..')
    else:
        # Running from a normal source checkout/installation.
        base = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(base)
6083
6084
def load_plugins(name, suffix, namespace):
    # Load classes whose names end with `suffix` from the plugin module
    # `name` under <executable dir>/ytdlp_plugins, injecting them into
    # `namespace` (typically a module's globals()). Returns the list of
    # loaded classes; a missing plugin package is silently ignored.
    # NOTE(review): relies on the deprecated `imp` module (removed in
    # Python 3.12).
    plugin_info = [None]
    classes = []
    try:
        plugin_info = imp.find_module(
            name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
        plugins = imp.load_module(name, *plugin_info)
        # NOTE(review): the loop variable rebinds the `name` parameter;
        # harmless here because the parameter is not used after this point.
        for name in dir(plugins):
            if name in namespace:
                # Never overwrite an existing entry in the target namespace.
                continue
            if not name.endswith(suffix):
                continue
            klass = getattr(plugins, name)
            classes.append(klass)
            namespace[name] = klass
    except ImportError:
        pass
    finally:
        # imp.find_module returns an open file object as its first element;
        # close it whether or not loading succeeded.
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
6106
6107
def traverse_dict(dictn, keys, casesense=True):
    """Walk `dictn` along the sequence of `keys`, returning the value at the
    end of the path, or None as soon as the path cannot be followed.

    Dict levels are looked up by key (case-insensitively when `casesense` is
    False); list/tuple/str levels accept integer indices or 'a:b' slice keys.
    """
    current = dictn
    for key in keys:
        if isinstance(current, dict):
            if not casesense:
                # Normalize both the mapping and the key to lower case.
                current = {k.lower(): v for k, v in current.items()}
                key = key.lower()
            current = current.get(key)
        elif isinstance(current, (list, tuple, compat_str)):
            if ':' in key:
                key = slice(*map(int_or_none, key.split(':')))
            else:
                key = int_or_none(key)
            current = try_get(current, lambda x: x[key])
        else:
            # Neither a mapping nor an indexable sequence: path is broken.
            return None
    return current