]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/utils.py
[outtmpl] Do not traverse `None`
[yt-dlp.git] / yt_dlp / utils.py
... / ...
CommitLineData
1#!/usr/bin/env python3
2# coding: utf-8
3
4from __future__ import unicode_literals
5
6import base64
7import binascii
8import calendar
9import codecs
10import collections
11import contextlib
12import ctypes
13import datetime
14import email.utils
15import email.header
16import errno
17import functools
18import gzip
19import hashlib
20import hmac
21import importlib.util
22import io
23import itertools
24import json
25import locale
26import math
27import operator
28import os
29import platform
30import random
31import re
32import socket
33import ssl
34import subprocess
35import sys
36import tempfile
37import time
38import traceback
39import xml.etree.ElementTree
40import zlib
41
42from .compat import (
43 compat_HTMLParseError,
44 compat_HTMLParser,
45 compat_HTTPError,
46 compat_basestring,
47 compat_chr,
48 compat_cookiejar,
49 compat_ctypes_WINFUNCTYPE,
50 compat_etree_fromstring,
51 compat_expanduser,
52 compat_html_entities,
53 compat_html_entities_html5,
54 compat_http_client,
55 compat_integer_types,
56 compat_numeric_types,
57 compat_kwargs,
58 compat_os_name,
59 compat_parse_qs,
60 compat_shlex_quote,
61 compat_str,
62 compat_struct_pack,
63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
66 compat_urllib_parse_urlencode,
67 compat_urllib_parse_urlparse,
68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
74 compat_xpath,
75)
76
77from .socks import (
78 ProxyType,
79 sockssocket,
80)
81
82
def register_socks_protocols():
    """Make urlparse treat the SOCKS URL schemes as netloc-style schemes.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so the SOCKS schemes
    are appended to that list here (idempotently).
    """
    registered = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto not in registered:
            registered.append(proto)
90
91
# Type of a compiled regular-expression pattern object. This is not clearly
# defined otherwise (there is no portable public name for it across the
# Python versions this file supports), so it is derived from an actual
# compiled pattern. Used for isinstance() checks on regex arguments.
compiled_regex_type = type(re.compile(''))
94
95
def random_user_agent():
    """Return a random, realistic-looking desktop Chrome User-Agent string.

    A Chrome version number is picked uniformly at random (random.choice)
    from the hard-coded pool below and interpolated into a fixed
    Windows 10 x64 Chrome/Safari UA template.
    """
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Pool of historical Chrome build numbers (68.x-76.x snapshots). One is
    # substituted into the template above on each call, so successive calls
    # do not all emit the identical User-Agent.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
# Default HTTP headers sent with every request; the User-Agent is
# randomized once per run by random_user_agent()
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings extractors can select by name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel distinguishing "no default supplied" from an explicit default of None
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code, for parsing localized dates
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
1705
# Media file extensions recognized when guessing a format from a URL/filename
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration (positionally
# zipped: the n-th key maps to the n-th chained replacement)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1725
# strptime() formats tried in order when parsing free-form date strings
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Extra formats for locales that write the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# Extra formats for locales that write the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
1788
# Captures the payload arguments of eval-"packed" (p,a,c,k,e,d-style) JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the JSON-LD payload of a <script type="application/ld+json"> tag
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1791
1792
def preferredencoding():
    """Return the name of the encoding best suited for this system.

    Uses locale.getpreferredencoding() when it names a usable codec and
    falls back to UTF-8 otherwise.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe that the reported codec actually exists and works
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
1806
1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Write to a sibling temp file in the same directory so the final
    # os.rename() stays on one filesystem (and is therefore atomic on POSIX)
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile is created with 0600 permissions; widen them
            # to the umask-derived default so the result looks like a file
            # created normally (best effort, failures ignored)
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file; re-raise the original error
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """Return the first element matching xpath[@key] (or xpath[@key=val])."""
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    # Python 2.6's ElementTree cannot evaluate attribute predicates,
    # so filter the candidate elements manually
    def find_xpath_attr(node, xpath, key, val=None):
        """Return the first element matching xpath[@key] (or xpath[@key=val])."""
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1882
1883# On python2.6 the xml.etree.ElementTree.Element methods don't support
1884# the namespace parameter
1885
1886
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into '{uri}tag' form using ns_map."""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
1897
1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching `xpath`.

    `xpath` may be a single expression or an iterable of alternatives that
    are tried in order.  Returns `default` if given and nothing matched;
    raises ExtractorError if `fatal` is set; returns None otherwise.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    # Initialize so an empty iterable of alternatives falls through to the
    # not-found handling instead of raising NameError on unbound `n`
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Element missing (or the default itself) propagates unchanged
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = xpath
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute `key` on the element matching xpath[@key]."""
    el = find_xpath_attr(node, xpath, key)
    if el is not None:
        return el.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
1948
1949
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper over the generic attribute search
    return get_element_by_attribute('id', id, html)
1953
1954
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying the given class, or None."""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1959
1960
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with attribute=value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1964
1965
def get_elements_by_class(class_name, html):
    """Return the content of all tags carrying the given class, as a list."""
    # Match the class name as one whole token inside the class attribute
    class_value = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value, html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of every tag with attribute=value, as a list."""
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    results = []
    for m in re.finditer(pattern, html):
        content = m.group('content')
        # Drop a stray surrounding quote pair left by lazy attribute matching
        if content.startswith(('"', "'")):
            content = content[1:-1]
        results.append(unescapeHTML(content))
    return results
1996
1997
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Populated by handle_starttag(); stays empty until feed() sees a tag
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Keep only the most recently seen start tag's attributes
        self.attrs = dict(attrs)
2007
2008
class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        # One attribute dict per top-level <li> encountered
        self.items = []
        # Current tag nesting depth; only depth-0 <li> elements are recorded
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1
2024
2025
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dict.

    Given a string such as
        <el a="foo" B="bar" c="&98;az" d=boz empty= noval entity="&amp;" sq='"' dq="'">
    returns
        {'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
         'empty': '', 'noval': None, 'entity': '&', 'sq': '"', 'dq': '\''}.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may raise on malformed HTML; keep what was parsed
        pass
    return attr_parser.attrs
2050
2051
def parse_list(webpage):
    """Return a list of attribute dicts, one per top-level <li> in webpage."""
    list_parser = HTMLListAttrsParser()
    list_parser.feed(webpage)
    list_parser.close()
    return list_parser.items
2059
2060
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # Turn <br> and paragraph boundaries into newlines, dropping original ones
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html.replace('\n', ' '))
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip any remaining tags, then decode HTML entities
    html = unescapeHTML(re.sub('<.*?>', '', html))
    return html.strip()
2076
2077
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so media bytes are not mangled
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission problems will not be fixed by renaming; give up early
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2108
2109
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a system timestamp, or None."""
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is None:
        return None
    return email.utils.mktime_tz(timetuple)
2117
2118
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Order matters: each rule below only fires if no earlier one did
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if not restricted and char == '\n':
            return ' '
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps: 12:34:56 -> 12_34_56 before the generic ':' rule runs
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2162
2163
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows.

    On other platforms the path is returned unchanged unless `force` is set,
    in which case Windows-forbidden characters are replaced anyway.
    """
    if sys.platform == 'win32':
        force = False  # full sanitization always runs on Windows
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace forbidden characters and trailing dots/spaces in each component,
    # leaving '.'/'..' path steps intact
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # startswith() instead of s[0]: the latter raised IndexError for
        # sanitize_path('', force=True)
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2187
2188
def sanitize_url(url):
    """Give scheme-relative URLs an http: scheme and repair common scheme typos."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        fixed, substitutions = re.subn(mistake, fixup, url)
        if substitutions:
            return fixed
    return url
2205
2206
def extract_basic_auth(url):
    """Strip inline credentials from url; return (clean_url, auth_header_or_None)."""
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Rebuild the netloc without the user:password@ prefix
    netloc = parts.hostname
    if parts.port is not None:
        netloc = '%s:%d' % (parts.hostname, parts.port)
    url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    auth_payload = base64.b64encode(credentials.encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2217
2218
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized URL, moving inline credentials
    into an Authorization header."""
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # args mirrors Request(url, data, headers, ...): positional headers
        # is args[1] when present, otherwise use/create the keyword dict
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
2225
2226
def expand_path(s):
    """Expand shell variables and ~"""
    # compat_expanduser handles '~' consistently across Python versions
    return os.path.expandvars(compat_expanduser(s))
2230
2231
def orderedSet(iterable):
    """Return a list of the unique items of iterable, keeping first-seen order.

    Uses list membership (not a set) so unhashable elements also work.
    """
    res = []
    for el in iterable:
        if el in res:
            continue
        res.append(el)
    return res
2239
2240
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        numstr = numeric.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2270
2271
def unescapeHTML(s):
    """Decode HTML entities in s into characters; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
2279
2280
def escapeHTML(text):
    """Escape &, <, >, " and ' for safe embedding in HTML markup."""
    # '&' must be replaced first so the entities below are not double-escaped
    for char, entity in (
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
            ('"', '&quot;'),
            ("'", '&#39;')):
        text = text.replace(char, entity)
    return text
2290
2291
def process_communicate_or_kill(p, *args, **kwargs):
    """p.communicate(...), but kill and reap the child process if the wait is
    interrupted, so no zombie is left behind."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException: # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2299
2300
class Popen(subprocess.Popen):
    """subprocess.Popen wrapper that suppresses the console window on Windows
    and adds communicate_or_kill()."""

    if sys.platform == 'win32':
        # Hide the console window that would otherwise flash up for children
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        # communicate(), but kill the child if interrupted
        return process_communicate_or_kill(self, *args, **kwargs)
2313
2314
def get_subprocess_encoding():
    """Return the encoding used when exchanging bytes with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # getfilesystemencoding() may report None on old Pythons
    return sys.getfilesystemencoding() or 'utf-8'
2325
2326
def encodeFilename(s, for_subprocess=False):
    """
    Encode a filename for the current platform/Python version.

    @param s The name of the file
    """
    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    on_unicode_windows = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if on_unicode_windows or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2349
2350
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename(): decode byte filenames on Python 2 only."""
    # On Python 3 (and for anything that is not bytes) the value is returned
    # unchanged
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2360
2361
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2369
2370
def decodeArgument(b):
    """Decode a subprocess argument back to text (Python 2 byte handling)."""
    return decodeFilename(b, True)
2373
2374
def decodeOption(optval):
    """Decode a possibly byte-string option value to text; None passes through."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2383
2384
# Named decomposition of a duration, as produced by timetuple_from_msec()
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a duration in milliseconds into hours/minutes/seconds/milliseconds."""
    total_secs, milliseconds = divmod(msec, 1000)
    total_mins, seconds = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
2393
2394
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H:]MM:SS (or plain seconds),
    optionally appending '.mmm' milliseconds."""
    # Named `tt` to avoid shadowing the stdlib `time` module
    tt = timetuple_from_msec(secs * 1000)
    if tt.hours:
        ret = '%d%s%02d%s%02d' % (tt.hours, delim, tt.minutes, delim, tt.seconds)
    elif tt.minutes:
        ret = '%d%s%02d' % (tt.minutes, delim, tt.seconds)
    else:
        ret = '%d' % tt.seconds
    return '%s.%03d' % (ret, tt.milliseconds) if msec else ret
2404
2405
def _ssl_load_windows_store_certs(ssl_context, storename):
    # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
    # Load trusted x509 certificates from the given Windows certificate store
    # ('CA' or 'ROOT') into ssl_context; silently gives up if the store is
    # unreadable and skips individual certificates the ssl module rejects.
    try:
        certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
                 if encoding == 'x509_asn' and (
                     trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
    except PermissionError:
        return
    for cert in certs:
        try:
            ssl_context.load_verify_locations(cadata=cert)
        except ssl.SSLError:
            pass
2419
2420
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose SSL context honours the
    'nocheckcertificate' option in params."""
    opts_check_certificate = not params.get('nocheckcertificate')
    context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
    context.check_hostname = opts_check_certificate
    context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
    if opts_check_certificate:
        try:
            context.load_default_certs()
            # Work around the issue in load_default_certs when there are bad certificates. See:
            # https://github.com/yt-dlp/yt-dlp/issues/1060,
            # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
        except ssl.SSLError:
            # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
            if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
                # Create a new context to discard any certificates that were already loaded
                context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
                context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
                for storename in ('CA', 'ROOT'):
                    _ssl_load_windows_store_certs(context, storename)
            context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2442
2443
def bug_reports_message(before=';'):
    """Return the standard "please report this" blurb, joined onto `before`."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp .'
           ' Make sure you are using the latest version; %s.'
           ' Be sure to call yt-dlp with the --verbose flag and include its complete output.') % update_cmd

    # Capitalize the blurb when it starts a sentence
    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    if before:
        return before + ' ' + msg
    return msg
2458
2459
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    pass
2463
2464
# Exception classes that indicate a (possibly transient) network problem
# rather than a bug; ssl.CertificateError is added only where it exists as
# a distinct attribute (hence the hasattr check)
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2469
2470
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        # Errors raised while handling a network exception are treated as
        # expected: they indicate site/network trouble, not a yt-dlp bug
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie  # name of the raising extractor, if known
        self.exc_info = sys.exc_info() # preserve original exception

        # Compose the final message as "[ie] video_id: msg (caused by ...)",
        # appending the bug-report blurb only for unexpected errors
        super(ExtractorError, self).__init__(''.join((
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
            '' if expected else bug_reports_message())))

    def format_traceback(self):
        # Render the stored traceback as a string, or None if none was given
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2500
2501
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL."""

    def __init__(self, url):
        self.url = url
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
2507
2508
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2512
2513
class GeoRestrictedError(ExtractorError):
    """Raised when a video is unavailable in the user's geographic location
    because of restrictions imposed by the website."""

    def __init__(self, msg, countries=None, **kwargs):
        self.countries = countries  # country codes supplied by the extractor, if any
        kwargs['expected'] = True  # geo-blocking is not a yt-dlp bug
        super(GeoRestrictedError, self).__init__(msg, **kwargs)
2525
2526
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # sys.exc_info() triple of the underlying failure, if provided
        self.exc_info = exc_info
2539
2540
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    pass
2548
2549
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2557
2558
class PostProcessingError(YoutubeDLError):
    """Raised by a PostProcessor's run() method to signal a failed
    postprocessing task."""

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2569
2570
class DownloadCancelled(YoutubeDLError):
    """Base for exceptions raised when the download queue should be interrupted."""

    msg = 'The download was cancelled'

    def __init__(self, msg=None):
        if msg is not None:
            self.msg = msg  # allow callers to override the class-level default
        super(DownloadCancelled, self).__init__(self.msg)
2579
2580
class ExistingVideoReached(DownloadCancelled):
    """Raised when --break-on-existing encounters an already-archived video."""
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
2584
2585
class RejectedVideoReached(DownloadCancelled):
    """Raised when --break-on-reject encounters a filtered-out video."""
    msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
2589
2590
class MaxDownloadsReached(DownloadCancelled):
    """Raised when the --max-downloads limit has been reached."""
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2594
2595
class ThrottledDownload(YoutubeDLError):
    """Raised when the download speed falls below --throttled-rate."""
    pass
2599
2600
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2608
2609
class ContentTooShortError(YoutubeDLError):
    """Raised when fewer bytes arrive than the length the server announced,
    which usually means the connection was interrupted."""

    def __init__(self, downloaded, expected):
        # Both counts are in bytes; kept for callers that resume or report
        self.downloaded = downloaded
        self.expected = expected
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
2625
2626
class XAttrMetadataError(YoutubeDLError):
    """Failure while writing extended file attributes.

    `self.reason` classifies the error as NO_SPACE, VALUE_TOO_LONG or
    NOT_SUPPORTED so callers need not parse message text themselves.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Derive a machine-readable reason from the errno or the message text
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or any(hint in self.msg for hint in ('No space left', 'Disk quota exceeded'))):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2641
2642
class XAttrUnavailableError(YoutubeDLError):
    # Raised when no usable xattr implementation is available on this system
    pass
2645
2646
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class, honouring the handler's 'source_address' option.

    ydl_handler must expose a ._params dict; when its 'source_address' entry
    is set, the returned connection is patched to bind outgoing sockets to
    that local IP (and to only try remote addresses of the same family).
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dotted-quad source address implies IPv4, otherwise assume IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last socket error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2710
2711
def handle_youtubedl_headers(headers):
    """Process the internal 'Youtubedl-no-compression' pseudo-header.

    When present, it is removed together with any Accept-Encoding header
    (case-insensitive) so the request goes out without compression.
    The original mapping is returned untouched when the marker is absent.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = {}
    for key, value in headers.items():
        if key.lower() != 'accept-encoding' and key != 'Youtubedl-no-compression':
            filtered[key] = value
    return filtered
2720
2721
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        # YoutubeDL options dict; read by _create_http_connection ('source_address')
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # Route the connection through a SOCKS proxy when requested via the
        # internal 'Ytdl-socks-proxy' pseudo-header (stripped before sending)
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        # Try raw deflate first (no zlib header), then fall back to zlib format
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Strip internal pseudo-headers (e.g. 'Youtubedl-no-compression')
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry, trimming up to 1023 trailing bytes
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # Drop the header so consumers don't try to decompress again
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # Apply the same processing to HTTPS traffic
    https_request = http_request
    https_response = http_response
2845
2846
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of base_class that connects through a SOCKS proxy.

    base_class must be HTTP(S)Connection; socks_proxy is a URL such as
    socks5://user:pass@host:port (default port 1080).

    Raises ValueError for an unsupported proxy scheme (previously this
    crashed later with an UnboundLocalError on socks_type).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # Credentials may be percent-encoded in the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # Wrap the tunnelled socket in TLS for HTTPS connections
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2888
2889
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting a custom connection class and SOCKS proxies."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        # Fall back to the stock HTTPSConnection when no class is supplied
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            conn_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            conn_kwargs['check_hostname'] = self._check_hostname

        conn_class = self._https_conn_class
        # Internal pseudo-header selects a SOCKS proxy; strip it before sending
        proxy = req.headers.get('Ytdl-socks-proxy')
        if proxy:
            conn_class = make_socks_conn_class(conn_class, proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **conn_kwargs)
2913
2914
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a Netscape cookies.txt entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the #HttpOnly_ marker so the base parser accepts the entry
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries instead of failing the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
3031
3032
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also applies cookie handling to HTTPS traffic.

    The https_request/https_response aliases below extend the standard
    HTTPCookieProcessor behavior to https:// URLs.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
3055
3056
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        # NB: the new Request carries no body, so a redirected POST is
        # reissued without its payload
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
3112
3113
3114def extract_timezone(date_str):
3115 m = re.search(
3116 r'''(?x)
3117 ^.{8,}? # >=8 char non-TZ prefix, if present
3118 (?P<tz>Z| # just the UTC Z, or
3119 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3120 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3121 [ ]? # optional space
3122 (?P<sign>\+|-) # +/-
3123 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3124 $)
3125 ''', date_str)
3126 if not m:
3127 timezone = datetime.timedelta()
3128 else:
3129 date_str = date_str[:-len(m.group('tz'))]
3130 if not m.group('sign'):
3131 timezone = datetime.timedelta()
3132 else:
3133 sign = 1 if m.group('sign') == '+' else -1
3134 timezone = datetime.timedelta(
3135 hours=sign * int(m.group('hours')),
3136 minutes=sign * int(m.group('minutes')))
3137 return timezone, date_str
3138
3139
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # Fractional seconds are not representable by the format string below
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        dt = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)) - timezone
    except ValueError:
        # Unparseable input yields None, matching the historical behavior
        return None
    return calendar.timegm(dt.timetuple())
3157
3158
def date_formats(day_first=True):
    """Return the list of candidate date format strings, preferring
    day-first (DD.MM.YYYY style) or month-first (MM.DD.YYYY style) order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3161
3162
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Commas, AM/PM markers and timezone designators only hinder parsing
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str.replace(',', ' '))
    _, date_str = extract_timezone(date_str)

    upload_date = None
    for expression in date_formats(day_first):
        try:
            # NB: deliberately no break - the last format that parses wins
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        # Fall back to RFC 2822 (email-style) date parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
3189
3190
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string, or None."""
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # A trailing 'PM' means 12 hours must be added to the parsed hour value
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to RFC 2822 (email-style) date parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
3222
3223
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL, falling back to default_ext."""
    if url is None or '.' not in url:
        return default_ext
    # Take whatever follows the last '.' before any query string
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3235
3236
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by inserting the language code before the
    subtitle format extension (e.g. video.en.vtt)."""
    new_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, new_ext, expected_real_ext)
3239
3240
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is not None:
        # Resolve the base date recursively, then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years have variable length; handled by datetime_add_months
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = datetime.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            # 'auto' rounds to the unit used in the offset expression
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3281
3282
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
3291
3292
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day of month is clamped to the length of the target month
    (e.g. Jan 31 + 1 month -> Feb 28/29).
    """
    zero_based_month = dt.month - 1 + months
    year = dt.year + zero_based_month // 12
    month = zero_based_month % 12 + 1
    day = min(dt.day, calendar.monthrange(year, month)[1])
    return dt.replace(year, month, day)
3300
3301
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    # Microsecond precision means no rounding at all
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    step = unit_seconds[precision]
    timestamp = calendar.timegm(dt.timetuple())
    # Round half up to the nearest multiple of the unit
    rounded = ((timestamp + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
3318
3319
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    anything that does not match is returned unchanged."""
    m = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return '-'.join(m.groups()) if m else date_str
3328
3329
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing bounds default to the widest representable date range
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3359
3360
def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    # Python 2 may return bytes here; decode with the locale's preferred encoding
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res
3369
3370
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3377
3378
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map file descriptors to GetStdHandle ids
    # (-11 = STD_OUTPUT_HANDLE, -12 = STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is only a usable console if it is a non-remote character
        # device and GetConsoleMode succeeds on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 BMP characters per call; a leading non-BMP
        # character is written on its own (count == 0 triggers that path)
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3452
3453
def write_string(s, out=None, encoding=None):
    """Write text s to the stream out (default: sys.stderr), coping with
    Windows consoles, byte streams and Python 2/3 differences."""
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Prefer the Windows console API so non-ASCII text is displayed correctly
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream wrapping a binary buffer: encode ourselves so we can
        # use errors='ignore' instead of crashing on unencodable characters
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3474
3475
def bytes_to_intlist(bs):
    """Convert a bytes object (or Python 2 str) to a list of integer byte
    values; an empty/None input yields an empty list."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    # Python 2: indexing a str yields 1-char strings
    return [ord(c) for c in bs]
3483
3484
def intlist_to_bytes(xs):
    """Pack a sequence of integer byte values (0-255) into a bytes object."""
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)
3489
3490
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) using LockFileEx/UnlockFileEx on Windows, fcntl.flock
# elsewhere, and stubs that raise IOError when neither is available.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure required by LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED struct alive for the matching _unlock_file call
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 == LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3564
3565
class locked_file(object):
    """File wrapper that holds an advisory lock while used as a context
    manager: shared for 'r', exclusive for 'a' and 'w'."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Reading may share the lock; writing/appending must not
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3595
3596
def get_filesystem_encoding():
    """Name of the filesystem encoding, defaulting to utf-8 when Python
    reports None."""
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
3600
3601
def shell_quote(args):
    """Quote a list of arguments for display as a single shell command."""
    encoding = get_filesystem_encoding()

    def _as_text(a):
        # Filenames may arrive as bytes via encodeFilename()
        return a.decode(encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(_as_text(a)) for a in args)
3611
3612
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing_data = unsmuggle_url(url, {})
    data.update(existing_data)
    payload = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, payload)
3621
3622
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): return (url, data), or (url, default)
    when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    data = json.loads(compat_parse_qs(fragment)['__youtubedl_smuggle'][0])
    return url, data
3630
3631
def format_bytes(bytes):
    """Format a byte count as a human-readable binary-prefixed string.

    Accepts numbers or numeric strings; returns 'N/A' for None.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    SUFFIXES = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp the exponent so that fractional byte counts cannot produce
        # a negative index and absurdly large values (>= 1024 ** 9) cannot
        # index past the end of the suffix table
        exponent = min(max(int(math.log(bytes, 1024.0)), 0), len(SUFFIXES) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, SUFFIXES[exponent])
3644
3645
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number> <unit>' from *s*, where *unit* is a key of
    *unit_table*, and return the scaled integer value (None if no match)."""
    units_re = '|'.join(map(re.escape, unit_table))
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if match is None:
        return None
    # Accept ',' as a decimal separator as well
    number = float(match.group('num').replace(',', '.'))
    return int(number * unit_table[match.group('unit')])
3655
3656
def parse_filesize(s):
    """Parse a human-readable file size like '5.6 MiB' or '120KB' into an
    integer number of bytes (None when *s* is None or unparsable)."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too.
    # Note the deliberate asymmetry in the mixed-case entries: '<lower>B'
    # forms (kB, mB, ...) are treated as binary (1024-based) multiples,
    # while '<upper>b' and fully lower-case forms are decimal (1000-based).
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3726
3727
def parse_count(s):
    """Parse a human-readable count like '12,345' or '1.2M' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Plain number (possibly with separators) - no unit suffix
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    suffixes = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(suffixes, s)
3747
3748
def parse_resolution(s):
    """Extract video dimensions from a string like '1920x1080', '720p' or
    '4k'; returns a dict with 'width'/'height' keys (possibly empty)."""
    if s is None:
        return {}

    # WIDTHxHEIGHT (also with '×' or ',' as separator)
    mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    # '720p' / '1080i' style
    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    # '4k' / '8k' shorthand (k == 540 lines)
    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        return {'height': int(mobj.group(1)) * 540}

    return {}
3769
3770
def parse_bitrate(s):
    """Extract an integer bitrate from a string like '128 kbps'."""
    if not isinstance(s, compat_str):
        return None
    match = re.search(r'\b(\d+)\s*kbps', s)
    return int(match.group(1)) if match else None
3777
3778
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3788
3789
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1
3798
3799
def fix_xml_ampersands(xml_str):
    """Escape stray '&' characters in XML as '&amp;', leaving existing
    entities and character references intact."""
    stray_amp = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(stray_amp, '&amp;', xml_str)
3806
3807
def setproctitle(title):
    """Best-effort: set the process title via libc prctl(); silently does
    nothing where that is not possible."""
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    encoded = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(encoded))
    buf.value = encoded
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
3832
3833
def remove_start(s, start):
    """Strip *start* from the beginning of *s* if present (None-safe)."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
3836
3837
def remove_end(s, end):
    """Strip *end* from the end of *s* if present (None-safe).

    An empty *end* is a no-op; previously it returned '' because
    s[:-0] evaluates to the empty string.
    """
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
3840
3841
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        # Only strip when both ends carry the same quote character
        if s.startswith(quote) and s.endswith(quote):
            return s[1:-1]
    return s
3849
3850
def get_domain(url):
    """Extract the domain (without scheme or leading 'www.') from *url*;
    None when nothing domain-like is found."""
    match = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    return match.group('domain') if match else None
3854
3855
def url_basename(url):
    """Last path component of *url* ('' when the path is empty)."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').rpartition('/')[2]
3859
3860
def base_url(url):
    """Return *url* truncated after the last '/' preceding any query or
    fragment. NOTE: raises AttributeError when *url* has no such prefix."""
    match = re.match(r'https?://[^?#&]+/', url)
    return match.group()
3863
3864
def urljoin(base, path):
    """Join *path* onto *base*; returns None when either part is unusable.
    Already-absolute paths (scheme:// or //) are returned unchanged."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Absolute (or protocol-relative) URLs need no joining
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3878
3879
class HEADRequest(compat_urllib_request.Request):
    """Request subclass that forces the HTTP method to HEAD."""
    def get_method(self):
        return 'HEAD'
3883
3884
class PUTRequest(compat_urllib_request.Request):
    """Request subclass that forces the HTTP method to PUT."""
    def get_method(self):
        return 'PUT'
3888
3889
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce *v* (optionally via attribute *get_attr*) to an int scaled by
    invscale/scale; return *default* for None/empty/unparsable input."""
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
3902
3903
def str_or_none(v, default=None):
    """Stringify *v*, passing None through as *default*."""
    if v is None:
        return default
    return compat_str(v)
3906
3907
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and a leading '+'
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3915
3916
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce *v* to a float scaled by invscale/scale; *default* on failure."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
3924
3925
def bool_or_none(v, default=None):
    """Pass through booleans unchanged; anything else yields *default*."""
    if isinstance(v, bool):
        return v
    return default
3928
3929
def strip_or_none(v, default=None):
    """Strip whitespace from a string; non-strings yield *default*."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3932
3933
def url_or_none(url):
    """Return the stripped *url* if it uses a recognized scheme (http(s),
    rtmp-family, mms, ftp(s)) or is protocol-relative, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3939
3940
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or 'YYYYMMDD' string with *date_format*;
    return *default* on any failure."""
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            dt = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            dt = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            dt = None  # triggers AttributeError below -> default
        return dt.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3951
3952
def parse_duration(s):
    """Parse a duration string - clock style '[[DD:]HH:]MM:SS[.ms]',
    ISO-8601-like/verbose 'PT1H2M3S' / '1h 2min 3s', or bare '<n> hours' /
    '<n> minutes' - into a number of seconds. None when unparsable."""
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Clock style: [[DD:]HH:]MM:SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # ISO-8601-like / verbose unit style
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Bare '<n> hours' or '<n> minutes' (fractions allowed)
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if not m:
                return None
            hours, mins = m.groups()

    duration = 0
    for value, unit_seconds in (
            (secs, 1), (mins, 60), (hours, 60 * 60), (days, 24 * 60 * 60), (ms, 1)):
        if value:
            duration += float(value) * unit_seconds
    return duration
4009
4010
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the existing extension of *filename*. When
    *expected_real_ext* is given and does not match, append instead."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
4017
4018
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*. When
    *expected_real_ext* is given and does not match, append instead."""
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
4024
4025
def check_executable(exe, args=None):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    # NB: default changed from the mutable `[]` to None (same behavior);
    # a fresh list per call avoids the shared-mutable-default pitfall
    try:
        Popen([exe] + (args or []), stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
    except OSError:
        return False
    return exe
4034
4035
def _get_exe_version_output(exe, args):
    """Run *exe* with *args* and return its combined stdout/stderr text,
    or False when the executable cannot be run."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = Popen(
            [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = proc.communicate_or_kill()[0]
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return out
4049
4050
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from *output* with *version_re*; return
    *unrecognized* when nothing matches."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    match = re.search(version_re, output)
    return match.group(1) if match else unrecognized
4060
4061
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    output = _get_exe_version_output(exe, args)
    if not output:
        return False
    return detect_exe_version(output, version_re, unrecognized)
4068
4069
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList'''

    class IndexError(IndexError):
        pass

    def __init__(self, iterable):
        # __iterable: the not-yet-consumed remainder of the source iterable
        # __cache: items consumed from it so far, in original order
        # __reversed: logical (not physical) reversal flag; indices are
        # mapped through __reverse_index instead of reordering the cache
        self.__iterable = iter(iterable)
        self.__cache = []
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        self.__cache.extend(self.__iterable)
        # Discard the emptied iterable to make it pickle-able
        self.__iterable = []
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map a forward index to the equivalent index from the end
        # (None stays None so open slice bounds are preserved)
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Consume only as many items as the requested index range needs
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Truthiness only needs the first (logical) element to exist
        try:
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        # In-place logical reversal; returns self for chaining
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4156
4157
class PagedList:
    """Base class for lazily-fetched, page-based sequences.

    Subclasses implement _getslice(); pages are fetched via *pagefunc*
    (a callable taking a page number) in chunks of *pagesize* entries and
    optionally cached per page number.
    """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        # A cached falsy page (e.g. []) is re-fetched due to the `or`;
        # results are materialized into a list before caching
        page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        # Materialized [start:end) slice of the full sequence
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4187
4188
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand, with no known page count;
    iteration stops at the first page that comes back short."""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            # [firstid, nextfirstid) is the index range covered by this page
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested slice within this page (if any)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
4222
4223
class InAdvancePagedList(PagedList):
    """PagedList where the total page count is known up front; caching is
    always enabled."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # skip_elems: offset into the first page; only_more: how many
        # entries are still wanted (None == unbounded)
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    # This page satisfies the remainder of the request
                    yield from page_results[:only_more]
                    break
            yield from page_results
4247
4248
def uppercase_escape(s):
    """Decode literal '\\UXXXXXXXX' escape sequences embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decoder(m.group(0))[0],
        s)
4255
4256
def lowercase_escape(s):
    """Decode literal '\\uXXXX' escape sequences embedded in *s*."""
    decoder = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decoder(m.group(0))[0],
        s)
4263
4264
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Pre-encode to UTF-8 on Python 2 (NOTE(review): presumably because
    # quote() there mishandles unicode input - confirm); the version check
    # must come first so compat_str is not evaluated needlessly on Python 3
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # The safe set keeps RFC 3986 reserved/unreserved punctuation (and '%')
    # unescaped, so already-escaped URLs pass through unchanged
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4270
4271
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # The netloc is IDNA-encoded; all other components are percent-escaped
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment),
    ).geturl()
4282
4283
def parse_qs(url):
    """Parse the query string of *url* into a dict of value lists."""
    parsed = compat_urllib_parse_urlparse(url)
    return compat_parse_qs(parsed.query)
4286
4287
def read_batch_urls(batch_fd):
    """Read URLs from an open batch file, dropping BOMs, blank lines and
    comment lines beginning with '#', ';' or ']'. Closes *batch_fd*."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
4305
4306
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ascii bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
4309
4310
def update_url_query(url, query):
    """Return *url* with the params in *query* merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(params, True)))
4319
4320
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone *req*, optionally overriding its url/data and merging in extra
    headers and query parameters, preserving the HTTP method."""
    new_headers = req.headers.copy()
    new_headers.update(headers)
    # Pick the Request subclass that preserves the original method
    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        update_url_query(url or req.get_full_url(), query),
        data=data or req.data, headers=new_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4339
4340
def _multipart_encode_impl(data, boundary):
    """Serialize *data* (a dict of str/bytes keys and values) as
    multipart/form-data using *boundary*; returns (body, content_type).
    Raises ValueError when the boundary occurs inside a part."""
    content_type = 'multipart/form-data; boundary=%s' % boundary

    boundary_bytes = boundary.encode('ascii')
    parts = []
    for name, value in data.items():
        parts.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(content)
    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
4361
4362
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # Caller-chosen boundary: a collision with the data is an error
        return _multipart_encode_impl(data, boundary)

    while True:
        boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # Random boundary collided with the payload - try another one
            continue
4391
4392
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key of a list/tuple, in *d*.

    Values that are None - or falsy, while *skip_false_values* is set -
    are skipped; *default* is returned when nothing matches.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
4401
4402
def try_get(src, getter, expected_type=None):
    """Apply each getter to *src*; return the first result that neither
    raises a lookup error nor fails the *expected_type* check."""
    for get in variadic(getter):
        try:
            result = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
4412
4413
def merge_dicts(*dicts):
    """Merge dicts left-to-right, ignoring None values; an earlier empty
    string may be overwritten by a later non-empty string."""
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            if key not in merged:
                merged[key] = value
                continue
            # Only a non-empty string may replace an existing empty string
            if (isinstance(value, compat_str) and value
                    and isinstance(merged[key], compat_str)
                    and not merged[key]):
                merged[key] = value
    return merged
4426
4427
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as compat_str, decoding byte strings with *encoding*."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4430
4431
# US movie ratings mapped to the age limit they imply (see parse_age_limit)
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to the age limit they imply
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4449
4450
def parse_age_limit(s):
    """Normalize an age limit (int in [0, 21], '18+'-style string, or a
    US movie/TV rating) to an integer age, or None when unrecognized."""
    # type() rather than isinstance() so that bool is not accepted as int
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
4466
4467
def strip_jsonp(code):
    """Strip a JSONP wrapper 'callback(...)' (optionally with a
    'cb && cb(...)' guard and trailing // comments) from *code*."""
    jsonp_wrapper = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_wrapper, r'\g<callback_data>', code)
4476
4477
def js_to_json(code, vars={}):
    """Convert a JavaScript object/value literal in *code* into valid JSON.

    Handles comments, single-quoted strings, unquoted identifiers,
    hex/octal integers, trailing commas, undefined/void 0 and '!' prefixes.
    """
    # vars is a dict of var, val pairs to substitute (values are inserted
    # verbatim, so they should already be JSON)
    # NOTE(review): the mutable default is safe here - vars is only read
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals, optionally used as object keys (':')
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite a single token matched by the tokenizer regex below
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Normalize JS string escapes to their JSON equivalents
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # A trailing ':' marks an object key, which JSON quotes
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        if v in vars:
            return vars[v]

        # Bare identifier: quote it
        return '"%s"' % v

    # Tokenize strings, comments, trailing commas, identifiers and numbers,
    # passing each token through fix_kv()
    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4524
4525
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        return quality_ids.index(qid) if qid in quality_ids else -1
    return q
4534
4535
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output-template types. NOTE(review): the non-None values look like
# default filename suffixes for the corresponding output kind - confirm
# against the consumers of this table.
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
4552
# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
# Regex template matching a single %-style format specifier; presumably
# completed via str.format: {0} receives the pattern for allowed mapping
# keys, {1} the pattern for the conversion type (see STR_FORMAT_TYPES)
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])? # unused in python
        {1} # conversion type
    )
'''


# All conversion types accepted by %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
4571
4572
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    return s if len(s) <= length else s[:length - len(ELLIPSES)] + ELLIPSES
4581
4582
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4585
4586
def is_outdated_version(version, limit, assume_new=True):
    """True when *version* is older than *limit*; *assume_new* decides the
    verdict when the comparison cannot be made."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
4594
4595
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    from .update import is_non_updateable
    return not is_non_updateable()
4602
4603
def args_to_str(args):
    """Short, shell-quoted string representation of a subprocess command."""
    return ' '.join(map(compat_shlex_quote, args))
4607
4608
def error_to_compat_str(err):
    """Return the message of an exception as a text string."""
    message = str(err)
    # On python 2 the message may be a byte string that must be decoded
    # with the proper encoding rather than ascii
    if sys.version_info[0] < 3:
        message = message.decode(preferredencoding())
    return message
4616
4617
def mimetype2ext(mt):
    """Guess a file extension from a MIME type (parameters after ';' are
    ignored). Falls back to the subtype with '+' replaced by '.'."""
    if mt is None:
        return None

    mt, _, _params = mt.partition(';')
    mt = mt.strip()

    # Exact full-type matches take precedence
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
        'audio/wav': 'wav',
        'audio/wave': 'wav',
    }
    if mt in FULL_MAP:
        return FULL_MAP[mt]

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
        'filmstrip+json': 'fs',
        'svg+xml': 'svg',
    }
    _, _, subtype = mt.rpartition('/')
    if subtype.lower() in SUBTYPE_MAP:
        return SUBTYPE_MAP[subtype.lower()]

    # Structured-syntax suffix, e.g. 'vnd.foo+json' -> 'json'
    SUFFIX_MAP = {
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
        'gzip': 'gz',
    }
    _, _, suffix = subtype.partition('+')
    if suffix in SUFFIX_MAP:
        return SUFFIX_MAP[suffix]

    return subtype.replace('+', '.')
4680
4681
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into vcodec/acodec/dynamic_range."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = [c for c in (
        piece.strip() for piece in codecs_str.strip().strip(',').split(',')) if c]
    vcodec = acodec = hdr = None
    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                    'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
                    'dtsc', 'dtse', 'dtsh', 'dtsl')
    for full_codec in split_codecs:
        parts = full_codec.split('.')
        # Normalize e.g. av01 -> av1, vp09 -> vp9 by dropping zeros
        codec = parts[0].replace('0', '')
        if codec in VIDEO_CODECS:
            if not vcodec:
                # vp9/av1 carry the profile info in the first four parts
                vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
                if codec in ('dvh1', 'dvhe'):
                    hdr = 'DV'
                elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
                    hdr = 'HDR10'
                elif full_codec.replace('0', '').startswith('vp9.2'):
                    hdr = 'HDR10'
        elif codec in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
    # Nothing recognized: with exactly two entries, assume video + audio order
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4720
4721
def urlhandle_detect_ext(url_handle):
    """Guess the file extension for a response, preferring the
    Content-Disposition filename over the Content-Type."""
    headers = url_handle.headers
    cd = headers.get('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext
    return mimetype2ext(headers.get('Content-Type'))
4734
4735
def encode_data_uri(data, mime_type):
    """Build a base64 ``data:`` URI for *data* with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4738
4739
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # No viewer age limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
4748
4749
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Order matters: the longer UTF-32 BOMs must be tested before the
    # UTF-16 ones they share a prefix with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = next(
        (first_bytes[len(bom):].decode(enc, 'replace')
         for bom, enc in BOMS if first_bytes.startswith(bom)),
        None)
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4768
4769
def determine_protocol(info_dict):
    """Return the download protocol for *info_dict*, deriving it from the
    URL when no explicit 'protocol' field is present."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    for proto in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(proto):
            return proto

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4790
4791
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """
    # Visible width: terminal escape sequences do not count towards padding
    def width(string):
        return len(remove_terminal_sequences(string))

    def get_max_lens(table):
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # Drop columns whose cells are all empty (max width 0 is falsy)
        max_lens = get_max_lens(data)
        header_row = filter_using_list(header_row, max_lens)
        data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extraGap += 1
    if delim:
        # Insert a separator row built from the delimiter character, and
        # suppress padding of the last column
        table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
        max_lens[-1] = 0
    for row in table:
        for pos, text in enumerate(map(str, row)):
            # NOTE(review): pads cells in place — this mutates the caller's
            # row lists; confirm callers do not reuse them afterwards
            row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
    ret = '\n'.join(''.join(row) for row in table)
    return ret
4819
4820
4821def _match_one(filter_part, dct, incomplete):
4822 # TODO: Generalize code with YoutubeDL._build_format_filter
4823 STRING_OPERATORS = {
4824 '*=': operator.contains,
4825 '^=': lambda attr, value: attr.startswith(value),
4826 '$=': lambda attr, value: attr.endswith(value),
4827 '~=': lambda attr, value: re.search(value, attr),
4828 }
4829 COMPARISON_OPERATORS = {
4830 **STRING_OPERATORS,
4831 '<=': operator.le, # "<=" must be defined above "<"
4832 '<': operator.lt,
4833 '>=': operator.ge,
4834 '>': operator.gt,
4835 '=': operator.eq,
4836 }
4837
4838 operator_rex = re.compile(r'''(?x)\s*
4839 (?P<key>[a-z_]+)
4840 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4841 (?:
4842 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4843 (?P<strval>.+?)
4844 )
4845 \s*$
4846 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4847 m = operator_rex.search(filter_part)
4848 if m:
4849 m = m.groupdict()
4850 unnegated_op = COMPARISON_OPERATORS[m['op']]
4851 if m['negation']:
4852 op = lambda attr, value: not unnegated_op(attr, value)
4853 else:
4854 op = unnegated_op
4855 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4856 if m['quote']:
4857 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4858 actual_value = dct.get(m['key'])
4859 numeric_comparison = None
4860 if isinstance(actual_value, compat_numeric_types):
4861 # If the original field is a string and matching comparisonvalue is
4862 # a number we should respect the origin of the original field
4863 # and process comparison value as a string (see
4864 # https://github.com/ytdl-org/youtube-dl/issues/11082)
4865 try:
4866 numeric_comparison = int(comparison_value)
4867 except ValueError:
4868 numeric_comparison = parse_filesize(comparison_value)
4869 if numeric_comparison is None:
4870 numeric_comparison = parse_filesize(f'{comparison_value}B')
4871 if numeric_comparison is None:
4872 numeric_comparison = parse_duration(comparison_value)
4873 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4874 raise ValueError('Operator %s only supports string values!' % m['op'])
4875 if actual_value is None:
4876 return incomplete or m['none_inclusive']
4877 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
4878
4879 UNARY_OPERATORS = {
4880 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4881 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
4882 }
4883 operator_rex = re.compile(r'''(?x)\s*
4884 (?P<op>%s)\s*(?P<key>[a-z_]+)
4885 \s*$
4886 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4887 m = operator_rex.search(filter_part)
4888 if m:
4889 op = UNARY_OPERATORS[m.group('op')]
4890 actual_value = dct.get(m.group('key'))
4891 if incomplete and actual_value is None:
4892 return True
4893 return op(actual_value)
4894
4895 raise ValueError('Invalid filter part %r' % filter_part)
4896
4897
def match_str(filter_str, dct, incomplete=False):
    """Filter a dictionary with a simple string syntax.

    Returns True (=passes filter) when every '&'-separated condition holds;
    a literal '&' may be escaped as '\\&'.
    When incomplete, all conditions pass on missing fields.
    """
    conditions = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(cond.replace(r'\&', '&'), dct, incomplete)
        for cond in conditions)
4905
4906
def match_filter_func(filter_str):
    """Build a --match-filter callback: returns None when the video passes,
    otherwise a human-readable skip reason."""
    def _match_func(info_dict, *args, **kwargs):
        if match_str(filter_str, info_dict, *args, **kwargs):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4915
4916
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds; None if unparsable."""
    if not time_expr:
        return None

    # Plain offset, optionally suffixed with 's' (e.g. "30.5s")
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock time "H:MM:SS", where frames may follow as ":ff" or ".fff"
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, seconds = mobj.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
    return None
4928
4929
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode (HH:MM:SS,mmm)."""
    return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4932
4933
def ass_subtitles_timecode(seconds):
    """Format a duration in seconds as an ASS timecode (H:MM:SS.cc)."""
    # Renamed local avoids shadowing the imported 'time' module
    tt = timetuple_from_msec(seconds * 1000)
    # ASS uses centiseconds, hence milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*tt[:-1], tt.milliseconds / 10)
4937
4938
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Old TTAF namespaces are rewritten to the modern TTML ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Only these TTML style attributes are translated into SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> resolved style-property dict
    default_style = {}  # default style taken from the body/div elements

    class TTMLPElementParser(object):
        # Streams one <p> element into SRT-compatible text.
        # NOTE(review): these are mutable *class* attributes shared across
        # instances; appears safe because each parse_node() uses a fresh
        # parser and start/end calls stay balanced — confirm before reuse.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                # Inline tts:* attributes override any referenced style
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already applied by an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened by the matching start() in reverse order
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Convert a single subtitle element into its SRT text
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; repeat while a style references a parent
    # that has not been processed yet (forward reference)
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style on the body/div element becomes the default for all paragraphs
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            # No explicit end: derive it from the duration
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
5101
5102
def cli_option(params, command_option, param):
    """Return ``[command_option, value]`` for a set option, else ``[]``.

    Truthy values are stringified; falsy non-None values pass through as-is.
    """
    value = params.get(param)
    if value:
        value = compat_str(value)
    return [command_option, value] if value is not None else []
5108
5109
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Map a boolean option to CLI args, optionally joined by *separator*."""
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
5118
5119
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit *command_option* alone when the param equals *expected_value*."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5123
5124
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the first matching argument list(s) from *argdict* for *keys*.

    Each entry of *keys* may be a single key or a tuple of keys; all lists
    found for the first entry with any hit are concatenated and returned.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_list in keys:
        found = [argdict.get(key.lower()) for key in variadic(key_list)]
        found = [args for args in found if args is not None]
        if found:
            return [arg for args in found for arg in args]
    return default
5143
5144
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Build the key lookup order for *exe*'s arguments and delegate to
    cli_configuration_args."""
    main_key, exe = main_key.lower(), exe.lower()
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    keys = [f'{root_key}{k}' for k in (keys or [''])]
    if root_key not in keys:
        # A specific sub-key was requested; legacy list-form args don't apply
        use_compat = False
    else:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
5156
5157
class ISO639Utils(object):
    """Conversions between ISO 639-1 and ISO 639-2/T language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # First match wins, mirroring insertion order of the map
        return next(
            (short for short, long_ in cls._lang_map.items() if long_ == code),
            None)
5361
5362
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        normalized = code.upper()
        return cls._country_map.get(normalized)
5621
5622
5623class GeoUtils(object):
5624 # Major IPv4 address blocks per country
5625 _country_ip_map = {
5626 'AD': '46.172.224.0/19',
5627 'AE': '94.200.0.0/13',
5628 'AF': '149.54.0.0/17',
5629 'AG': '209.59.64.0/18',
5630 'AI': '204.14.248.0/21',
5631 'AL': '46.99.0.0/16',
5632 'AM': '46.70.0.0/15',
5633 'AO': '105.168.0.0/13',
5634 'AP': '182.50.184.0/21',
5635 'AQ': '23.154.160.0/24',
5636 'AR': '181.0.0.0/12',
5637 'AS': '202.70.112.0/20',
5638 'AT': '77.116.0.0/14',
5639 'AU': '1.128.0.0/11',
5640 'AW': '181.41.0.0/18',
5641 'AX': '185.217.4.0/22',
5642 'AZ': '5.197.0.0/16',
5643 'BA': '31.176.128.0/17',
5644 'BB': '65.48.128.0/17',
5645 'BD': '114.130.0.0/16',
5646 'BE': '57.0.0.0/8',
5647 'BF': '102.178.0.0/15',
5648 'BG': '95.42.0.0/15',
5649 'BH': '37.131.0.0/17',
5650 'BI': '154.117.192.0/18',
5651 'BJ': '137.255.0.0/16',
5652 'BL': '185.212.72.0/23',
5653 'BM': '196.12.64.0/18',
5654 'BN': '156.31.0.0/16',
5655 'BO': '161.56.0.0/16',
5656 'BQ': '161.0.80.0/20',
5657 'BR': '191.128.0.0/12',
5658 'BS': '24.51.64.0/18',
5659 'BT': '119.2.96.0/19',
5660 'BW': '168.167.0.0/16',
5661 'BY': '178.120.0.0/13',
5662 'BZ': '179.42.192.0/18',
5663 'CA': '99.224.0.0/11',
5664 'CD': '41.243.0.0/16',
5665 'CF': '197.242.176.0/21',
5666 'CG': '160.113.0.0/16',
5667 'CH': '85.0.0.0/13',
5668 'CI': '102.136.0.0/14',
5669 'CK': '202.65.32.0/19',
5670 'CL': '152.172.0.0/14',
5671 'CM': '102.244.0.0/14',
5672 'CN': '36.128.0.0/10',
5673 'CO': '181.240.0.0/12',
5674 'CR': '201.192.0.0/12',
5675 'CU': '152.206.0.0/15',
5676 'CV': '165.90.96.0/19',
5677 'CW': '190.88.128.0/17',
5678 'CY': '31.153.0.0/16',
5679 'CZ': '88.100.0.0/14',
5680 'DE': '53.0.0.0/8',
5681 'DJ': '197.241.0.0/17',
5682 'DK': '87.48.0.0/12',
5683 'DM': '192.243.48.0/20',
5684 'DO': '152.166.0.0/15',
5685 'DZ': '41.96.0.0/12',
5686 'EC': '186.68.0.0/15',
5687 'EE': '90.190.0.0/15',
5688 'EG': '156.160.0.0/11',
5689 'ER': '196.200.96.0/20',
5690 'ES': '88.0.0.0/11',
5691 'ET': '196.188.0.0/14',
5692 'EU': '2.16.0.0/13',
5693 'FI': '91.152.0.0/13',
5694 'FJ': '144.120.0.0/16',
5695 'FK': '80.73.208.0/21',
5696 'FM': '119.252.112.0/20',
5697 'FO': '88.85.32.0/19',
5698 'FR': '90.0.0.0/9',
5699 'GA': '41.158.0.0/15',
5700 'GB': '25.0.0.0/8',
5701 'GD': '74.122.88.0/21',
5702 'GE': '31.146.0.0/16',
5703 'GF': '161.22.64.0/18',
5704 'GG': '62.68.160.0/19',
5705 'GH': '154.160.0.0/12',
5706 'GI': '95.164.0.0/16',
5707 'GL': '88.83.0.0/19',
5708 'GM': '160.182.0.0/15',
5709 'GN': '197.149.192.0/18',
5710 'GP': '104.250.0.0/19',
5711 'GQ': '105.235.224.0/20',
5712 'GR': '94.64.0.0/13',
5713 'GT': '168.234.0.0/16',
5714 'GU': '168.123.0.0/16',
5715 'GW': '197.214.80.0/20',
5716 'GY': '181.41.64.0/18',
5717 'HK': '113.252.0.0/14',
5718 'HN': '181.210.0.0/16',
5719 'HR': '93.136.0.0/13',
5720 'HT': '148.102.128.0/17',
5721 'HU': '84.0.0.0/14',
5722 'ID': '39.192.0.0/10',
5723 'IE': '87.32.0.0/12',
5724 'IL': '79.176.0.0/13',
5725 'IM': '5.62.80.0/20',
5726 'IN': '117.192.0.0/10',
5727 'IO': '203.83.48.0/21',
5728 'IQ': '37.236.0.0/14',
5729 'IR': '2.176.0.0/12',
5730 'IS': '82.221.0.0/16',
5731 'IT': '79.0.0.0/10',
5732 'JE': '87.244.64.0/18',
5733 'JM': '72.27.0.0/17',
5734 'JO': '176.29.0.0/16',
5735 'JP': '133.0.0.0/8',
5736 'KE': '105.48.0.0/12',
5737 'KG': '158.181.128.0/17',
5738 'KH': '36.37.128.0/17',
5739 'KI': '103.25.140.0/22',
5740 'KM': '197.255.224.0/20',
5741 'KN': '198.167.192.0/19',
5742 'KP': '175.45.176.0/22',
5743 'KR': '175.192.0.0/10',
5744 'KW': '37.36.0.0/14',
5745 'KY': '64.96.0.0/15',
5746 'KZ': '2.72.0.0/13',
5747 'LA': '115.84.64.0/18',
5748 'LB': '178.135.0.0/16',
5749 'LC': '24.92.144.0/20',
5750 'LI': '82.117.0.0/19',
5751 'LK': '112.134.0.0/15',
5752 'LR': '102.183.0.0/16',
5753 'LS': '129.232.0.0/17',
5754 'LT': '78.56.0.0/13',
5755 'LU': '188.42.0.0/16',
5756 'LV': '46.109.0.0/16',
5757 'LY': '41.252.0.0/14',
5758 'MA': '105.128.0.0/11',
5759 'MC': '88.209.64.0/18',
5760 'MD': '37.246.0.0/16',
5761 'ME': '178.175.0.0/17',
5762 'MF': '74.112.232.0/21',
5763 'MG': '154.126.0.0/17',
5764 'MH': '117.103.88.0/21',
5765 'MK': '77.28.0.0/15',
5766 'ML': '154.118.128.0/18',
5767 'MM': '37.111.0.0/17',
5768 'MN': '49.0.128.0/17',
5769 'MO': '60.246.0.0/16',
5770 'MP': '202.88.64.0/20',
5771 'MQ': '109.203.224.0/19',
5772 'MR': '41.188.64.0/18',
5773 'MS': '208.90.112.0/22',
5774 'MT': '46.11.0.0/16',
5775 'MU': '105.16.0.0/12',
5776 'MV': '27.114.128.0/18',
5777 'MW': '102.70.0.0/15',
5778 'MX': '187.192.0.0/11',
5779 'MY': '175.136.0.0/13',
5780 'MZ': '197.218.0.0/15',
5781 'NA': '41.182.0.0/16',
5782 'NC': '101.101.0.0/18',
5783 'NE': '197.214.0.0/18',
5784 'NF': '203.17.240.0/22',
5785 'NG': '105.112.0.0/12',
5786 'NI': '186.76.0.0/15',
5787 'NL': '145.96.0.0/11',
5788 'NO': '84.208.0.0/13',
5789 'NP': '36.252.0.0/15',
5790 'NR': '203.98.224.0/19',
5791 'NU': '49.156.48.0/22',
5792 'NZ': '49.224.0.0/14',
5793 'OM': '5.36.0.0/15',
5794 'PA': '186.72.0.0/15',
5795 'PE': '186.160.0.0/14',
5796 'PF': '123.50.64.0/18',
5797 'PG': '124.240.192.0/19',
5798 'PH': '49.144.0.0/13',
5799 'PK': '39.32.0.0/11',
5800 'PL': '83.0.0.0/11',
5801 'PM': '70.36.0.0/20',
5802 'PR': '66.50.0.0/16',
5803 'PS': '188.161.0.0/16',
5804 'PT': '85.240.0.0/13',
5805 'PW': '202.124.224.0/20',
5806 'PY': '181.120.0.0/14',
5807 'QA': '37.210.0.0/15',
5808 'RE': '102.35.0.0/16',
5809 'RO': '79.112.0.0/13',
5810 'RS': '93.86.0.0/15',
5811 'RU': '5.136.0.0/13',
5812 'RW': '41.186.0.0/16',
5813 'SA': '188.48.0.0/13',
5814 'SB': '202.1.160.0/19',
5815 'SC': '154.192.0.0/11',
5816 'SD': '102.120.0.0/13',
5817 'SE': '78.64.0.0/12',
5818 'SG': '8.128.0.0/10',
5819 'SI': '188.196.0.0/14',
5820 'SK': '78.98.0.0/15',
5821 'SL': '102.143.0.0/17',
5822 'SM': '89.186.32.0/19',
5823 'SN': '41.82.0.0/15',
5824 'SO': '154.115.192.0/18',
5825 'SR': '186.179.128.0/17',
5826 'SS': '105.235.208.0/21',
5827 'ST': '197.159.160.0/19',
5828 'SV': '168.243.0.0/16',
5829 'SX': '190.102.0.0/20',
5830 'SY': '5.0.0.0/16',
5831 'SZ': '41.84.224.0/19',
5832 'TC': '65.255.48.0/20',
5833 'TD': '154.68.128.0/19',
5834 'TG': '196.168.0.0/14',
5835 'TH': '171.96.0.0/13',
5836 'TJ': '85.9.128.0/18',
5837 'TK': '27.96.24.0/21',
5838 'TL': '180.189.160.0/20',
5839 'TM': '95.85.96.0/19',
5840 'TN': '197.0.0.0/11',
5841 'TO': '175.176.144.0/21',
5842 'TR': '78.160.0.0/11',
5843 'TT': '186.44.0.0/15',
5844 'TV': '202.2.96.0/19',
5845 'TW': '120.96.0.0/11',
5846 'TZ': '156.156.0.0/14',
5847 'UA': '37.52.0.0/14',
5848 'UG': '102.80.0.0/13',
5849 'US': '6.0.0.0/8',
5850 'UY': '167.56.0.0/13',
5851 'UZ': '84.54.64.0/18',
5852 'VA': '212.77.0.0/19',
5853 'VC': '207.191.240.0/21',
5854 'VE': '186.88.0.0/13',
5855 'VG': '66.81.192.0/20',
5856 'VI': '146.226.0.0/16',
5857 'VN': '14.160.0.0/11',
5858 'VU': '202.80.32.0/20',
5859 'WF': '117.20.32.0/21',
5860 'WS': '202.4.32.0/19',
5861 'YE': '134.35.0.0/16',
5862 'YT': '41.242.116.0/22',
5863 'ZA': '41.0.0.0/11',
5864 'ZM': '102.144.0.0/13',
5865 'ZW': '102.177.192.0/18',
5866 }
5867
5868 @classmethod
5869 def random_ipv4(cls, code_or_block):
5870 if len(code_or_block) == 2:
5871 block = cls._country_ip_map.get(code_or_block.upper())
5872 if not block:
5873 return None
5874 else:
5875 block = code_or_block
5876 addr, preflen = block.split('/')
5877 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5878 addr_max = addr_min | (0xffffffff >> int(preflen))
5879 return compat_str(socket.inet_ntoa(
5880 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5881
5882
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that honours a per-request 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Install default http/https handlers. The keyword defaults bind
        # the per-iteration values of scheme and self.proxy_open, so each
        # scheme gets its own handler (avoids the late-binding closure trap).
        for scheme in ('http', 'https'):
            setattr(
                self, '%s_open' % scheme,
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Hand off to the http/https handlers, which wrap the socket with socks
            req.add_header('Ytdl-socks-proxy', proxy)
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(self, req, proxy, type)
5906
5907
5908# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5909# released into Public Domain
5910# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5911
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n <= 0:
        # Matches the historical hand-rolled loop: n == 0 (and any negative
        # input) encodes as a single zero byte
        s = b'\000'
    else:
        # int.to_bytes performs the big-endian serialization the original
        # 32-bit chunk-and-strip loop emulated
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # Pad the front so that len(s) is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5940
5941
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes natively performs the big-endian accumulation that the
    # original zero-padded 32-bit chunk loop emulated (b'' gives 0, as before)
    return int.from_bytes(s, 'big')
5957
5958
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is interpreted little-endian: reverse, hexlify, parse
    message = int(binascii.hexlify(data[::-1]), 16)
    ciphertext = pow(message, exponent, modulus)
    return '%x' % ciphertext
5974
5975
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError if data does not fit in length with 11 bytes overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (EME-PKCS1-v1_5) requires the padding string to consist of
    # NONZERO pseudo-random octets: a zero byte acts as the padding
    # terminator and would corrupt decryption (the old randint(0, 254)
    # could emit zeros)
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5989
5990
def encode_base_n(num, n, table=None):
    """Encode non-negative integer `num` in base `n` using `table` as digits
    (defaults to 0-9a-zA-Z truncated to the base)."""
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    table = table or FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
6007
6008
def decode_packed_codes(code):
    """De-obfuscate JavaScript packed with Dean Edwards' p.a.c.k.e.r."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Build the mapping from base-N token back to the original symbol;
    # an empty symbol means the token maps to itself
    symbol_table = {}
    for index in reversed(range(count)):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda m: symbol_table[m.group(0)],
        obfuscated_code)
6025
6026
def caesar(s, alphabet, shift):
    """Caesar-shift every character of `s` that occurs in `alphabet` by
    `shift` positions (cyclic); other characters pass through unchanged."""
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        pos = alphabet.find(ch)
        shifted.append(ch if pos < 0 else alphabet[(pos + shift) % size])
    return ''.join(shifted)
6034
6035
def rot47(s):
    # ROT47: rotate the 94 printable ASCII characters (codes 33-126) by 47,
    # a self-inverse obfuscation cipher
    return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
6038
6039
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list ('KEY=val,KEY="quoted,val"') into a dict,
    stripping surrounding double quotes from quoted values."""
    attrs = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        attrs[key] = val[1:-1] if val.startswith('"') else val
    return attrs
6047
6048
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit signed integer, like JS '>>>'."""
    if val < 0:
        # Reinterpret the negative value as its unsigned 32-bit equivalent
        val += 0x100000000
    return val >> n
6051
6052
6053# Based on png2str() written by @gdkchan and improved by @yokrysty
6054# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG image into (width, height, pixels).

    `pixels` is a list of rows; each row is a flat list of channel bytes
    (three values per pixel). Assumes 8-bit RGB without interlacing —
    TODO(review): confirm callers never pass other color types/bit depths.

    Raises IOError on a missing PNG signature/IHDR or missing IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned int of 1, 2 or 4 bytes, chosen by slice length
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk: 4-byte length, 4-byte type, data, 4-byte CRC (ignored)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # Concatenate all IDAT chunks: together they form one zlib stream
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel (RGB); each scanline is prefixed by 1 filter-type byte
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Map a flat channel index back into the rows decoded so far
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the same channel of the previous pixel (3 bytes back),
            # only defined from the second pixel of the row onwards
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (PNG spec section 9)
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # 'c' is the upper-left neighbour, defined only when both
                # a previous pixel and a previous row exist
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick the neighbour closest to p,
                # breaking ties in the order a, b, c
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
6158
6159
def write_xattr(path, key, value):
    """Set extended attribute `key` (str) to `value` (bytes) on file `path`.

    Tries, in order: the pyxattr/xattr Python modules; on Windows, NTFS
    Alternate Data Streams; otherwise the setfattr/xattr CLI tools.

    Raises XAttrMetadataError when setting fails and XAttrUnavailableError
    when no usable implementation is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools expect a text value, not bytes
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
6242
6243
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the (string) year,
    month and day of a random date between 1950-01-01 and 1995-12-31."""
    epoch = datetime.date(1950, 1, 1)
    span = (datetime.date(1995, 12, 31) - epoch).days
    chosen = epoch + datetime.timedelta(days=random.randint(0, span))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
6254
6255
6256# Templates for internet shortcut files, which are plain text files.
# Windows / cross-platform .url internet-shortcut format
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# macOS .webloc format (a plist XML file)
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# freedesktop.org .desktop link format (Linux desktop environments)
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()

# Shortcut-file templates keyed by link type
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
6287
6288
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    Raises ValueError for IPv6 (bracketed) hosts.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # NOTE(review): raises AttributeError when the IRI has no hostname — presumably
    # callers always pass absolute URLs; confirm before hardening.
    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.
    # NOTE(review): port 80 is dropped regardless of scheme (e.g. also for
    # https, whose default is 443) — looks intentional-ish but verify.
    if iri_parts.port is not None and iri_parts.port != 80:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6331
6332
def to_high_limit_path(path):
    """On Windows, prefix the absolute path with \\\\?\\ to lift the MAX_PATH
    limit; on other platforms return the path unchanged."""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # Work around MAX_PATH limitation on Windows. The maximum allowed length
    # for the individual path segments may still be quite limited.
    return r'\\?\ '.rstrip() + os.path.abspath(path)
6339
6340
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format `obj` (or `obj[field]` when `field` is given) with `template`,
    optionally transforming it with `func` first; values listed in `ignore`
    yield `default` instead."""
    if field is None:
        val = default if obj is None else obj
    else:
        val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
6349
6350
def clean_podcast_url(url):
    """Strip known podcast-analytics redirect prefixes out of `url`."""
    tracking_prefixes = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefixes, '', url)
6366
6367
_HEX_TABLE = '0123456789abcdef'  # lowercase hex digits


def random_uuidv4():
    """Return a random RFC 4122 version-4 UUID string.

    Not cryptographically secure (uses the `random` module).
    The 'y' position carries the variant bits and must be one of 8, 9, a, b
    per RFC 4122 §4.4 — a plain random hex digit there produces an invalid
    UUID variant.
    """
    return re.sub(
        r'[xy]',
        lambda m: random.choice('89ab') if m.group(0) == 'y' else _HEX_TABLE[random.randint(0, 15)],
        'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
6373
6374
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not exist.

    Returns True on success, False on failure (after optionally reporting
    the error through the `to_screen` callable, when one is supplied).
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            # exist_ok guards against a concurrent mkdir between the
            # exists() check and makedirs()
            os.makedirs(dn, exist_ok=True)
        return True
    except (OSError, IOError) as err:
        # BUGFIX: was `if callable(to_screen) is not None:` which is always
        # True (callable() returns a bool), so a None to_screen got called
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
6385
6386
def get_executable_path():
    """Return the absolute directory yt-dlp runs from, accounting for
    PyInstaller bundles and zip installs."""
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        location = os.path.dirname(sys.executable)
    elif isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        location = os.path.join(os.path.dirname(__file__), '../..')
    else:
        location = os.path.join(os.path.dirname(__file__), '..')
    return os.path.abspath(location)
6396
6397
def load_plugins(name, suffix, namespace):
    """Load all attributes ending in `suffix` from ytdlp_plugins/<name> into
    `namespace`; returns the dict of classes that were added.

    Missing plugin packages are silently ignored.
    """
    classes = {}
    try:
        spec = importlib.util.spec_from_file_location(
            name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
        module = importlib.util.module_from_spec(spec)
        sys.modules[spec.name] = module
        spec.loader.exec_module(module)
        # Renamed loop variable: the original reused (and clobbered) `name`
        for attr in dir(module):
            if attr in namespace or not attr.endswith(suffix):
                continue
            klass = getattr(module, attr)
            classes[attr] = namespace[attr] = klass
    except FileNotFoundError:
        pass
    return classes
6416
6417
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings or "...".
                            When a function is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    # TODO: Write tests
    '''
    if not casesense:
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` (nonlocal) records how many branching steps ('...', tuples,
        # functions) occurred, i.e. how deeply the result lists are nested
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            # Do not traverse into None — remaining keys simply yield None
            if obj is None:
                return None
            if isinstance(key, (list, tuple)):
                # Alternative keys: collect the result of each, then flatten
                # them through the '...' branch below
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                # Branch over every value of the current object
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # Filter keys/indices by predicate, branching over the matches
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Plain dict lookup; falls back to a linear case-insensitive
                # scan when casesense is off and the exact key is absent
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # User-supplied keys may be indices ('3') or slices ('1:4')
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # ':' over a sequence is equivalent to '...'
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Flatten the nested branch results down to a single list,
                # dropping Nones, then type-filter
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
6514
6515
def traverse_dict(dictn, keys, casesense=True):
    ''' For backward compatibility. Do not use '''
    # Thin legacy wrapper around traverse_obj with user-input semantics
    return traverse_obj(
        dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
6520
6521
def variadic(x, allowed_types=(str, bytes)):
    """Return `x` unchanged when it is a non-atomic iterable; otherwise wrap
    it in a 1-tuple. `allowed_types` lists iterables treated as atomic."""
    if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types):
        return x
    return (x,)
6524
6525
6526# create a JSON Web Signature (jws) with HS256 algorithm
6527# the resulting format is in JWS Compact Serialization
6528# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6529# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create a JSON Web Signature (JWS Compact Serialization) signed with HS256.

    @param payload_data  JSON-serializable payload (the JWT claims)
    @param key           shared HMAC secret (str)
    @param headers       extra entries merged into the JOSE header
    @returns             bytes token 'header.payload.signature'

    NOTE(review): standard JWS (RFC 7515 §2) uses padless base64url; this
    uses plain base64 — kept as-is since existing consumers may expect it.
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    # `headers` defaults to None instead of a mutable `{}` default argument
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
    h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
    return token
6543
6544
6545# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode a JWT's payload WITHOUT verifying the signature.

    @param jwt  compact-serialized token 'header.payload.signature' (str)
    @returns    the decoded payload (result of json.loads)
    """
    header_b64, payload_b64, signature_b64 = jwt.split('.')
    # JWTs use padless base64url (RFC 7515 §2); re-add the stripped '='
    # padding, otherwise urlsafe_b64decode raises binascii.Error for any
    # payload whose length is not a multiple of 4
    payload_data = json.loads(base64.urlsafe_b64decode(
        payload_b64 + '=' * (-len(payload_b64) % 4)))
    return payload_data
6550
6551
def supports_terminal_sequences(stream):
    """Whether ANSI terminal escape sequences may be written to `stream`."""
    if compat_os_name == 'nt':
        # Windows 10 build 10586 (TH2) is the first with native VT support
        if get_windows_version() < (10, 0, 10586):
            return False
    elif not os.getenv('TERM'):
        # No $TERM set: assume a dumb terminal / non-terminal environment
        return False
    try:
        return stream.isatty()
    except BaseException:
        # The stream may not implement isatty() at all
        return False
6562
6563
# Matches ANSI SGR (color/style) escape sequences, e.g. '\033[31m'
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with all ANSI SGR escape sequences stripped out."""
    return re.sub(_terminal_sequences_re, '', string)
6569
6570
def number_of_digits(number):
    # Length of the decimal representation; a leading '-' counts for negatives
    return len('%d' % number)
6573
6574
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the string forms of all truthy values with `delim`.

    When `from_dict` is given, `values` are treated as keys to look up in it;
    missing keys yield None and are therefore skipped.
    """
    if from_dict is not None:
        # BUGFIX: was `operator.itemgetter(values)(from_dict)`, which looks up
        # the whole tuple of names as ONE dictionary key (KeyError), instead
        # of fetching each named value individually
        values = (from_dict.get(value) for value in values)
    return delim.join(map(str, filter(None, values)))