#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
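# Illustrative note (not part of the original flow): random_user_agent() fills the
# Chrome template above, so std_headers['User-Agent'] looks like
#   'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
#   '(KHTML, like Gecko) Chrome/74.0.3729.129 Safari/537.36'
# with the version picked at random from _CHROME_VERSIONS.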


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref

def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
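# Example for write_json_file() (illustrative sketch, hypothetical filename): the
# object is serialised to a '.tmp' file first and then renamed into place, so a
# crash cannot leave a half-written JSON file behind.
#   write_json_file({'id': 'abc123', 'title': 'Some video'}, 'Some video.info.json')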


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
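# Example for xpath_with_ns() (illustrative): expand a namespace prefix into the
# Clark notation that ElementTree expects.
#   xpath_with_ns('media:thumbnail', {'media': 'http://search.yahoo.com/mrss/'})
#   == '{http://search.yahoo.com/mrss/}thumbnail'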


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]
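# Example for the xpath_* helpers (illustrative): they return `default` instead of
# raising when the element, its text or the attribute is missing (unless fatal=True).
#   doc = compat_etree_fromstring('<root><title>Test</title></root>')
#   xpath_text(doc, './title')                       # 'Test'
#   xpath_text(doc, './missing', default=None)       # None
#   xpath_attr(doc, './title', 'id', default='n/a')  # 'n/a'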


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
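# Examples for the get_element(s)_by_* helpers (illustrative): they return the text
# content of matching tags, or None / an empty list when nothing matches.
#   get_element_by_class('title', '<div class="title">Foo</div>')   # 'Foo'
#   get_element_by_attribute('id', 'x', '<span id="x">Bar</span>')  # 'Bar'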


class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1

def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs
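# Example for extract_attributes() (illustrative): attribute names are lower-cased
# and entity references in values are decoded by the underlying HTMLParser.
#   extract_attributes('<a href="https://example.com" data-id=foo>')
#   == {'href': 'https://example.com', 'data-id': 'foo'}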


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
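# Example for clean_html() (illustrative): tags are stripped, <br> becomes a newline
# and entities are decoded.
#   clean_html('<p>First line<br>Second &amp; last</p>')
#   == 'First line\nSecond & last'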


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)

def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp

def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return ' '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
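# Examples for sanitize_filename() (illustrative): characters that are unsafe in
# filenames are replaced, and restricted mode keeps to a conservative ASCII subset.
#   sanitize_filename('A/B: C?')                   # 'A_B - C'
#   sanitize_filename('A/B: C?', restricted=True)  # 'A_B_-_C'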


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
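# Examples for sanitize_url() (illustrative): scheme-less and mistyped URLs are
# normalized before a request is attempted.
#   sanitize_url('//example.com/video')     # 'http://example.com/video'
#   sanitize_url('httpss://example.com/x')  # 'https://example.com/x'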


def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
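# Example for extract_basic_auth() (illustrative): credentials embedded in the URL
# are split off into a ready-made Authorization header value.
#   extract_basic_auth('http://user:pass@example.com/feed')
#   == ('http://example.com/feed', 'Basic dXNlcjpwYXNz')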


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
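# Example for orderedSet() (illustrative): duplicates are dropped while first-seen
# order is preserved.
#   orderedSet([3, 1, 3, 2, 1])  # [3, 1, 2]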


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
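# Examples for unescapeHTML()/escapeHTML() (illustrative):
#   unescapeHTML('Tom &amp; Jerry &#x263a;')  # 'Tom & Jerry ☺'
#   escapeHTML('<a href="x">')                # '&lt;a href=&quot;x&quot;&gt;'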


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


aa7785f8 2385_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
2386
2387
2388def timetuple_from_msec(msec):
2389 secs, msec = divmod(msec, 1000)
2390 mins, secs = divmod(secs, 60)
2391 hrs, mins = divmod(mins, 60)
2392 return _timetuple(hrs, mins, secs, msec)
2393
2394
cdb19aa4 2395def formatSeconds(secs, delim=':', msec=False):
aa7785f8 2396 time = timetuple_from_msec(secs * 1000)
2397 if time.hours:
2398 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
2399 elif time.minutes:
2400 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
4539dd30 2401 else:
aa7785f8 2402 ret = '%d' % time.seconds
2403 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
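# Editor's note: illustrative examples, not part of the original source:
#   >>> timetuple_from_msec(61500)
#   Time(hours=0, minutes=1, seconds=1, milliseconds=500)
#   >>> formatSeconds(3661)
#   '1:01:01'
#   >>> formatSeconds(61.5, msec=True)
#   '1:01.500'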
4539dd30 2404
a0ddb8a2 2405
77562778 2406def _ssl_load_windows_store_certs(ssl_context, storename):
2407 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2408 try:
2409 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
2410 if encoding == 'x509_asn' and (
2411 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
2412 except PermissionError:
2413 return
2414 for cert in certs:
a2366922 2415 try:
77562778 2416 ssl_context.load_verify_locations(cadata=cert)
2417 except ssl.SSLError:
a2366922
PH
2418 pass
2419
77562778 2420
2421def make_HTTPS_handler(params, **kwargs):
2422 opts_check_certificate = not params.get('nocheckcertificate')
2423 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2424 context.check_hostname = opts_check_certificate
2425 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
2426 if opts_check_certificate:
4e3d1898 2427 try:
2428 context.load_default_certs()
2429 # Work around the issue in load_default_certs when there are bad certificates. See:
2430 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2431 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2432 except ssl.SSLError:
2433 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2434 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
2435 # Create a new context to discard any certificates that were already loaded
2436 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2437 context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
2438 for storename in ('CA', 'ROOT'):
2439 _ssl_load_windows_store_certs(context, storename)
2440 context.set_default_verify_paths()
77562778 2441 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2442
732ea2f0 2443
5873d4cc 2444def bug_reports_message(before=';'):
08f2a92c 2445 if ytdl_is_updateable():
7a5c1cfe 2446 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2447 else:
7a5c1cfe 2448 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2449 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2450 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2451 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2452
2453 before = before.rstrip()
2454 if not before or before.endswith(('.', '!', '?')):
2455 msg = msg[0].title() + msg[1:]
2456
2457 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2458
2459
bf5b9d85
PM
2460class YoutubeDLError(Exception):
2461 """Base exception for YoutubeDL errors."""
aa9369a2 2462 msg = None
2463
2464 def __init__(self, msg=None):
2465 if msg is not None:
2466 self.msg = msg
2467 elif self.msg is None:
2468 self.msg = type(self).__name__
2469 super().__init__(self.msg)
bf5b9d85
PM
2470
2471
3158150c 2472network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2473if hasattr(ssl, 'CertificateError'):
2474 network_exceptions.append(ssl.CertificateError)
2475network_exceptions = tuple(network_exceptions)
2476
2477
bf5b9d85 2478class ExtractorError(YoutubeDLError):
1c256f70 2479 """Error during info extraction."""
5f6a1245 2480
1151c407 2481 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2482 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2483 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2484 """
3158150c 2485 if sys.exc_info()[0] in network_exceptions:
9a82b238 2486 expected = True
d5979c5d 2487
526d74ec 2488 self.msg = str(msg)
1c256f70 2489 self.traceback = tb
1151c407 2490 self.expected = expected
2eabb802 2491 self.cause = cause
d11271dd 2492 self.video_id = video_id
1151c407 2493 self.ie = ie
2494 self.exc_info = sys.exc_info() # preserve original exception
2495
2496 super(ExtractorError, self).__init__(''.join((
2497 format_field(ie, template='[%s] '),
2498 format_field(video_id, template='%s: '),
526d74ec 2499 self.msg,
1151c407 2500 format_field(cause, template=' (caused by %r)'),
2501 '' if expected else bug_reports_message())))
1c256f70 2502
01951dda
PH
2503 def format_traceback(self):
2504 if self.traceback is None:
2505 return None
28e614de 2506 return ''.join(traceback.format_tb(self.traceback))
01951dda 2507
1c256f70 2508
416c7fcb
PH
2509class UnsupportedError(ExtractorError):
2510 def __init__(self, url):
2511 super(UnsupportedError, self).__init__(
2512 'Unsupported URL: %s' % url, expected=True)
2513 self.url = url
2514
2515
55b3e45b
JMF
2516class RegexNotFoundError(ExtractorError):
2517 """Error when a regex didn't match"""
2518 pass
2519
2520
773f291d
S
2521class GeoRestrictedError(ExtractorError):
2522 """Geographic restriction Error exception.
2523
2524 This exception may be thrown when a video is not available from your
2525 geographic location due to geographic restrictions imposed by a website.
2526 """
b6e0c7d2 2527
0db3bae8 2528 def __init__(self, msg, countries=None, **kwargs):
2529 kwargs['expected'] = True
2530 super(GeoRestrictedError, self).__init__(msg, **kwargs)
773f291d
S
2531 self.countries = countries
2532
2533
bf5b9d85 2534class DownloadError(YoutubeDLError):
59ae15a5 2535 """Download Error exception.
d77c3dfd 2536
59ae15a5
PH
2537 This exception may be thrown by FileDownloader objects if they are not
2538 configured to continue on errors. They will contain the appropriate
2539 error message.
2540 """
5f6a1245 2541
8cc83b8d
FV
2542 def __init__(self, msg, exc_info=None):
2543 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2544 super(DownloadError, self).__init__(msg)
2545 self.exc_info = exc_info
d77c3dfd
FV
2546
2547
498f5606 2548class EntryNotInPlaylist(YoutubeDLError):
2549 """Entry not in playlist exception.
2550
2551 This exception will be thrown by YoutubeDL when a requested entry
2552 is not found in the playlist info_dict
2553 """
aa9369a2 2554 msg = 'Entry not found in info'
498f5606 2555
2556
bf5b9d85 2557class SameFileError(YoutubeDLError):
59ae15a5 2558 """Same File exception.
d77c3dfd 2559
59ae15a5
PH
2560 This exception will be thrown by FileDownloader objects if they detect
2561 multiple files would have to be downloaded to the same file on disk.
2562 """
aa9369a2 2563 msg = 'Fixed output name but more than one file to download'
2564
2565 def __init__(self, filename=None):
2566 if filename is not None:
2567 self.msg += f': {filename}'
2568 super().__init__(self.msg)
d77c3dfd
FV
2569
2570
bf5b9d85 2571class PostProcessingError(YoutubeDLError):
59ae15a5 2572 """Post Processing exception.
d77c3dfd 2573
59ae15a5
PH
2574 This exception may be raised by PostProcessor's .run() method to
2575 indicate an error in the postprocessing task.
2576 """
5f6a1245 2577
5f6a1245 2578
48f79687 2579class DownloadCancelled(YoutubeDLError):
2580 """ Exception raised when the download queue should be interrupted """
2581 msg = 'The download was cancelled'
8b0d7497 2582
8b0d7497 2583
48f79687 2584class ExistingVideoReached(DownloadCancelled):
2585 """ --break-on-existing triggered """
2586 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
8b0d7497 2587
48f79687 2588
2589class RejectedVideoReached(DownloadCancelled):
2590 """ --break-on-reject triggered """
2591 msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
51d9739f 2592
2593
48f79687 2594class MaxDownloadsReached(DownloadCancelled):
59ae15a5 2595 """ --max-downloads limit has been reached. """
48f79687 2596 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2597
2598
f2ebc5c7 2599class ReExtractInfo(YoutubeDLError):
2600 """ Video info needs to be re-extracted. """
2601
2602 def __init__(self, msg, expected=False):
2603 super().__init__(msg)
2604 self.expected = expected
2605
2606
2607class ThrottledDownload(ReExtractInfo):
48f79687 2608 """ Download speed below --throttled-rate. """
aa9369a2 2609 msg = 'The download speed is below throttle limit'
d77c3dfd 2610
f2ebc5c7 2611 def __init__(self, msg):
2612 super().__init__(msg, expected=False)
2613
d77c3dfd 2614
bf5b9d85 2615class UnavailableVideoError(YoutubeDLError):
59ae15a5 2616 """Unavailable Format exception.
d77c3dfd 2617
59ae15a5
PH
2618 This exception will be thrown when a video is requested
2619 in a format that is not available for that video.
2620 """
aa9369a2 2621 msg = 'Unable to download video'
2622
2623 def __init__(self, err=None):
2624 if err is not None:
2625 self.msg += f': {err}'
2626 super().__init__(self.msg)
d77c3dfd
FV
2627
2628
bf5b9d85 2629class ContentTooShortError(YoutubeDLError):
59ae15a5 2630 """Content Too Short exception.
d77c3dfd 2631
59ae15a5
PH
2632 This exception may be raised by FileDownloader objects when a file they
2633 download is too small for what the server announced first, indicating
2634 the connection was probably interrupted.
2635 """
d77c3dfd 2636
59ae15a5 2637 def __init__(self, downloaded, expected):
bf5b9d85
PM
2638 super(ContentTooShortError, self).__init__(
2639 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2640 )
2c7ed247 2641 # Both in bytes
59ae15a5
PH
2642 self.downloaded = downloaded
2643 self.expected = expected
d77c3dfd 2644
5f6a1245 2645
bf5b9d85 2646class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2647 def __init__(self, code=None, msg='Unknown error'):
2648 super(XAttrMetadataError, self).__init__(msg)
2649 self.code = code
bd264412 2650 self.msg = msg
efa97bdc
YCH
2651
2652 # Parsing code and msg
3089bc74 2653 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2654 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2655 self.reason = 'NO_SPACE'
2656 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2657 self.reason = 'VALUE_TOO_LONG'
2658 else:
2659 self.reason = 'NOT_SUPPORTED'
2660
2661
bf5b9d85 2662class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2663 pass
2664
2665
c5a59d93 2666def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2667 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2668 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2669 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2670 if sys.version_info < (3, 0):
65220c3b
S
2671 kwargs['strict'] = True
2672 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2673 source_address = ydl_handler._params.get('source_address')
8959018a 2674
be4a824d 2675 if source_address is not None:
8959018a
AU
2676 # This is to work around _create_connection() from socket where it will try all
2677 # address data from getaddrinfo() including IPv6. This filters the result from
2678 # getaddrinfo() based on the source_address value.
2679 # This is based on the cpython socket.create_connection() function.
2680 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2681 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2682 host, port = address
2683 err = None
2684 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2685 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2686 ip_addrs = [addr for addr in addrs if addr[0] == af]
2687 if addrs and not ip_addrs:
2688 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2689 raise socket.error(
2690 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2691 % (ip_version, source_address[0]))
8959018a
AU
2692 for res in ip_addrs:
2693 af, socktype, proto, canonname, sa = res
2694 sock = None
2695 try:
2696 sock = socket.socket(af, socktype, proto)
2697 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2698 sock.settimeout(timeout)
2699 sock.bind(source_address)
2700 sock.connect(sa)
2701 err = None # Explicitly break reference cycle
2702 return sock
2703 except socket.error as _:
2704 err = _
2705 if sock is not None:
2706 sock.close()
2707 if err is not None:
2708 raise err
2709 else:
9e21e6d9
S
2710 raise socket.error('getaddrinfo returns an empty list')
2711 if hasattr(hc, '_create_connection'):
2712 hc._create_connection = _create_connection
be4a824d
PH
2713 sa = (source_address, 0)
2714 if hasattr(hc, 'source_address'): # Python 2.7+
2715 hc.source_address = sa
2716 else: # Python 2.6
2717 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2718 sock = _create_connection(
be4a824d
PH
2719 (self.host, self.port), self.timeout, sa)
2720 if is_https:
d7932313
PH
2721 self.sock = ssl.wrap_socket(
2722 sock, self.key_file, self.cert_file,
2723 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2724 else:
2725 self.sock = sock
2726 hc.connect = functools.partial(_hc_connect, hc)
2727
2728 return hc
2729
2730
87f0e62d 2731def handle_youtubedl_headers(headers):
992fc9d6
YCH
2732 filtered_headers = headers
2733
2734 if 'Youtubedl-no-compression' in filtered_headers:
2735 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2736 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2737
992fc9d6 2738 return filtered_headers
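# Editor's note: illustrative example, not part of the original source. The
# internal 'Youtubedl-no-compression' marker is consumed and Accept-Encoding
# is dropped from the request headers:
#   >>> handle_youtubedl_headers({
#   ...     'User-Agent': 'UA', 'Accept-Encoding': 'gzip',
#   ...     'Youtubedl-no-compression': 'True'})
#   {'User-Agent': 'UA'}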
87f0e62d
YCH
2739
2740
acebc9cd 2741class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2742 """Handler for HTTP requests and responses.
2743
2744 This class, when installed with an OpenerDirector, automatically adds
2745 the standard headers to every HTTP request and handles gzipped and
2746 deflated responses from web servers. If compression is to be avoided in
2747 a particular request, the original request in the program code only has
0424ec30 2748 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2749 removed before making the real request.
2750
2751 Part of this code was copied from:
2752
2753 http://techknack.net/python-urllib2-handlers/
2754
2755 Andrew Rowls, the author of that code, agreed to release it to the
2756 public domain.
2757 """
2758
be4a824d
PH
2759 def __init__(self, params, *args, **kwargs):
2760 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2761 self._params = params
2762
2763 def http_open(self, req):
71aff188
YCH
2764 conn_class = compat_http_client.HTTPConnection
2765
2766 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2767 if socks_proxy:
2768 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2769 del req.headers['Ytdl-socks-proxy']
2770
be4a824d 2771 return self.do_open(functools.partial(
71aff188 2772 _create_http_connection, self, conn_class, False),
be4a824d
PH
2773 req)
2774
59ae15a5
PH
2775 @staticmethod
2776 def deflate(data):
fc2119f2 2777 if not data:
2778 return data
59ae15a5
PH
2779 try:
2780 return zlib.decompress(data, -zlib.MAX_WBITS)
2781 except zlib.error:
2782 return zlib.decompress(data)
2783
acebc9cd 2784 def http_request(self, req):
51f267d9
S
2785 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
2786 # always respected by websites: some tend to give out URLs with non-percent-encoded
2787 # non-ASCII characters (see telemb.py, ard.py [#3412])
2788 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2789 # To work around aforementioned issue we will replace request's original URL with
2790 # percent-encoded one
2791 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2792 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2793 url = req.get_full_url()
2794 url_escaped = escape_url(url)
2795
2796 # Substitute URL if any change after escaping
2797 if url != url_escaped:
15d260eb 2798 req = update_Request(req, url=url_escaped)
51f267d9 2799
33ac271b 2800 for h, v in std_headers.items():
3d5f7a39
JK
2801 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2802 # The dict keys are capitalized because of this bug by urllib
2803 if h.capitalize() not in req.headers:
33ac271b 2804 req.add_header(h, v)
87f0e62d
YCH
2805
2806 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2807
2808 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2809 # Python 2.6 is brain-dead when it comes to fragments
2810 req._Request__original = req._Request__original.partition('#')[0]
2811 req._Request__r_type = req._Request__r_type.partition('#')[0]
2812
59ae15a5
PH
2813 return req
2814
acebc9cd 2815 def http_response(self, req, resp):
59ae15a5
PH
2816 old_resp = resp
2817 # gzip
2818 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2819 content = resp.read()
2820 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2821 try:
2822 uncompressed = io.BytesIO(gz.read())
2823 except IOError as original_ioerror:
2824 # There may be junk at the end of the file
2825 # See http://stackoverflow.com/q/4928560/35070 for details
2826 for i in range(1, 1024):
2827 try:
2828 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2829 uncompressed = io.BytesIO(gz.read())
2830 except IOError:
2831 continue
2832 break
2833 else:
2834 raise original_ioerror
b407d853 2835 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2836 resp.msg = old_resp.msg
c047270c 2837 del resp.headers['Content-encoding']
59ae15a5
PH
2838 # deflate
2839 if resp.headers.get('Content-encoding', '') == 'deflate':
2840 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2841 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2842 resp.msg = old_resp.msg
c047270c 2843 del resp.headers['Content-encoding']
ad729172 2844 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2845 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2846 if 300 <= resp.code < 400:
2847 location = resp.headers.get('Location')
2848 if location:
2849 # Per RFC 2616, the default charset is iso-8859-1, which is respected by python 3
2850 if sys.version_info >= (3, 0):
2851 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2852 else:
2853 location = location.decode('utf-8')
5a4d9ddb
S
2854 location_escaped = escape_url(location)
2855 if location != location_escaped:
2856 del resp.headers['Location']
9a4aec8b
YCH
2857 if sys.version_info < (3, 0):
2858 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2859 resp.headers['Location'] = location_escaped
59ae15a5 2860 return resp
0f8d03f8 2861
acebc9cd
PH
2862 https_request = http_request
2863 https_response = http_response
bf50b038 2864
5de90176 2865
71aff188
YCH
2866def make_socks_conn_class(base_class, socks_proxy):
2867 assert issubclass(base_class, (
2868 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2869
2870 url_components = compat_urlparse.urlparse(socks_proxy)
2871 if url_components.scheme.lower() == 'socks5':
2872 socks_type = ProxyType.SOCKS5
2873 elif url_components.scheme.lower() in ('socks', 'socks4'):
2874 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2875 elif url_components.scheme.lower() == 'socks4a':
2876 socks_type = ProxyType.SOCKS4A
71aff188 2877
cdd94c2e
YCH
2878 def unquote_if_non_empty(s):
2879 if not s:
2880 return s
2881 return compat_urllib_parse_unquote_plus(s)
2882
71aff188
YCH
2883 proxy_args = (
2884 socks_type,
2885 url_components.hostname, url_components.port or 1080,
2886 True, # Remote DNS
cdd94c2e
YCH
2887 unquote_if_non_empty(url_components.username),
2888 unquote_if_non_empty(url_components.password),
71aff188
YCH
2889 )
2890
2891 class SocksConnection(base_class):
2892 def connect(self):
2893 self.sock = sockssocket()
2894 self.sock.setproxy(*proxy_args)
2895 if type(self.timeout) in (int, float):
2896 self.sock.settimeout(self.timeout)
2897 self.sock.connect((self.host, self.port))
2898
2899 if isinstance(self, compat_http_client.HTTPSConnection):
2900 if hasattr(self, '_context'): # Python > 2.6
2901 self.sock = self._context.wrap_socket(
2902 self.sock, server_hostname=self.host)
2903 else:
2904 self.sock = ssl.wrap_socket(self.sock)
2905
2906 return SocksConnection
2907
2908
be4a824d
PH
2909class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2910 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2911 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2912 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2913 self._params = params
2914
2915 def https_open(self, req):
4f264c02 2916 kwargs = {}
71aff188
YCH
2917 conn_class = self._https_conn_class
2918
4f264c02
JMF
2919 if hasattr(self, '_context'): # python > 2.6
2920 kwargs['context'] = self._context
2921 if hasattr(self, '_check_hostname'): # python 3.x
2922 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2923
2924 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2925 if socks_proxy:
2926 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2927 del req.headers['Ytdl-socks-proxy']
2928
be4a824d 2929 return self.do_open(functools.partial(
71aff188 2930 _create_http_connection, self, conn_class, True),
4f264c02 2931 req, **kwargs)
be4a824d
PH
2932
2933
1bab3437 2934class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2935 """
2936 See [1] for cookie file format.
2937
2938 1. https://curl.haxx.se/docs/http-cookies.html
2939 """
e7e62441 2940 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2941 _ENTRY_LEN = 7
2942 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2943# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2944
2945'''
2946 _CookieFileEntry = collections.namedtuple(
2947 'CookieFileEntry',
2948 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2949
1bab3437 2950 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2951 """
2952 Save cookies to a file.
2953
2954 Most of the code is taken from CPython 3.8 and slightly adapted
2955 to support cookie files with UTF-8 in both python 2 and 3.
2956 """
2957 if filename is None:
2958 if self.filename is not None:
2959 filename = self.filename
2960 else:
2961 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2962
1bab3437
S
2963 # Store session cookies with `expires` set to 0 instead of an empty
2964 # string
2965 for cookie in self:
2966 if cookie.expires is None:
2967 cookie.expires = 0
c380cc28
S
2968
2969 with io.open(filename, 'w', encoding='utf-8') as f:
2970 f.write(self._HEADER)
2971 now = time.time()
2972 for cookie in self:
2973 if not ignore_discard and cookie.discard:
2974 continue
2975 if not ignore_expires and cookie.is_expired(now):
2976 continue
2977 if cookie.secure:
2978 secure = 'TRUE'
2979 else:
2980 secure = 'FALSE'
2981 if cookie.domain.startswith('.'):
2982 initial_dot = 'TRUE'
2983 else:
2984 initial_dot = 'FALSE'
2985 if cookie.expires is not None:
2986 expires = compat_str(cookie.expires)
2987 else:
2988 expires = ''
2989 if cookie.value is None:
2990 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2991 # with no name, whereas http.cookiejar regards it as a
2992 # cookie with no value.
2993 name = ''
2994 value = cookie.name
2995 else:
2996 name = cookie.name
2997 value = cookie.value
2998 f.write(
2999 '\t'.join([cookie.domain, initial_dot, cookie.path,
3000 secure, expires, name, value]) + '\n')
1bab3437
S
3001
3002 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 3003 """Load cookies from a file."""
3004 if filename is None:
3005 if self.filename is not None:
3006 filename = self.filename
3007 else:
3008 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
3009
c380cc28
S
3010 def prepare_line(line):
3011 if line.startswith(self._HTTPONLY_PREFIX):
3012 line = line[len(self._HTTPONLY_PREFIX):]
3013 # comments and empty lines are fine
3014 if line.startswith('#') or not line.strip():
3015 return line
3016 cookie_list = line.split('\t')
3017 if len(cookie_list) != self._ENTRY_LEN:
3018 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
3019 cookie = self._CookieFileEntry(*cookie_list)
3020 if cookie.expires_at and not cookie.expires_at.isdigit():
3021 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
3022 return line
3023
e7e62441 3024 cf = io.StringIO()
c380cc28 3025 with io.open(filename, encoding='utf-8') as f:
e7e62441 3026 for line in f:
c380cc28
S
3027 try:
3028 cf.write(prepare_line(line))
3029 except compat_cookiejar.LoadError as e:
3030 write_string(
3031 'WARNING: skipping cookie file entry due to %s: %r\n'
3032 % (e, line), sys.stderr)
3033 continue
e7e62441 3034 cf.seek(0)
3035 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
3036 # Session cookies are denoted by either `expires` field set to
3037 # an empty string or 0. MozillaCookieJar only recognizes the former
3038 # (see [1]). So we need to force the latter to be recognized as session
3039 # cookies on our own.
3040 # Session cookies may be important for cookies-based authentication,
3041 # e.g. usually, when user does not check 'Remember me' check box while
3042 # logging in on a site, some important cookies are stored as session
3043 # cookies so that not recognizing them will result in failed login.
3044 # 1. https://bugs.python.org/issue17164
3045 for cookie in self:
3046 # Treat `expires=0` cookies as session cookies
3047 if cookie.expires == 0:
3048 cookie.expires = None
3049 cookie.discard = True
3050
3051
a6420bf5
S
3052class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
3053 def __init__(self, cookiejar=None):
3054 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
3055
3056 def http_response(self, request, response):
3057 # Python 2 will choke on next HTTP request in row if there are non-ASCII
3058 # characters in Set-Cookie HTTP header of last response (see
067aa17e 3059 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
3060 # In order to at least prevent crashing we will percent encode Set-Cookie
3061 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
3062 # if sys.version_info < (3, 0) and response.headers:
3063 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
3064 # set_cookie = response.headers.get(set_cookie_header)
3065 # if set_cookie:
3066 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
3067 # if set_cookie != set_cookie_escaped:
3068 # del response.headers[set_cookie_header]
3069 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
3070 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
3071
f5fa042c 3072 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
3073 https_response = http_response
3074
3075
fca6dba8 3076class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 3077 """YoutubeDL redirect handler
3078
3079 The code is based on HTTPRedirectHandler implementation from CPython [1].
3080
3081 This redirect handler solves two issues:
3082 - ensures redirect URL is always unicode under python 2
3083 - introduces support for experimental HTTP response status code
3084 308 Permanent Redirect [2] used by some sites [3]
3085
3086 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
3087 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3088 3. https://github.com/ytdl-org/youtube-dl/issues/28768
3089 """
3090
3091 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
3092
3093 def redirect_request(self, req, fp, code, msg, headers, newurl):
3094 """Return a Request or None in response to a redirect.
3095
3096 This is called by the http_error_30x methods when a
3097 redirection response is received. If a redirection should
3098 take place, return a new Request to allow http_error_30x to
3099 perform the redirect. Otherwise, raise HTTPError if no-one
3100 else should try to handle this url. Return None if you can't
3101 but another Handler might.
3102 """
3103 m = req.get_method()
3104 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3105 or code in (301, 302, 303) and m == "POST")):
3106 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3107 # Strictly (according to RFC 2616), 301 or 302 in response to
3108 # a POST MUST NOT cause a redirection without confirmation
3109 # from the user (of urllib.request, in this case). In practice,
3110 # essentially all clients do redirect in this case, so we do
3111 # the same.
3112
3113 # On python 2, urlh.geturl() may sometimes return the redirect URL
3114 # as a byte string instead of unicode. This workaround forces
3115 # it to always return unicode.
3116 if sys.version_info[0] < 3:
3117 newurl = compat_str(newurl)
3118
3119 # Be conciliant with URIs containing a space. This is mainly
3120 # redundant with the more complete encoding done in http_error_302(),
3121 # but it is kept for compatibility with other callers.
3122 newurl = newurl.replace(' ', '%20')
3123
3124 CONTENT_HEADERS = ("content-length", "content-type")
3125 # NB: don't use dict comprehension for python 2.6 compatibility
3126 newheaders = dict((k, v) for k, v in req.headers.items()
3127 if k.lower() not in CONTENT_HEADERS)
3128 return compat_urllib_request.Request(
3129 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3130 unverifiable=True)
fca6dba8
S
3131
3132
46f59e89
S
3133def extract_timezone(date_str):
3134 m = re.search(
f137e4c2 3135 r'''(?x)
3136 ^.{8,}? # >=8 char non-TZ prefix, if present
3137 (?P<tz>Z| # just the UTC Z, or
3138 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3139 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3140 [ ]? # optional space
3141 (?P<sign>\+|-) # +/-
3142 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3143 $)
3144 ''', date_str)
46f59e89
S
3145 if not m:
3146 timezone = datetime.timedelta()
3147 else:
3148 date_str = date_str[:-len(m.group('tz'))]
3149 if not m.group('sign'):
3150 timezone = datetime.timedelta()
3151 else:
3152 sign = 1 if m.group('sign') == '+' else -1
3153 timezone = datetime.timedelta(
3154 hours=sign * int(m.group('hours')),
3155 minutes=sign * int(m.group('minutes')))
3156 return timezone, date_str
3157
3158
08b38d54 3159def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3160 """ Return a UNIX timestamp from the given date """
3161
3162 if date_str is None:
3163 return None
3164
52c3a6e4
S
3165 date_str = re.sub(r'\.[0-9]+', '', date_str)
3166
08b38d54 3167 if timezone is None:
46f59e89
S
3168 timezone, date_str = extract_timezone(date_str)
3169
52c3a6e4
S
3170 try:
3171 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3172 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3173 return calendar.timegm(dt.timetuple())
3174 except ValueError:
3175 pass
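# Editor's note: illustrative example, not part of the original source. The
# timezone offset is applied so the result is a UTC epoch timestamp:
#   >>> parse_iso8601('2014-03-24T21:50:00+01:00')
#   1395694200
#   >>> parse_iso8601('2014-03-24T20:50:00Z')
#   1395694200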
912b38b4
PH
3176
3177
46f59e89
S
3178def date_formats(day_first=True):
3179 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3180
3181
42bdd9d0 3182def unified_strdate(date_str, day_first=True):
bf50b038 3183 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3184
3185 if date_str is None:
3186 return None
bf50b038 3187 upload_date = None
5f6a1245 3188 # Replace commas
026fcc04 3189 date_str = date_str.replace(',', ' ')
42bdd9d0 3190 # Remove AM/PM + timezone
9bb8e0a3 3191 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3192 _, date_str = extract_timezone(date_str)
42bdd9d0 3193
46f59e89 3194 for expression in date_formats(day_first):
bf50b038
JMF
3195 try:
3196 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3197 except ValueError:
bf50b038 3198 pass
42393ce2
PH
3199 if upload_date is None:
3200 timetuple = email.utils.parsedate_tz(date_str)
3201 if timetuple:
c6b9cf05
S
3202 try:
3203 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3204 except ValueError:
3205 pass
6a750402
JMF
3206 if upload_date is not None:
3207 return compat_str(upload_date)
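# Editor's note: illustrative example, not part of the original source; exact
# behaviour depends on DATE_FORMATS_DAY_FIRST defined elsewhere in this module:
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('28/01/2014 21:00:00 +0100')
#   '20140128'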
bf50b038 3208
5f6a1245 3209
46f59e89
S
3210def unified_timestamp(date_str, day_first=True):
3211 if date_str is None:
3212 return None
3213
2ae2ffda 3214 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3215
7dc2a74e 3216 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3217 timezone, date_str = extract_timezone(date_str)
3218
3219 # Remove AM/PM + timezone
3220 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3221
deef3195
S
3222 # Remove unrecognized timezones from ISO 8601 alike timestamps
3223 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3224 if m:
3225 date_str = date_str[:-len(m.group('tz'))]
3226
f226880c
PH
3227 # Python only supports microseconds, so remove nanoseconds
3228 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3229 if m:
3230 date_str = m.group(1)
3231
46f59e89
S
3232 for expression in date_formats(day_first):
3233 try:
7dc2a74e 3234 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3235 return calendar.timegm(dt.timetuple())
3236 except ValueError:
3237 pass
3238 timetuple = email.utils.parsedate_tz(date_str)
3239 if timetuple:
7dc2a74e 3240 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3241
3242
28e614de 3243def determine_ext(url, default_ext='unknown_video'):
85750f89 3244 if url is None or '.' not in url:
f4776371 3245 return default_ext
9cb9a5df 3246 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3247 if re.match(r'^[A-Za-z0-9]+$', guess):
3248 return guess
a7aaa398
S
3249 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
3250 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3251 return guess.rstrip('/')
73e79f2a 3252 else:
cbdbb766 3253 return default_ext
73e79f2a 3254
5f6a1245 3255
824fa511
S
3256def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3257 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3258
5f6a1245 3259
9e62f283 3260def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3261 """
3262 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3263 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3264
3265 format: string date format used to return datetime object from
3266 precision: round the time portion of a datetime object.
3267 auto|microsecond|second|minute|hour|day.
3268 auto: round to the unit provided in date_str (if applicable).
3269 """
3270 auto_precision = False
3271 if precision == 'auto':
3272 auto_precision = True
3273 precision = 'microsecond'
3274 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3275 if date_str in ('now', 'today'):
37254abc 3276 return today
f8795e10
PH
3277 if date_str == 'yesterday':
3278 return today - datetime.timedelta(days=1)
9e62f283 3279 match = re.match(
3280 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3281 date_str)
37254abc 3282 if match is not None:
9e62f283 3283 start_time = datetime_from_str(match.group('start'), precision, format)
3284 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3285 unit = match.group('unit')
9e62f283 3286 if unit == 'month' or unit == 'year':
3287 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3288 unit = 'day'
9e62f283 3289 else:
3290 if unit == 'week':
3291 unit = 'day'
3292 time *= 7
3293 delta = datetime.timedelta(**{unit + 's': time})
3294 new_date = start_time + delta
3295 if auto_precision:
3296 return datetime_round(new_date, unit)
3297 return new_date
3298
3299 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3300
3301
3302def date_from_str(date_str, format='%Y%m%d'):
3303 """
3304 Return a datetime object from a string in the format YYYYMMDD or
3305 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3306
3307 format: string date format used to return datetime object from
3308 """
3309 return datetime_from_str(date_str, precision='microsecond', format=format).date()
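# Editor's note: illustrative example, not part of the original source.
# Absolute and relative date strings are both accepted:
#   >>> date_from_str('20211115')
#   datetime.date(2021, 11, 15)
#   >>> date_from_str('now-1week')   # today's date minus seven days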
3310
3311
3312def datetime_add_months(dt, months):
3313 """Increment/Decrement a datetime object by months."""
3314 month = dt.month + months - 1
3315 year = dt.year + month // 12
3316 month = month % 12 + 1
3317 day = min(dt.day, calendar.monthrange(year, month)[1])
3318 return dt.replace(year, month, day)
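# Editor's note: illustrative example, not part of the original source. The
# day is clamped to the length of the target month:
#   >>> datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   datetime.datetime(2020, 2, 29, 0, 0)
#   >>> datetime_add_months(datetime.datetime(2020, 3, 15), -1)
#   datetime.datetime(2020, 2, 15, 0, 0)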
3319
3320
3321def datetime_round(dt, precision='day'):
3322 """
3323 Round a datetime object's time to a specific precision
3324 """
3325 if precision == 'microsecond':
3326 return dt
3327
3328 unit_seconds = {
3329 'day': 86400,
3330 'hour': 3600,
3331 'minute': 60,
3332 'second': 1,
3333 }
3334 roundto = lambda x, n: ((x + n / 2) // n) * n
3335 timestamp = calendar.timegm(dt.timetuple())
3336 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3337
3338
e63fc1be 3339def hyphenate_date(date_str):
3340 """
3341 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3342 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3343 if match is not None:
3344 return '-'.join(match.groups())
3345 else:
3346 return date_str
3347
5f6a1245 3348
bd558525
JMF
3349class DateRange(object):
3350 """Represents a time interval between two dates"""
5f6a1245 3351
bd558525
JMF
3352 def __init__(self, start=None, end=None):
3353 """start and end must be strings in the format accepted by date"""
3354 if start is not None:
3355 self.start = date_from_str(start)
3356 else:
3357 self.start = datetime.datetime.min.date()
3358 if end is not None:
3359 self.end = date_from_str(end)
3360 else:
3361 self.end = datetime.datetime.max.date()
37254abc 3362 if self.start > self.end:
bd558525 3363 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
5f6a1245 3364
bd558525
JMF
3365 @classmethod
3366 def day(cls, day):
3367 """Returns a range that only contains the given day"""
5f6a1245
JW
3368 return cls(day, day)
3369
bd558525
JMF
3370 def __contains__(self, date):
3371 """Check if the date is in the range"""
37254abc
JMF
3372 if not isinstance(date, datetime.date):
3373 date = date_from_str(date)
3374 return self.start <= date <= self.end
5f6a1245 3375
bd558525 3376 def __str__(self):
5f6a1245 3377 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
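# Editor's note: illustrative example, not part of the original source.
# Membership tests accept either date objects or YYYYMMDD strings:
#   >>> '20210115' in DateRange('20210101', '20210131')
#   True
#   >>> '20210201' in DateRange('20210101', '20210131')
#   False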
c496ca96
PH
3378
3379
3380def platform_name():
3381 """ Returns the platform name as a compat_str """
3382 res = platform.platform()
3383 if isinstance(res, bytes):
3384 res = res.decode(preferredencoding())
3385
3386 assert isinstance(res, compat_str)
3387 return res
c257baff
PH
3388
3389
49fa4d9a
N
3390def get_windows_version():
3391 ''' Get Windows version. None if it's not running on Windows '''
3392 if compat_os_name == 'nt':
3393 return version_tuple(platform.win32_ver()[1])
3394 else:
3395 return None
3396
3397
b58ddb32
PH
3398def _windows_write_string(s, out):
3399 """ Returns True if the string was written using special methods,
3400 False if it has yet to be written out."""
3401 # Adapted from http://stackoverflow.com/a/3259271/35070
3402
3403 import ctypes
3404 import ctypes.wintypes
3405
3406 WIN_OUTPUT_IDS = {
3407 1: -11,
3408 2: -12,
3409 }
3410
a383a98a
PH
3411 try:
3412 fileno = out.fileno()
3413 except AttributeError:
3414 # If the output stream doesn't have a fileno, it's virtual
3415 return False
aa42e873
PH
3416 except io.UnsupportedOperation:
3417 # Some strange Windows pseudo files?
3418 return False
b58ddb32
PH
3419 if fileno not in WIN_OUTPUT_IDS:
3420 return False
3421
d7cd9a9e 3422 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3423 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3424 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3425 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3426
d7cd9a9e 3427 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3428 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3429 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3430 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3431 written = ctypes.wintypes.DWORD(0)
3432
d7cd9a9e 3433 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3434 FILE_TYPE_CHAR = 0x0002
3435 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3436 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3437 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3438 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3439 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3440 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3441
3442 def not_a_console(handle):
3443 if handle == INVALID_HANDLE_VALUE or handle is None:
3444 return True
3089bc74
S
3445 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3446 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3447
3448 if not_a_console(h):
3449 return False
3450
d1b9c912
PH
3451 def next_nonbmp_pos(s):
3452 try:
3453 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3454 except StopIteration:
3455 return len(s)
3456
3457 while s:
3458 count = min(next_nonbmp_pos(s), 1024)
3459
b58ddb32 3460 ret = WriteConsoleW(
d1b9c912 3461 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3462 if ret == 0:
3463 raise OSError('Failed to write string')
d1b9c912
PH
3464 if not count: # We just wrote a non-BMP character
3465 assert written.value == 2
3466 s = s[1:]
3467 else:
3468 assert written.value > 0
3469 s = s[written.value:]
b58ddb32
PH
3470 return True
3471
3472
734f90bb 3473def write_string(s, out=None, encoding=None):
7459e3a2
PH
3474 if out is None:
3475 out = sys.stderr
8bf48f23 3476 assert type(s) == compat_str
7459e3a2 3477
b58ddb32
PH
3478 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3479 if _windows_write_string(s, out):
3480 return
3481
3089bc74
S
3482 if ('b' in getattr(out, 'mode', '')
3483 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3484 byt = s.encode(encoding or preferredencoding(), 'ignore')
3485 out.write(byt)
3486 elif hasattr(out, 'buffer'):
3487 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3488 byt = s.encode(enc, 'ignore')
3489 out.buffer.write(byt)
3490 else:
8bf48f23 3491 out.write(s)
7459e3a2
PH
3492 out.flush()
3493
3494
48ea9cea
PH
3495def bytes_to_intlist(bs):
3496 if not bs:
3497 return []
3498 if isinstance(bs[0], int): # Python 3
3499 return list(bs)
3500 else:
3501 return [ord(c) for c in bs]
3502
c257baff 3503
cba892fa 3504def intlist_to_bytes(xs):
3505 if not xs:
3506 return b''
edaa23f8 3507 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3508
3509
c1c9a79c
PH
3510# Cross-platform file locking
3511if sys.platform == 'win32':
3512 import ctypes.wintypes
3513 import msvcrt
3514
3515 class OVERLAPPED(ctypes.Structure):
3516 _fields_ = [
3517 ('Internal', ctypes.wintypes.LPVOID),
3518 ('InternalHigh', ctypes.wintypes.LPVOID),
3519 ('Offset', ctypes.wintypes.DWORD),
3520 ('OffsetHigh', ctypes.wintypes.DWORD),
3521 ('hEvent', ctypes.wintypes.HANDLE),
3522 ]
3523
3524 kernel32 = ctypes.windll.kernel32
3525 LockFileEx = kernel32.LockFileEx
3526 LockFileEx.argtypes = [
3527 ctypes.wintypes.HANDLE, # hFile
3528 ctypes.wintypes.DWORD, # dwFlags
3529 ctypes.wintypes.DWORD, # dwReserved
3530 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3531 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3532 ctypes.POINTER(OVERLAPPED) # Overlapped
3533 ]
3534 LockFileEx.restype = ctypes.wintypes.BOOL
3535 UnlockFileEx = kernel32.UnlockFileEx
3536 UnlockFileEx.argtypes = [
3537 ctypes.wintypes.HANDLE, # hFile
3538 ctypes.wintypes.DWORD, # dwReserved
3539 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3540 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3541 ctypes.POINTER(OVERLAPPED) # Overlapped
3542 ]
3543 UnlockFileEx.restype = ctypes.wintypes.BOOL
3544 whole_low = 0xffffffff
3545 whole_high = 0x7fffffff
3546
3547 def _lock_file(f, exclusive):
3548 overlapped = OVERLAPPED()
3549 overlapped.Offset = 0
3550 overlapped.OffsetHigh = 0
3551 overlapped.hEvent = 0
3552 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3553 handle = msvcrt.get_osfhandle(f.fileno())
3554 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3555 whole_low, whole_high, f._lock_file_overlapped_p):
3556 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3557
3558 def _unlock_file(f):
3559 assert f._lock_file_overlapped_p
3560 handle = msvcrt.get_osfhandle(f.fileno())
3561 if not UnlockFileEx(handle, 0,
3562 whole_low, whole_high, f._lock_file_overlapped_p):
3563 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3564
3565else:
399a76e6
YCH
3566 # Some platforms, such as Jython, are missing fcntl
3567 try:
3568 import fcntl
c1c9a79c 3569
399a76e6
YCH
3570 def _lock_file(f, exclusive):
3571 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3572
399a76e6
YCH
3573 def _unlock_file(f):
3574 fcntl.flock(f, fcntl.LOCK_UN)
3575 except ImportError:
3576 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3577
3578 def _lock_file(f, exclusive):
3579 raise IOError(UNSUPPORTED_MSG)
3580
3581 def _unlock_file(f):
3582 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3583
3584
3585class locked_file(object):
3586 def __init__(self, filename, mode, encoding=None):
3587 assert mode in ['r', 'a', 'w']
3588 self.f = io.open(filename, mode, encoding=encoding)
3589 self.mode = mode
3590
3591 def __enter__(self):
3592 exclusive = self.mode != 'r'
3593 try:
3594 _lock_file(self.f, exclusive)
3595 except IOError:
3596 self.f.close()
3597 raise
3598 return self
3599
3600 def __exit__(self, etype, value, traceback):
3601 try:
3602 _unlock_file(self.f)
3603 finally:
3604 self.f.close()
3605
3606 def __iter__(self):
3607 return iter(self.f)
3608
3609 def write(self, *args):
3610 return self.f.write(*args)
3611
3612 def read(self, *args):
3613 return self.f.read(*args)
4eb7f1d1
JMF
3614
3615
4644ac55
S
3616def get_filesystem_encoding():
3617 encoding = sys.getfilesystemencoding()
3618 return encoding if encoding is not None else 'utf-8'
3619
3620
4eb7f1d1 3621def shell_quote(args):
a6a173c2 3622 quoted_args = []
4644ac55 3623 encoding = get_filesystem_encoding()
a6a173c2
JMF
3624 for a in args:
3625 if isinstance(a, bytes):
3626 # We may get a filename encoded with 'encodeFilename'
3627 a = a.decode(encoding)
aefce8e6 3628 quoted_args.append(compat_shlex_quote(a))
28e614de 3629 return ' '.join(quoted_args)
9d4660ca
PH
3630
3631
3632def smuggle_url(url, data):
3633 """ Pass additional data in a URL for internal use. """
3634
81953d1a
RA
3635 url, idata = unsmuggle_url(url, {})
3636 data.update(idata)
15707c7e 3637 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3638 {'__youtubedl_smuggle': json.dumps(data)})
3639 return url + '#' + sdata
9d4660ca
PH
3640
3641
79f82953 3642def unsmuggle_url(smug_url, default=None):
83e865a3 3643 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3644 return smug_url, default
28e614de
PH
3645 url, _, sdata = smug_url.rpartition('#')
3646 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3647 data = json.loads(jsond)
3648 return url, data
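# Editor's note: illustrative example, not part of the original source. Data
# smuggled into the URL fragment round-trips through unsmuggle_url():
#   >>> url = smuggle_url('http://example.com/video', {'referer': 'http://example.com/'})
#   >>> unsmuggle_url(url)
#   ('http://example.com/video', {'referer': 'http://example.com/'})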
02dbf93f
PH
3649
3650
02dbf93f
PH
3651def format_bytes(bytes):
3652 if bytes is None:
28e614de 3653 return 'N/A'
02dbf93f
PH
3654 if type(bytes) is str:
3655 bytes = float(bytes)
3656 if bytes == 0.0:
3657 exponent = 0
3658 else:
3659 exponent = int(math.log(bytes, 1024.0))
28e614de 3660 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3661 converted = float(bytes) / float(1024 ** exponent)
28e614de 3662 return '%.2f%s' % (converted, suffix)
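# Editor's note: illustrative example, not part of the original source. Values
# are rendered with binary (1024-based) prefixes:
#   >>> format_bytes(1536)
#   '1.50KiB'
#   >>> format_bytes(3 * 1024 ** 3)
#   '3.00GiB'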
f53c966a 3663
1c088fa8 3664
fb47597b
S
3665def lookup_unit_table(unit_table, s):
3666 units_re = '|'.join(re.escape(u) for u in unit_table)
3667 m = re.match(
782b1b5b 3668 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3669 if not m:
3670 return None
3671 num_str = m.group('num').replace(',', '.')
3672 mult = unit_table[m.group('unit')]
3673 return int(float(num_str) * mult)
3674
3675
be64b5b0
PH
3676def parse_filesize(s):
3677 if s is None:
3678 return None
3679
dfb1b146 3680 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3681 # but we support those too
3682 _UNIT_TABLE = {
3683 'B': 1,
3684 'b': 1,
70852b47 3685 'bytes': 1,
be64b5b0
PH
3686 'KiB': 1024,
3687 'KB': 1000,
3688 'kB': 1024,
3689 'Kb': 1000,
13585d76 3690 'kb': 1000,
70852b47
YCH
3691 'kilobytes': 1000,
3692 'kibibytes': 1024,
be64b5b0
PH
3693 'MiB': 1024 ** 2,
3694 'MB': 1000 ** 2,
3695 'mB': 1024 ** 2,
3696 'Mb': 1000 ** 2,
13585d76 3697 'mb': 1000 ** 2,
70852b47
YCH
3698 'megabytes': 1000 ** 2,
3699 'mebibytes': 1024 ** 2,
be64b5b0
PH
3700 'GiB': 1024 ** 3,
3701 'GB': 1000 ** 3,
3702 'gB': 1024 ** 3,
3703 'Gb': 1000 ** 3,
13585d76 3704 'gb': 1000 ** 3,
70852b47
YCH
3705 'gigabytes': 1000 ** 3,
3706 'gibibytes': 1024 ** 3,
be64b5b0
PH
3707 'TiB': 1024 ** 4,
3708 'TB': 1000 ** 4,
3709 'tB': 1024 ** 4,
3710 'Tb': 1000 ** 4,
13585d76 3711 'tb': 1000 ** 4,
70852b47
YCH
3712 'terabytes': 1000 ** 4,
3713 'tebibytes': 1024 ** 4,
be64b5b0
PH
3714 'PiB': 1024 ** 5,
3715 'PB': 1000 ** 5,
3716 'pB': 1024 ** 5,
3717 'Pb': 1000 ** 5,
13585d76 3718 'pb': 1000 ** 5,
70852b47
YCH
3719 'petabytes': 1000 ** 5,
3720 'pebibytes': 1024 ** 5,
be64b5b0
PH
3721 'EiB': 1024 ** 6,
3722 'EB': 1000 ** 6,
3723 'eB': 1024 ** 6,
3724 'Eb': 1000 ** 6,
13585d76 3725 'eb': 1000 ** 6,
70852b47
YCH
3726 'exabytes': 1000 ** 6,
3727 'exbibytes': 1024 ** 6,
be64b5b0
PH
3728 'ZiB': 1024 ** 7,
3729 'ZB': 1000 ** 7,
3730 'zB': 1024 ** 7,
3731 'Zb': 1000 ** 7,
13585d76 3732 'zb': 1000 ** 7,
70852b47
YCH
3733 'zettabytes': 1000 ** 7,
3734 'zebibytes': 1024 ** 7,
be64b5b0
PH
3735 'YiB': 1024 ** 8,
3736 'YB': 1000 ** 8,
3737 'yB': 1024 ** 8,
3738 'Yb': 1000 ** 8,
13585d76 3739 'yb': 1000 ** 8,
70852b47
YCH
3740 'yottabytes': 1000 ** 8,
3741 'yobibytes': 1024 ** 8,
be64b5b0
PH
3742 }
3743
fb47597b
S
3744 return lookup_unit_table(_UNIT_TABLE, s)
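# Editor's note: illustrative example, not part of the original source. Both
# SI and binary suffixes are understood and the result is in bytes:
#   >>> parse_filesize('1.5 GiB')
#   1610612736
#   >>> parse_filesize('500 KB')
#   500000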
3745
3746
3747def parse_count(s):
3748 if s is None:
be64b5b0
PH
3749 return None
3750
fb47597b
S
3751 s = s.strip()
3752
3753 if re.match(r'^[\d,.]+$', s):
3754 return str_to_int(s)
3755
3756 _UNIT_TABLE = {
3757 'k': 1000,
3758 'K': 1000,
3759 'm': 1000 ** 2,
3760 'M': 1000 ** 2,
3761 'kk': 1000 ** 2,
3762 'KK': 1000 ** 2,
3763 }
be64b5b0 3764
fb47597b 3765 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3766
2f7ae819 3767
b871d7e9
S
3768def parse_resolution(s):
3769 if s is None:
3770 return {}
3771
17ec8bcf 3772 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
b871d7e9
S
3773 if mobj:
3774 return {
3775 'width': int(mobj.group('w')),
3776 'height': int(mobj.group('h')),
3777 }
3778
17ec8bcf 3779 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
b871d7e9
S
3780 if mobj:
3781 return {'height': int(mobj.group(1))}
3782
3783 mobj = re.search(r'\b([48])[kK]\b', s)
3784 if mobj:
3785 return {'height': int(mobj.group(1)) * 540}
3786
3787 return {}
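# Editor's note: illustrative example, not part of the original source:
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}
#   >>> parse_resolution('4k')
#   {'height': 2160}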
3788
3789
0dc41787
S
3790def parse_bitrate(s):
3791 if not isinstance(s, compat_str):
3792 return
3793 mobj = re.search(r'\b(\d+)\s*kbps', s)
3794 if mobj:
3795 return int(mobj.group(1))
3796
3797
a942d6cb 3798def month_by_name(name, lang='en'):
caefb1de
PH
3799 """ Return the number of a month by (locale-independently) English name """
3800
f6717dec 3801 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3802
caefb1de 3803 try:
f6717dec 3804 return month_names.index(name) + 1
7105440c
YCH
3805 except ValueError:
3806 return None
3807
3808
3809def month_by_abbreviation(abbrev):
3810 """ Return the number of a month by (locale-independently) English
3811 abbreviations """
3812
3813 try:
3814 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3815 except ValueError:
3816 return None
18258362
JMF
3817
3818
5aafe895 3819def fix_xml_ampersands(xml_str):
18258362 3820 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3821 return re.sub(
3822 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3823 '&amp;',
5aafe895 3824 xml_str)
e3946f98
PH
3825
3826
3827def setproctitle(title):
8bf48f23 3828 assert isinstance(title, compat_str)
c1c05c67
YCH
3829
3830 # ctypes in Jython is not complete
3831 # http://bugs.jython.org/issue2148
3832 if sys.platform.startswith('java'):
3833 return
3834
e3946f98 3835 try:
611c1dd9 3836 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3837 except OSError:
3838 return
2f49bcd6
RC
3839 except TypeError:
3840 # LoadLibrary in Windows Python 2.7.13 only expects
3841 # a bytestring, but since unicode_literals turns
3842 # every string into a unicode string, it fails.
3843 return
6eefe533
PH
3844 title_bytes = title.encode('utf-8')
3845 buf = ctypes.create_string_buffer(len(title_bytes))
3846 buf.value = title_bytes
e3946f98 3847 try:
6eefe533 3848 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3849 except AttributeError:
3850 return # Strange libc, just skip this
d7dda168
PH
3851
3852
3853def remove_start(s, start):
46bc9b7d 3854 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3855
3856
2b9faf55 3857def remove_end(s, end):
46bc9b7d 3858 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3859
3860
31b2051e
S
3861def remove_quotes(s):
3862 if s is None or len(s) < 2:
3863 return s
3864 for quote in ('"', "'", ):
3865 if s[0] == quote and s[-1] == quote:
3866 return s[1:-1]
3867 return s
3868
3869
b6e0c7d2
U
3870def get_domain(url):
3871 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3872 return domain.group('domain') if domain else None
3873
3874
29eb5174 3875def url_basename(url):
9b8aaeed 3876 path = compat_urlparse.urlparse(url).path
28e614de 3877 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3878
3879
02dc0a36
S
3880def base_url(url):
3881 return re.match(r'https?://[^?#&]+/', url).group()
3882
3883
e34c3361 3884def urljoin(base, path):
4b5de77b
S
3885 if isinstance(path, bytes):
3886 path = path.decode('utf-8')
e34c3361
S
3887 if not isinstance(path, compat_str) or not path:
3888 return None
fad4ceb5 3889 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3890 return path
4b5de77b
S
3891 if isinstance(base, bytes):
3892 base = base.decode('utf-8')
3893 if not isinstance(base, compat_str) or not re.match(
3894 r'^(?:https?:)?//', base):
e34c3361
S
3895 return None
3896 return compat_urlparse.urljoin(base, path)
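# Illustrative usage of the URL helpers above (added commentary, not part of
# the upstream source):
#     >>> get_domain('https://www.example.com/watch?v=abc')
#     'example.com'
#     >>> url_basename('https://example.com/a/b/file.mp4?x=1')
#     'file.mp4'
#     >>> base_url('https://example.com/a/b/file.mp4')
#     'https://example.com/a/b/'
#     >>> urljoin('https://example.com/a/', 'b.mp4')
#     'https://example.com/a/b.mp4'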
3897
3898
aa94a6d3
PH
3899class HEADRequest(compat_urllib_request.Request):
3900 def get_method(self):
611c1dd9 3901 return 'HEAD'
7217e148
PH
3902
3903
95cf60e8
S
3904class PUTRequest(compat_urllib_request.Request):
3905 def get_method(self):
3906 return 'PUT'
3907
3908
9732d77e 3909def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3910 if get_attr:
3911 if v is not None:
3912 v = getattr(v, get_attr, None)
9572013d
PH
3913 if v == '':
3914 v = None
1812afb7
S
3915 if v is None:
3916 return default
3917 try:
3918 return int(v) * invscale // scale
31c49255 3919 except (ValueError, TypeError, OverflowError):
af98f8ff 3920 return default
9732d77e 3921
9572013d 3922
40a90862
JMF
3923def str_or_none(v, default=None):
3924 return default if v is None else compat_str(v)
3925
9732d77e
PH
3926
3927def str_to_int(int_str):
48d4681e 3928 """ A more relaxed version of int_or_none """
42db58ec 3929 if isinstance(int_str, compat_integer_types):
348c6bf1 3930 return int_str
42db58ec
S
3931 elif isinstance(int_str, compat_str):
3932 int_str = re.sub(r'[,\.\+]', '', int_str)
3933 return int_or_none(int_str)
608d11f5
PH
3934
3935
9732d77e 3936def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3937 if v is None:
3938 return default
3939 try:
3940 return float(v) * invscale / scale
5e1271c5 3941 except (ValueError, TypeError):
caf80631 3942 return default
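# Illustrative usage (added commentary): these coercion helpers never raise,
# they fall back to `default` instead:
#     >>> int_or_none('42')
#     42
#     >>> int_or_none('n/a', default=0)
#     0
#     >>> str_to_int('1,234')
#     1234
#     >>> float_or_none('2.5', invscale=1000)
#     2500.0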
43f775e4
PH
3943
3944
c7e327c4
S
3945def bool_or_none(v, default=None):
3946 return v if isinstance(v, bool) else default
3947
3948
53cd37ba
S
3949def strip_or_none(v, default=None):
3950 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3951
3952
af03000a
S
3953def url_or_none(url):
3954 if not url or not isinstance(url, compat_str):
3955 return None
3956 url = url.strip()
29f7c58a 3957 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3958
3959
e29663c6 3960def strftime_or_none(timestamp, date_format, default=None):
3961 datetime_object = None
3962 try:
3963 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3964 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3965 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3966 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3967 return datetime_object.strftime(date_format)
3968 except (ValueError, TypeError, AttributeError):
3969 return default
3970
3971
608d11f5 3972def parse_duration(s):
8f9312c3 3973 if not isinstance(s, compat_basestring):
608d11f5
PH
3974 return None
3975
ca7b3246
S
3976 s = s.strip()
3977
acaff495 3978 days, hours, mins, secs, ms = [None] * 5
15846398 3979 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3980 if m:
3981 days, hours, mins, secs, ms = m.groups()
3982 else:
3983 m = re.match(
056653bb
S
3984 r'''(?ix)(?:P?
3985 (?:
3986 [0-9]+\s*y(?:ears?)?\s*
3987 )?
3988 (?:
3989 [0-9]+\s*m(?:onths?)?\s*
3990 )?
3991 (?:
3992 [0-9]+\s*w(?:eeks?)?\s*
3993 )?
8f4b58d7 3994 (?:
acaff495 3995 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3996 )?
056653bb 3997 T)?
acaff495 3998 (?:
3999 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
4000 )?
4001 (?:
4002 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
4003 )?
4004 (?:
4005 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 4006 )?Z?$''', s)
acaff495 4007 if m:
4008 days, hours, mins, secs, ms = m.groups()
4009 else:
15846398 4010 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 4011 if m:
4012 hours, mins = m.groups()
4013 else:
4014 return None
4015
4016 duration = 0
4017 if secs:
4018 duration += float(secs)
4019 if mins:
4020 duration += float(mins) * 60
4021 if hours:
4022 duration += float(hours) * 60 * 60
4023 if days:
4024 duration += float(days) * 24 * 60 * 60
4025 if ms:
4026 duration += float(ms)
4027 return duration
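# Illustrative usage (added commentary): parse_duration accepts clock-style,
# ISO-8601-style and plain-English durations and returns seconds, e.g.:
#     >>> parse_duration('1:23:45')
#     5025.0
#     >>> parse_duration('PT1H30M')
#     5400.0
#     >>> parse_duration('3 minutes')
#     180.0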
91d7d0b3
JMF
4028
4029
e65e4c88 4030def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 4031 name, real_ext = os.path.splitext(filename)
e65e4c88
S
4032 return (
4033 '{0}.{1}{2}'.format(name, ext, real_ext)
4034 if not expected_real_ext or real_ext[1:] == expected_real_ext
4035 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
4036
4037
b3ed15b7
S
4038def replace_extension(filename, ext, expected_real_ext=None):
4039 name, real_ext = os.path.splitext(filename)
4040 return '{0}.{1}'.format(
4041 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
4042 ext)
4043
4044
d70ad093
PH
4045def check_executable(exe, args=[]):
4046 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
4047 args can be a list of arguments for a short output (like -version) """
4048 try:
d3c93ec2 4049 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
d70ad093
PH
4050 except OSError:
4051 return False
4052 return exe
b7ab0590
PH
4053
4054
9af98e17 4055def _get_exe_version_output(exe, args):
95807118 4056 try:
b64d04c1 4057 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 4058 # SIGTTOU if yt-dlp is run in the background.
067aa17e 4059 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
d3c93ec2 4060 out, _ = Popen(
4061 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
4062 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
95807118
PH
4063 except OSError:
4064 return False
cae97f65
PH
4065 if isinstance(out, bytes): # Python 2.x
4066 out = out.decode('ascii', 'ignore')
9af98e17 4067 return out
cae97f65
PH
4068
4069
4070def detect_exe_version(output, version_re=None, unrecognized='present'):
4071 assert isinstance(output, compat_str)
4072 if version_re is None:
4073 version_re = r'version\s+([-0-9._a-zA-Z]+)'
4074 m = re.search(version_re, output)
95807118
PH
4075 if m:
4076 return m.group(1)
4077 else:
4078 return unrecognized
4079
4080
9af98e17 4081def get_exe_version(exe, args=['--version'],
4082 version_re=None, unrecognized='present'):
4083 """ Returns the version of the specified executable,
4084 or False if the executable is not present """
4085 out = _get_exe_version_output(exe, args)
4086 return detect_exe_version(out, version_re, unrecognized) if out else False
4087
4088
cb89cfc1 4089class LazyList(collections.abc.Sequence):
483336e7 4090 ''' Lazy immutable list from an iterable
4091 Note that slices of a LazyList are lists and not LazyList'''
4092
8e5fecc8 4093 class IndexError(IndexError):
4094 pass
4095
282f5709 4096 def __init__(self, iterable, *, reverse=False, _cache=None):
483336e7 4097 self.__iterable = iter(iterable)
282f5709 4098 self.__cache = [] if _cache is None else _cache
4099 self.__reversed = reverse
483336e7 4100
4101 def __iter__(self):
28419ca2 4102 if self.__reversed:
4103 # We need to consume the entire iterable to iterate in reverse
981052c9 4104 yield from self.exhaust()
28419ca2 4105 return
4106 yield from self.__cache
483336e7 4107 for item in self.__iterable:
4108 self.__cache.append(item)
4109 yield item
4110
981052c9 4111 def __exhaust(self):
483336e7 4112 self.__cache.extend(self.__iterable)
9f1a1c36 4113 # Discard the emptied iterable to make it pickle-able
4114 self.__iterable = []
28419ca2 4115 return self.__cache
4116
981052c9 4117 def exhaust(self):
4118 ''' Evaluate the entire iterable '''
4119 return self.__exhaust()[::-1 if self.__reversed else 1]
4120
28419ca2 4121 @staticmethod
981052c9 4122 def __reverse_index(x):
e0f2b4b4 4123 return None if x is None else -(x + 1)
483336e7 4124
4125 def __getitem__(self, idx):
4126 if isinstance(idx, slice):
28419ca2 4127 if self.__reversed:
e0f2b4b4 4128 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4129 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4130 elif isinstance(idx, int):
28419ca2 4131 if self.__reversed:
981052c9 4132 idx = self.__reverse_index(idx)
e0f2b4b4 4133 start, stop, step = idx, idx, 0
483336e7 4134 else:
4135 raise TypeError('indices must be integers or slices')
e0f2b4b4 4136 if ((start or 0) < 0 or (stop or 0) < 0
4137 or (start is None and step < 0)
4138 or (stop is None and step > 0)):
483336e7 4139 # We need to consume the entire iterable to be able to slice from the end
4140 # Obviously, never use this with infinite iterables
8e5fecc8 4141 self.__exhaust()
4142 try:
4143 return self.__cache[idx]
4144 except IndexError as e:
4145 raise self.IndexError(e) from e
e0f2b4b4 4146 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4147 if n > 0:
4148 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4149 try:
4150 return self.__cache[idx]
4151 except IndexError as e:
4152 raise self.IndexError(e) from e
483336e7 4153
4154 def __bool__(self):
4155 try:
28419ca2 4156 self[-1] if self.__reversed else self[0]
8e5fecc8 4157 except self.IndexError:
483336e7 4158 return False
4159 return True
4160
4161 def __len__(self):
8e5fecc8 4162 self.__exhaust()
483336e7 4163 return len(self.__cache)
4164
282f5709 4165 def __reversed__(self):
4166 return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
4167
4168 def __copy__(self):
4169 return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
4170
4171 def __deepcopy__(self, memo):
4172 # FIXME: This is actually just a shallow copy
4173 id_ = id(self)
4174 memo[id_] = self.__copy__()
4175 return memo[id_]
28419ca2 4176
4177 def __repr__(self):
4178 # repr and str should mimic a list. So we exhaust the iterable
4179 return repr(self.exhaust())
4180
4181 def __str__(self):
4182 return repr(self.exhaust())
4183
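# Illustrative usage (added commentary): a LazyList wraps an iterator and only
# consumes as much of it as indexing requires; slices come back as plain lists:
#     >>> from itertools import count
#     >>> lazy = LazyList(count())
#     >>> lazy[:5]
#     [0, 1, 2, 3, 4]
#     >>> lazy[7]
#     7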
483336e7 4184
7be9ccff 4185class PagedList:
c07a39ae 4186
4187 class IndexError(IndexError):
4188 pass
4189
dd26ced1
PH
4190 def __len__(self):
4191 # This is only useful for tests
4192 return len(self.getslice())
4193
7be9ccff 4194 def __init__(self, pagefunc, pagesize, use_cache=True):
4195 self._pagefunc = pagefunc
4196 self._pagesize = pagesize
4197 self._use_cache = use_cache
4198 self._cache = {}
4199
4200 def getpage(self, pagenum):
d8cf8d97 4201 page_results = self._cache.get(pagenum)
4202 if page_results is None:
4203 page_results = list(self._pagefunc(pagenum))
7be9ccff 4204 if self._use_cache:
4205 self._cache[pagenum] = page_results
4206 return page_results
4207
4208 def getslice(self, start=0, end=None):
4209 return list(self._getslice(start, end))
4210
4211 def _getslice(self, start, end):
55575225 4212 raise NotImplementedError('This method must be implemented by subclasses')
4213
4214 def __getitem__(self, idx):
7be9ccff 4215 # NOTE: cache must be enabled if this is used
55575225 4216 if not isinstance(idx, int) or idx < 0:
4217 raise TypeError('indices must be non-negative integers')
4218 entries = self.getslice(idx, idx + 1)
d8cf8d97 4219 if not entries:
c07a39ae 4220 raise self.IndexError()
d8cf8d97 4221 return entries[0]
55575225 4222
9c44d242
PH
4223
4224class OnDemandPagedList(PagedList):
7be9ccff 4225 def _getslice(self, start, end):
b7ab0590
PH
4226 for pagenum in itertools.count(start // self._pagesize):
4227 firstid = pagenum * self._pagesize
4228 nextfirstid = pagenum * self._pagesize + self._pagesize
4229 if start >= nextfirstid:
4230 continue
4231
b7ab0590
PH
4232 startv = (
4233 start % self._pagesize
4234 if firstid <= start < nextfirstid
4235 else 0)
b7ab0590
PH
4236 endv = (
4237 ((end - 1) % self._pagesize) + 1
4238 if (end is not None and firstid <= end <= nextfirstid)
4239 else None)
4240
7be9ccff 4241 page_results = self.getpage(pagenum)
b7ab0590
PH
4242 if startv != 0 or endv is not None:
4243 page_results = page_results[startv:endv]
7be9ccff 4244 yield from page_results
b7ab0590
PH
4245
4246 # A little optimization - if the current page is not "full", i.e. does
4247 # not contain page_size videos, then we can assume that this page
4248 # is the last one - there are no more ids on further pages -
4249 # i.e. no need to query again.
4250 if len(page_results) + startv < self._pagesize:
4251 break
4252
4253 # If we got the whole page, but the next page is not interesting,
4254 # break out early as well
4255 if end == nextfirstid:
4256 break
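# Illustrative usage (added commentary, with a hypothetical page function):
# OnDemandPagedList fetches pages lazily and caches them, so random access
# only hits the pages it actually needs:
#     >>> fetch_page = lambda n: range(n * 10, (n + 1) * 10)
#     >>> pl = OnDemandPagedList(fetch_page, 10)
#     >>> pl.getslice(5, 8)
#     [5, 6, 7]
#     >>> pl[25]
#     25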
81c2f20b
PH
4257
4258
9c44d242
PH
4259class InAdvancePagedList(PagedList):
4260 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4261 self._pagecount = pagecount
7be9ccff 4262 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4263
7be9ccff 4264 def _getslice(self, start, end):
9c44d242
PH
4265 start_page = start // self._pagesize
4266 end_page = (
4267 self._pagecount if end is None else (end // self._pagesize + 1))
4268 skip_elems = start - start_page * self._pagesize
4269 only_more = None if end is None else end - start
4270 for pagenum in range(start_page, end_page):
7be9ccff 4271 page_results = self.getpage(pagenum)
9c44d242 4272 if skip_elems:
7be9ccff 4273 page_results = page_results[skip_elems:]
9c44d242
PH
4274 skip_elems = None
4275 if only_more is not None:
7be9ccff 4276 if len(page_results) < only_more:
4277 only_more -= len(page_results)
9c44d242 4278 else:
7be9ccff 4279 yield from page_results[:only_more]
9c44d242 4280 break
7be9ccff 4281 yield from page_results
9c44d242
PH
4282
4283
81c2f20b 4284def uppercase_escape(s):
676eb3f2 4285 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4286 return re.sub(
a612753d 4287 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4288 lambda m: unicode_escape(m.group(0))[0],
4289 s)
0fe2ff78
YCH
4290
4291
4292def lowercase_escape(s):
4293 unicode_escape = codecs.getdecoder('unicode_escape')
4294 return re.sub(
4295 r'\\u[0-9a-fA-F]{4}',
4296 lambda m: unicode_escape(m.group(0))[0],
4297 s)
b53466e1 4298
d05cfe06
S
4299
4300def escape_rfc3986(s):
4301 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4302 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4303 s = s.encode('utf-8')
ecc0c5ee 4304 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4305
4306
4307def escape_url(url):
4308 """Escape URL as suggested by RFC 3986"""
4309 url_parsed = compat_urllib_parse_urlparse(url)
4310 return url_parsed._replace(
efbed08d 4311 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4312 path=escape_rfc3986(url_parsed.path),
4313 params=escape_rfc3986(url_parsed.params),
4314 query=escape_rfc3986(url_parsed.query),
4315 fragment=escape_rfc3986(url_parsed.fragment)
4316 ).geturl()
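# Illustrative usage (added commentary): non-ASCII path/query characters are
# percent-encoded and the host is IDNA-encoded, e.g.:
#     >>> escape_url('https://example.com/vidéo?t=café')
#     'https://example.com/vid%C3%A9o?t=caf%C3%A9'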
4317
62e609ab 4318
4dfbf869 4319def parse_qs(url):
4320 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4321
4322
62e609ab
PH
4323def read_batch_urls(batch_fd):
4324 def fixup(url):
4325 if not isinstance(url, compat_str):
4326 url = url.decode('utf-8', 'replace')
8c04f0be 4327 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4328 for bom in BOM_UTF8:
4329 if url.startswith(bom):
4330 url = url[len(bom):]
4331 url = url.lstrip()
4332 if not url or url.startswith(('#', ';', ']')):
62e609ab 4333 return False
8c04f0be 4334 # "#" cannot be stripped out since it is part of the URI
4335 # However, it can be safely stripped out when it follows a whitespace
4336 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4337
4338 with contextlib.closing(batch_fd) as fd:
4339 return [url for url in map(fixup, fd) if url]
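# Illustrative usage (added commentary): comment lines, blank lines and
# trailing " #..." comments are dropped from a batch file:
#     >>> import io
#     >>> read_batch_urls(io.StringIO('# a comment\nhttps://example.com/a\n\nhttps://example.com/b #note\n'))
#     ['https://example.com/a', 'https://example.com/b']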
b74fa8cd
JMF
4340
4341
4342def urlencode_postdata(*args, **kargs):
15707c7e 4343 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4344
4345
38f9ef31 4346def update_url_query(url, query):
cacd9966
YCH
4347 if not query:
4348 return url
38f9ef31 4349 parsed_url = compat_urlparse.urlparse(url)
4350 qs = compat_parse_qs(parsed_url.query)
4351 qs.update(query)
4352 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4353 query=compat_urllib_parse_urlencode(qs, True)))
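# Illustrative usage (added commentary; the exact parameter order can vary):
#     >>> update_url_query('https://example.com/api?a=1', {'b': ['x', 'y']})
#     'https://example.com/api?a=1&b=x&b=y'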
16392824 4354
8e60dc75 4355
ed0291d1
S
4356def update_Request(req, url=None, data=None, headers={}, query={}):
4357 req_headers = req.headers.copy()
4358 req_headers.update(headers)
4359 req_data = data or req.data
4360 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4361 req_get_method = req.get_method()
4362 if req_get_method == 'HEAD':
4363 req_type = HEADRequest
4364 elif req_get_method == 'PUT':
4365 req_type = PUTRequest
4366 else:
4367 req_type = compat_urllib_request.Request
ed0291d1
S
4368 new_req = req_type(
4369 req_url, data=req_data, headers=req_headers,
4370 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4371 if hasattr(req, 'timeout'):
4372 new_req.timeout = req.timeout
4373 return new_req
4374
4375
10c87c15 4376def _multipart_encode_impl(data, boundary):
0c265486
YCH
4377 content_type = 'multipart/form-data; boundary=%s' % boundary
4378
4379 out = b''
4380 for k, v in data.items():
4381 out += b'--' + boundary.encode('ascii') + b'\r\n'
4382 if isinstance(k, compat_str):
4383 k = k.encode('utf-8')
4384 if isinstance(v, compat_str):
4385 v = v.encode('utf-8')
4386 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4387 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4388 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4389 if boundary.encode('ascii') in content:
4390 raise ValueError('Boundary overlaps with data')
4391 out += content
4392
4393 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4394
4395 return out, content_type
4396
4397
4398def multipart_encode(data, boundary=None):
4399 '''
4400 Encode a dict to RFC 7578-compliant form-data
4401
4402 data:
4403 A dict where keys and values can be either Unicode or bytes-like
4404 objects.
4405 boundary:
4406 If specified, it must be a Unicode object and is used as the boundary.
4407 Otherwise a random boundary is generated.
4408
4409 Reference: https://tools.ietf.org/html/rfc7578
4410 '''
4411 has_specified_boundary = boundary is not None
4412
4413 while True:
4414 if boundary is None:
4415 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4416
4417 try:
10c87c15 4418 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4419 break
4420 except ValueError:
4421 if has_specified_boundary:
4422 raise
4423 boundary = None
4424
4425 return out, content_type
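# Illustrative usage (added commentary, with an explicitly chosen boundary so
# that the output is deterministic):
#     >>> body, ctype = multipart_encode({'field': 'value'}, boundary='X-BOUNDARY')
#     >>> ctype
#     'multipart/form-data; boundary=X-BOUNDARY'
#     >>> body
#     b'--X-BOUNDARY\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--X-BOUNDARY--\r\n'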
4426
4427
86296ad2 4428def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4429 if isinstance(key_or_keys, (list, tuple)):
4430 for key in key_or_keys:
86296ad2
S
4431 if key not in d or d[key] is None or skip_false_values and not d[key]:
4432 continue
4433 return d[key]
cbecc9b9
S
4434 return default
4435 return d.get(key_or_keys, default)
4436
4437
329ca3be 4438def try_get(src, getter, expected_type=None):
6606817a 4439 for get in variadic(getter):
a32a9a7e
S
4440 try:
4441 v = get(src)
4442 except (AttributeError, KeyError, TypeError, IndexError):
4443 pass
4444 else:
4445 if expected_type is None or isinstance(v, expected_type):
4446 return v
329ca3be
S
4447
4448
6cc62232
S
4449def merge_dicts(*dicts):
4450 merged = {}
4451 for a_dict in dicts:
4452 for k, v in a_dict.items():
4453 if v is None:
4454 continue
3089bc74
S
4455 if (k not in merged
4456 or (isinstance(v, compat_str) and v
4457 and isinstance(merged[k], compat_str)
4458 and not merged[k])):
6cc62232
S
4459 merged[k] = v
4460 return merged
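# Illustrative usage of the lookup helpers above (added commentary):
#     >>> dict_get({'a': None, 'b': '', 'c': 3}, ('a', 'b', 'c'))
#     3
#     >>> try_get({'x': {'y': 7}}, lambda d: d['x']['y'], int)
#     7
#     >>> merge_dicts({'title': ''}, {'title': 'Fallback', 'id': '42'})
#     {'title': 'Fallback', 'id': '42'}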
4461
4462
8e60dc75
S
4463def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4464 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4465
16392824 4466
a1a530b0
PH
4467US_RATINGS = {
4468 'G': 0,
4469 'PG': 10,
4470 'PG-13': 13,
4471 'R': 16,
4472 'NC': 18,
4473}
fac55558
PH
4474
4475
a8795327 4476TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4477 'TV-Y': 0,
4478 'TV-Y7': 7,
4479 'TV-G': 0,
4480 'TV-PG': 0,
4481 'TV-14': 14,
4482 'TV-MA': 17,
a8795327
S
4483}
4484
4485
146c80e2 4486def parse_age_limit(s):
a8795327
S
4487 if type(s) == int:
4488 return s if 0 <= s <= 21 else None
4489 if not isinstance(s, compat_basestring):
d838b1bd 4490 return None
146c80e2 4491 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4492 if m:
4493 return int(m.group('age'))
5c5fae6d 4494 s = s.upper()
a8795327
S
4495 if s in US_RATINGS:
4496 return US_RATINGS[s]
5a16c9d9 4497 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4498 if m:
5a16c9d9 4499 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4500 return None
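# Illustrative usage (added commentary): numeric strings, US movie ratings and
# TV parental guidelines are all mapped onto a plain age number:
#     >>> parse_age_limit('18+')
#     18
#     >>> parse_age_limit('PG-13')
#     13
#     >>> parse_age_limit('TV-MA')
#     17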
146c80e2
S
4501
4502
fac55558 4503def strip_jsonp(code):
609a61e3 4504 return re.sub(
5552c9eb 4505 r'''(?sx)^
e9c671d5 4506 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4507 (?:\s*&&\s*(?P=func_name))?
4508 \s*\(\s*(?P<callback_data>.*)\);?
4509 \s*?(?://[^\n]*)*$''',
4510 r'\g<callback_data>', code)
478c2c61
PH
4511
4512
5c610515 4513def js_to_json(code, vars={}):
4514 # vars is a dict of var, val pairs to substitute
c843e685 4515 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4516 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4517 INTEGER_TABLE = (
4518 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4519 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4520 )
4521
e05f6939 4522 def fix_kv(m):
e7b6d122
PH
4523 v = m.group(0)
4524 if v in ('true', 'false', 'null'):
4525 return v
421ddcb8
C
4526 elif v in ('undefined', 'void 0'):
4527 return 'null'
8bdd16b4 4528 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4529 return ""
4530
4531 if v[0] in ("'", '"'):
4532 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4533 '"': '\\"',
bd1e4844 4534 "\\'": "'",
4535 '\\\n': '',
4536 '\\x': '\\u00',
4537 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4538 else:
4539 for regex, base in INTEGER_TABLE:
4540 im = re.match(regex, v)
4541 if im:
4542 i = int(im.group(1), base)
4543 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4544
5c610515 4545 if v in vars:
4546 return vars[v]
4547
e7b6d122 4548 return '"%s"' % v
e05f6939 4549
bd1e4844 4550 return re.sub(r'''(?sx)
4551 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4552 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4553 {comment}|,(?={skip}[\]}}])|
421ddcb8 4554 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4555 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4556 [0-9]+(?={skip}:)|
4557 !+
4195096e 4558 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
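# Illustrative usage (added commentary): unquoted keys, single quotes and hex
# literals are normalised into valid JSON:
#     >>> js_to_json("{foo: 'bar', baz: 0x10}")
#     '{"foo": "bar", "baz": 16}'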
e05f6939
PH
4559
4560
478c2c61
PH
4561def qualities(quality_ids):
4562 """ Get a numeric quality value out of a list of possible values """
4563 def q(qid):
4564 try:
4565 return quality_ids.index(qid)
4566 except ValueError:
4567 return -1
4568 return q
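# Illustrative usage (added commentary): the returned callable maps known
# quality ids to their position and unknown ones to -1:
#     >>> q = qualities(['240p', '360p', '720p'])
#     >>> q('720p'), q('4320p')
#     (2, -1)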
4569
acd69589 4570
de6000d9 4571DEFAULT_OUTTMPL = {
4572 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4573 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4574}
4575OUTTMPL_TYPES = {
72755351 4576 'chapter': None,
de6000d9 4577 'subtitle': None,
4578 'thumbnail': None,
4579 'description': 'description',
4580 'annotation': 'annotations.xml',
4581 'infojson': 'info.json',
08438d2c 4582 'link': None,
5112f26a 4583 'pl_thumbnail': None,
de6000d9 4584 'pl_description': 'description',
4585 'pl_infojson': 'info.json',
4586}
0a871f68 4587
143db31d 4588# As of [1] format syntax is:
4589# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4590# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4591STR_FORMAT_RE_TMPL = r'''(?x)
4592 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4593 %
524e2e4f 4594 (?P<has_key>\((?P<key>{0})\))?
752cda38 4595 (?P<format>
524e2e4f 4596 (?P<conversion>[#0\-+ ]+)?
4597 (?P<min_width>\d+)?
4598 (?P<precision>\.\d+)?
4599 (?P<len_mod>[hlL])? # unused in python
901130bb 4600 {1} # conversion type
752cda38 4601 )
143db31d 4602'''
4603
7d1eb38a 4604
901130bb 4605STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4606
7d1eb38a 4607
a020a0dc
PH
4608def limit_length(s, length):
4609 """ Add ellipses to overly long strings """
4610 if s is None:
4611 return None
4612 ELLIPSES = '...'
4613 if len(s) > length:
4614 return s[:length - len(ELLIPSES)] + ELLIPSES
4615 return s
48844745
PH
4616
4617
4618def version_tuple(v):
5f9b8394 4619 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4620
4621
4622def is_outdated_version(version, limit, assume_new=True):
4623 if not version:
4624 return not assume_new
4625 try:
4626 return version_tuple(version) < version_tuple(limit)
4627 except ValueError:
4628 return not assume_new
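# Illustrative usage of the helpers above (added commentary):
#     >>> limit_length('A fairly long video title', 10)
#     'A fairl...'
#     >>> version_tuple('2021.12.01')
#     (2021, 12, 1)
#     >>> is_outdated_version('2021.10.10', '2021.12.01')
#     True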
732ea2f0
PH
4629
4630
4631def ytdl_is_updateable():
7a5c1cfe 4632 """ Returns if yt-dlp can be updated with -U """
735d865e 4633
5d535b4a 4634 from .update import is_non_updateable
732ea2f0 4635
5d535b4a 4636 return not is_non_updateable()
7d4111ed
PH
4637
4638
4639def args_to_str(args):
4640 # Get a short string representation for a subprocess command
702ccf2d 4641 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4642
4643
9b9c5355 4644def error_to_compat_str(err):
fdae2358
S
4645 err_str = str(err)
4646 # On python 2 error byte string must be decoded with proper
4647 # encoding rather than ascii
4648 if sys.version_info[0] < 3:
4649 err_str = err_str.decode(preferredencoding())
4650 return err_str
4651
4652
c460bdd5 4653def mimetype2ext(mt):
eb9ee194
S
4654 if mt is None:
4655 return None
4656
9359f3d4
F
4657 mt, _, params = mt.partition(';')
4658 mt = mt.strip()
4659
4660 FULL_MAP = {
765ac263 4661 'audio/mp4': 'm4a',
6c33d24b
YCH
4662 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
4663 # as it is the most popular one
4664 'audio/mpeg': 'mp3',
ba39289d 4665 'audio/x-wav': 'wav',
9359f3d4
F
4666 'audio/wav': 'wav',
4667 'audio/wave': 'wav',
4668 }
4669
4670 ext = FULL_MAP.get(mt)
765ac263
JMF
4671 if ext is not None:
4672 return ext
4673
9359f3d4 4674 SUBTYPE_MAP = {
f6861ec9 4675 '3gpp': '3gp',
cafcf657 4676 'smptett+xml': 'tt',
cafcf657 4677 'ttaf+xml': 'dfxp',
a0d8d704 4678 'ttml+xml': 'ttml',
f6861ec9 4679 'x-flv': 'flv',
a0d8d704 4680 'x-mp4-fragmented': 'mp4',
d4f05d47 4681 'x-ms-sami': 'sami',
a0d8d704 4682 'x-ms-wmv': 'wmv',
b4173f15
RA
4683 'mpegurl': 'm3u8',
4684 'x-mpegurl': 'm3u8',
4685 'vnd.apple.mpegurl': 'm3u8',
4686 'dash+xml': 'mpd',
b4173f15 4687 'f4m+xml': 'f4m',
f164b971 4688 'hds+xml': 'f4m',
e910fe2f 4689 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4690 'quicktime': 'mov',
98ce1a3f 4691 'mp2t': 'ts',
39e7107d 4692 'x-wav': 'wav',
9359f3d4
F
4693 'filmstrip+json': 'fs',
4694 'svg+xml': 'svg',
4695 }
4696
4697 _, _, subtype = mt.rpartition('/')
4698 ext = SUBTYPE_MAP.get(subtype.lower())
4699 if ext is not None:
4700 return ext
4701
4702 SUFFIX_MAP = {
4703 'json': 'json',
4704 'xml': 'xml',
4705 'zip': 'zip',
4706 'gzip': 'gz',
4707 }
4708
4709 _, _, suffix = subtype.partition('+')
4710 ext = SUFFIX_MAP.get(suffix)
4711 if ext is not None:
4712 return ext
4713
4714 return subtype.replace('+', '.')
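# Illustrative usage (added commentary): parameters are stripped and common
# MIME types are mapped to the extensions used throughout the codebase:
#     >>> mimetype2ext('audio/mp4')
#     'm4a'
#     >>> mimetype2ext('application/x-mpegURL')
#     'm3u8'
#     >>> mimetype2ext('video/mp4; codecs="avc1.4d401e"')
#     'mp4'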
c460bdd5
PH
4715
4716
4f3c5e06 4717def parse_codecs(codecs_str):
4718 # http://tools.ietf.org/html/rfc6381
4719 if not codecs_str:
4720 return {}
a0566bbf 4721 split_codecs = list(filter(None, map(
dbf5416a 4722 str.strip, codecs_str.strip().strip(',').split(','))))
176f1866 4723 vcodec, acodec, hdr = None, None, None
a0566bbf 4724 for full_codec in split_codecs:
9bd979ca 4725 parts = full_codec.split('.')
4726 codec = parts[0].replace('0', '')
4727 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
4728 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
4f3c5e06 4729 if not vcodec:
9bd979ca 4730 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
176f1866 4731 if codec in ('dvh1', 'dvhe'):
4732 hdr = 'DV'
9bd979ca 4733 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
4734 hdr = 'HDR10'
4735 elif full_codec.replace('0', '').startswith('vp9.2'):
176f1866 4736 hdr = 'HDR10'
60f5c9fb 4737 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4738 if not acodec:
4739 acodec = full_codec
4740 else:
60f5c9fb 4741 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4742 if not vcodec and not acodec:
a0566bbf 4743 if len(split_codecs) == 2:
4f3c5e06 4744 return {
a0566bbf 4745 'vcodec': split_codecs[0],
4746 'acodec': split_codecs[1],
4f3c5e06 4747 }
4748 else:
4749 return {
4750 'vcodec': vcodec or 'none',
4751 'acodec': acodec or 'none',
176f1866 4752 'dynamic_range': hdr,
4f3c5e06 4753 }
4754 return {}
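# Illustrative usage (added commentary): an RFC 6381 codecs string is split
# into video/audio codec fields (plus the detected dynamic range, if any):
#     >>> parse_codecs('avc1.64001f, mp4a.40.2')
#     {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}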
4755
4756
2ccd1b10 4757def urlhandle_detect_ext(url_handle):
79298173 4758 getheader = url_handle.headers.get
2ccd1b10 4759
b55ee18f
PH
4760 cd = getheader('Content-Disposition')
4761 if cd:
4762 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4763 if m:
4764 e = determine_ext(m.group('filename'), default_ext=None)
4765 if e:
4766 return e
4767
c460bdd5 4768 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4769
4770
1e399778
YCH
4771def encode_data_uri(data, mime_type):
4772 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4773
4774
05900629 4775def age_restricted(content_limit, age_limit):
6ec6cb4e 4776 """ Returns True iff the content should be blocked """
05900629
PH
4777
4778 if age_limit is None: # No limit set
4779 return False
4780 if content_limit is None:
4781 return False # Content available for everyone
4782 return age_limit < content_limit
61ca9a80
PH
4783
4784
4785def is_html(first_bytes):
4786 """ Detect whether a file contains HTML by examining its first bytes. """
4787
4788 BOMS = [
4789 (b'\xef\xbb\xbf', 'utf-8'),
4790 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4791 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4792 (b'\xff\xfe', 'utf-16-le'),
4793 (b'\xfe\xff', 'utf-16-be'),
4794 ]
4795 for bom, enc in BOMS:
4796 if first_bytes.startswith(bom):
4797 s = first_bytes[len(bom):].decode(enc, 'replace')
4798 break
4799 else:
4800 s = first_bytes.decode('utf-8', 'replace')
4801
4802 return re.match(r'^\s*<', s)
a055469f
PH
4803
4804
4805def determine_protocol(info_dict):
4806 protocol = info_dict.get('protocol')
4807 if protocol is not None:
4808 return protocol
4809
7de837a5 4810 url = sanitize_url(info_dict['url'])
a055469f
PH
4811 if url.startswith('rtmp'):
4812 return 'rtmp'
4813 elif url.startswith('mms'):
4814 return 'mms'
4815 elif url.startswith('rtsp'):
4816 return 'rtsp'
4817
4818 ext = determine_ext(url)
4819 if ext == 'm3u8':
4820 return 'm3u8'
4821 elif ext == 'f4m':
4822 return 'f4m'
4823
4824 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4825
4826
c5e3f849 4827def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
4828 """ Render a list of rows, each as a list of values.
4829 Text after a \t will be right aligned """
ec11a9f4 4830 def width(string):
c5e3f849 4831 return len(remove_terminal_sequences(string).replace('\t', ''))
76d321f6 4832
4833 def get_max_lens(table):
ec11a9f4 4834 return [max(width(str(v)) for v in col) for col in zip(*table)]
76d321f6 4835
4836 def filter_using_list(row, filterArray):
4837 return [col for (take, col) in zip(filterArray, row) if take]
4838
c5e3f849 4839 if hide_empty:
76d321f6 4840 max_lens = get_max_lens(data)
4841 header_row = filter_using_list(header_row, max_lens)
4842 data = [filter_using_list(row, max_lens) for row in data]
4843
cfb56d1a 4844 table = [header_row] + data
76d321f6 4845 max_lens = get_max_lens(table)
c5e3f849 4846 extra_gap += 1
76d321f6 4847 if delim:
c5e3f849 4848 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
4849 table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter
ec11a9f4 4850 for row in table:
4851 for pos, text in enumerate(map(str, row)):
c5e3f849 4852 if '\t' in text:
4853 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
4854 else:
4855 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
4856 ret = '\n'.join(''.join(row).rstrip() for row in table)
ec11a9f4 4857 return ret
347de493
PH
4858
4859
8f18aca8 4860def _match_one(filter_part, dct, incomplete):
77b87f05 4861 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4862 STRING_OPERATORS = {
4863 '*=': operator.contains,
4864 '^=': lambda attr, value: attr.startswith(value),
4865 '$=': lambda attr, value: attr.endswith(value),
4866 '~=': lambda attr, value: re.search(value, attr),
4867 }
347de493 4868 COMPARISON_OPERATORS = {
a047eeb6 4869 **STRING_OPERATORS,
4870 '<=': operator.le, # "<=" must be defined above "<"
347de493 4871 '<': operator.lt,
347de493 4872 '>=': operator.ge,
a047eeb6 4873 '>': operator.gt,
347de493 4874 '=': operator.eq,
347de493 4875 }
a047eeb6 4876
347de493
PH
4877 operator_rex = re.compile(r'''(?x)\s*
4878 (?P<key>[a-z_]+)
77b87f05 4879 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493 4880 (?:
a047eeb6 4881 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4882 (?P<strval>.+?)
347de493
PH
4883 )
4884 \s*$
4885 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4886 m = operator_rex.search(filter_part)
4887 if m:
18f96d12 4888 m = m.groupdict()
4889 unnegated_op = COMPARISON_OPERATORS[m['op']]
4890 if m['negation']:
77b87f05
MT
4891 op = lambda attr, value: not unnegated_op(attr, value)
4892 else:
4893 op = unnegated_op
18f96d12 4894 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4895 if m['quote']:
4896 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4897 actual_value = dct.get(m['key'])
4898 numeric_comparison = None
4899 if isinstance(actual_value, compat_numeric_types):
e5a088dc
S
4900 # If the original field is a string and the matching comparison value is
4901 # a number, we should respect the origin of the original field
4902 # and process comparison value as a string (see
18f96d12 4903 # https://github.com/ytdl-org/youtube-dl/issues/11082)
347de493 4904 try:
18f96d12 4905 numeric_comparison = int(comparison_value)
347de493 4906 except ValueError:
18f96d12 4907 numeric_comparison = parse_filesize(comparison_value)
4908 if numeric_comparison is None:
4909 numeric_comparison = parse_filesize(f'{comparison_value}B')
4910 if numeric_comparison is None:
4911 numeric_comparison = parse_duration(comparison_value)
4912 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4913 raise ValueError('Operator %s only supports string values!' % m['op'])
347de493 4914 if actual_value is None:
18f96d12 4915 return incomplete or m['none_inclusive']
4916 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
347de493
PH
4917
4918 UNARY_OPERATORS = {
1cc47c66
S
4919 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4920 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4921 }
4922 operator_rex = re.compile(r'''(?x)\s*
4923 (?P<op>%s)\s*(?P<key>[a-z_]+)
4924 \s*$
4925 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4926 m = operator_rex.search(filter_part)
4927 if m:
4928 op = UNARY_OPERATORS[m.group('op')]
4929 actual_value = dct.get(m.group('key'))
8f18aca8 4930 if incomplete and actual_value is None:
4931 return True
347de493
PH
4932 return op(actual_value)
4933
4934 raise ValueError('Invalid filter part %r' % filter_part)
4935
4936
8f18aca8 4937def match_str(filter_str, dct, incomplete=False):
4938 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False
4939 When incomplete, all conditions pass on missing fields
4940 """
347de493 4941 return all(
8f18aca8 4942 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4943 for filter_part in re.split(r'(?<!\\)&', filter_str))
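# Illustrative usage (added commentary): conditions separated by '&' must all
# hold for the dict to pass the filter:
#     >>> match_str('duration > 60 & !is_live', {'duration': 90, 'is_live': False})
#     True
#     >>> match_str('uploader = someone', {'uploader': 'someone else'})
#     False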
347de493
PH
4944
4945
4946def match_filter_func(filter_str):
8f18aca8 4947 def _match_func(info_dict, *args, **kwargs):
4948 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4949 return None
4950 else:
4951 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4952 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4953 return _match_func
91410c9b
PH
4954
4955
bf6427d2
YCH
4956def parse_dfxp_time_expr(time_expr):
4957 if not time_expr:
d631d5f9 4958 return
bf6427d2
YCH
4959
4960 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4961 if mobj:
4962 return float(mobj.group('time_offset'))
4963
db2fe38b 4964 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4965 if mobj:
db2fe38b 4966 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4967
4968
c1c924ab 4969def srt_subtitles_timecode(seconds):
aa7785f8 4970 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4971
4972
4973def ass_subtitles_timecode(seconds):
4974 time = timetuple_from_msec(seconds * 1000)
4975 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
bf6427d2
YCH
4976
4977
4978def dfxp2srt(dfxp_data):
3869028f
YCH
4979 '''
4980 @param dfxp_data A bytes-like object containing DFXP data
4981 @returns A unicode object containing converted SRT data
4982 '''
5b995f71 4983 LEGACY_NAMESPACES = (
3869028f
YCH
4984 (b'http://www.w3.org/ns/ttml', [
4985 b'http://www.w3.org/2004/11/ttaf1',
4986 b'http://www.w3.org/2006/04/ttaf1',
4987 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4988 ]),
3869028f
YCH
4989 (b'http://www.w3.org/ns/ttml#styling', [
4990 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4991 ]),
4992 )
4993
4994 SUPPORTED_STYLING = [
4995 'color',
4996 'fontFamily',
4997 'fontSize',
4998 'fontStyle',
4999 'fontWeight',
5000 'textDecoration'
5001 ]
5002
4e335771 5003 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 5004 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 5005 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 5006 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 5007 })
bf6427d2 5008
5b995f71
RA
5009 styles = {}
5010 default_style = {}
5011
87de7069 5012 class TTMLPElementParser(object):
5b995f71
RA
5013 _out = ''
5014 _unclosed_elements = []
5015 _applied_styles = []
bf6427d2 5016
2b14cb56 5017 def start(self, tag, attrib):
5b995f71
RA
5018 if tag in (_x('ttml:br'), 'br'):
5019 self._out += '\n'
5020 else:
5021 unclosed_elements = []
5022 style = {}
5023 element_style_id = attrib.get('style')
5024 if default_style:
5025 style.update(default_style)
5026 if element_style_id:
5027 style.update(styles.get(element_style_id, {}))
5028 for prop in SUPPORTED_STYLING:
5029 prop_val = attrib.get(_x('tts:' + prop))
5030 if prop_val:
5031 style[prop] = prop_val
5032 if style:
5033 font = ''
5034 for k, v in sorted(style.items()):
5035 if self._applied_styles and self._applied_styles[-1].get(k) == v:
5036 continue
5037 if k == 'color':
5038 font += ' color="%s"' % v
5039 elif k == 'fontSize':
5040 font += ' size="%s"' % v
5041 elif k == 'fontFamily':
5042 font += ' face="%s"' % v
5043 elif k == 'fontWeight' and v == 'bold':
5044 self._out += '<b>'
5045 unclosed_elements.append('b')
5046 elif k == 'fontStyle' and v == 'italic':
5047 self._out += '<i>'
5048 unclosed_elements.append('i')
5049 elif k == 'textDecoration' and v == 'underline':
5050 self._out += '<u>'
5051 unclosed_elements.append('u')
5052 if font:
5053 self._out += '<font' + font + '>'
5054 unclosed_elements.append('font')
5055 applied_style = {}
5056 if self._applied_styles:
5057 applied_style.update(self._applied_styles[-1])
5058 applied_style.update(style)
5059 self._applied_styles.append(applied_style)
5060 self._unclosed_elements.append(unclosed_elements)
bf6427d2 5061
2b14cb56 5062 def end(self, tag):
5b995f71
RA
5063 if tag not in (_x('ttml:br'), 'br'):
5064 unclosed_elements = self._unclosed_elements.pop()
5065 for element in reversed(unclosed_elements):
5066 self._out += '</%s>' % element
5067 if unclosed_elements and self._applied_styles:
5068 self._applied_styles.pop()
bf6427d2 5069
2b14cb56 5070 def data(self, data):
5b995f71 5071 self._out += data
2b14cb56 5072
5073 def close(self):
5b995f71 5074 return self._out.strip()
2b14cb56 5075
5076 def parse_node(node):
5077 target = TTMLPElementParser()
5078 parser = xml.etree.ElementTree.XMLParser(target=target)
5079 parser.feed(xml.etree.ElementTree.tostring(node))
5080 return parser.close()
bf6427d2 5081
5b995f71
RA
5082 for k, v in LEGACY_NAMESPACES:
5083 for ns in v:
5084 dfxp_data = dfxp_data.replace(ns, k)
5085
3869028f 5086 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 5087 out = []
5b995f71 5088 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
5089
5090 if not paras:
5091 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 5092
5b995f71
RA
5093 repeat = False
5094 while True:
5095 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
5096 style_id = style.get('id') or style.get(_x('xml:id'))
5097 if not style_id:
5098 continue
5b995f71
RA
5099 parent_style_id = style.get('style')
5100 if parent_style_id:
5101 if parent_style_id not in styles:
5102 repeat = True
5103 continue
5104 styles[style_id] = styles[parent_style_id].copy()
5105 for prop in SUPPORTED_STYLING:
5106 prop_val = style.get(_x('tts:' + prop))
5107 if prop_val:
5108 styles.setdefault(style_id, {})[prop] = prop_val
5109 if repeat:
5110 repeat = False
5111 else:
5112 break
5113
5114 for p in ('body', 'div'):
5115 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5116 if ele is None:
5117 continue
5118 style = styles.get(ele.get('style'))
5119 if not style:
5120 continue
5121 default_style.update(style)
5122
bf6427d2 5123 for para, index in zip(paras, itertools.count(1)):
d631d5f9 5124 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 5125 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
5126 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5127 if begin_time is None:
5128 continue
7dff0363 5129 if not end_time:
d631d5f9
YCH
5130 if not dur:
5131 continue
5132 end_time = begin_time + dur
bf6427d2
YCH
5133 out.append('%d\n%s --> %s\n%s\n\n' % (
5134 index,
c1c924ab
YCH
5135 srt_subtitles_timecode(begin_time),
5136 srt_subtitles_timecode(end_time),
bf6427d2
YCH
5137 parse_node(para)))
5138
5139 return ''.join(out)
5140
5141
66e289ba
S
5142def cli_option(params, command_option, param):
5143 param = params.get(param)
98e698f1
RA
5144 if param:
5145 param = compat_str(param)
66e289ba
S
5146 return [command_option, param] if param is not None else []
5147
5148
5149def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
5150 param = params.get(param)
5b232f46
S
5151 if param is None:
5152 return []
66e289ba
S
5153 assert isinstance(param, bool)
5154 if separator:
5155 return [command_option + separator + (true_value if param else false_value)]
5156 return [command_option, true_value if param else false_value]
5157
5158
5159def cli_valueless_option(params, command_option, param, expected_value=True):
5160 param = params.get(param)
5161 return [command_option] if param == expected_value else []
5162
5163
e92caff5 5164def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 5165 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 5166 if use_compat:
5b1ecbb3 5167 return argdict
5168 else:
5169 argdict = None
eab9b2bc 5170 if argdict is None:
5b1ecbb3 5171 return default
eab9b2bc 5172 assert isinstance(argdict, dict)
5173
e92caff5 5174 assert isinstance(keys, (list, tuple))
5175 for key_list in keys:
e92caff5 5176 arg_list = list(filter(
5177 lambda x: x is not None,
6606817a 5178 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5179 if arg_list:
5180 return [arg for args in arg_list for arg in args]
5181 return default
66e289ba 5182
6251555f 5183
330690a2 5184def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5185 main_key, exe = main_key.lower(), exe.lower()
5186 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5187 keys = [f'{root_key}{k}' for k in (keys or [''])]
5188 if root_key in keys:
5189 if main_key != exe:
5190 keys.append((main_key, exe))
5191 keys.append('default')
5192 else:
5193 use_compat = False
5194 return cli_configuration_args(argdict, keys, default, use_compat)
5195
66e289ba 5196
39672624
YCH
5197class ISO639Utils(object):
5198 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5199 _lang_map = {
5200 'aa': 'aar',
5201 'ab': 'abk',
5202 'ae': 'ave',
5203 'af': 'afr',
5204 'ak': 'aka',
5205 'am': 'amh',
5206 'an': 'arg',
5207 'ar': 'ara',
5208 'as': 'asm',
5209 'av': 'ava',
5210 'ay': 'aym',
5211 'az': 'aze',
5212 'ba': 'bak',
5213 'be': 'bel',
5214 'bg': 'bul',
5215 'bh': 'bih',
5216 'bi': 'bis',
5217 'bm': 'bam',
5218 'bn': 'ben',
5219 'bo': 'bod',
5220 'br': 'bre',
5221 'bs': 'bos',
5222 'ca': 'cat',
5223 'ce': 'che',
5224 'ch': 'cha',
5225 'co': 'cos',
5226 'cr': 'cre',
5227 'cs': 'ces',
5228 'cu': 'chu',
5229 'cv': 'chv',
5230 'cy': 'cym',
5231 'da': 'dan',
5232 'de': 'deu',
5233 'dv': 'div',
5234 'dz': 'dzo',
5235 'ee': 'ewe',
5236 'el': 'ell',
5237 'en': 'eng',
5238 'eo': 'epo',
5239 'es': 'spa',
5240 'et': 'est',
5241 'eu': 'eus',
5242 'fa': 'fas',
5243 'ff': 'ful',
5244 'fi': 'fin',
5245 'fj': 'fij',
5246 'fo': 'fao',
5247 'fr': 'fra',
5248 'fy': 'fry',
5249 'ga': 'gle',
5250 'gd': 'gla',
5251 'gl': 'glg',
5252 'gn': 'grn',
5253 'gu': 'guj',
5254 'gv': 'glv',
5255 'ha': 'hau',
5256 'he': 'heb',
b7acc835 5257 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5258 'hi': 'hin',
5259 'ho': 'hmo',
5260 'hr': 'hrv',
5261 'ht': 'hat',
5262 'hu': 'hun',
5263 'hy': 'hye',
5264 'hz': 'her',
5265 'ia': 'ina',
5266 'id': 'ind',
b7acc835 5267 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5268 'ie': 'ile',
5269 'ig': 'ibo',
5270 'ii': 'iii',
5271 'ik': 'ipk',
5272 'io': 'ido',
5273 'is': 'isl',
5274 'it': 'ita',
5275 'iu': 'iku',
5276 'ja': 'jpn',
5277 'jv': 'jav',
5278 'ka': 'kat',
5279 'kg': 'kon',
5280 'ki': 'kik',
5281 'kj': 'kua',
5282 'kk': 'kaz',
5283 'kl': 'kal',
5284 'km': 'khm',
5285 'kn': 'kan',
5286 'ko': 'kor',
5287 'kr': 'kau',
5288 'ks': 'kas',
5289 'ku': 'kur',
5290 'kv': 'kom',
5291 'kw': 'cor',
5292 'ky': 'kir',
5293 'la': 'lat',
5294 'lb': 'ltz',
5295 'lg': 'lug',
5296 'li': 'lim',
5297 'ln': 'lin',
5298 'lo': 'lao',
5299 'lt': 'lit',
5300 'lu': 'lub',
5301 'lv': 'lav',
5302 'mg': 'mlg',
5303 'mh': 'mah',
5304 'mi': 'mri',
5305 'mk': 'mkd',
5306 'ml': 'mal',
5307 'mn': 'mon',
5308 'mr': 'mar',
5309 'ms': 'msa',
5310 'mt': 'mlt',
5311 'my': 'mya',
5312 'na': 'nau',
5313 'nb': 'nob',
5314 'nd': 'nde',
5315 'ne': 'nep',
5316 'ng': 'ndo',
5317 'nl': 'nld',
5318 'nn': 'nno',
5319 'no': 'nor',
5320 'nr': 'nbl',
5321 'nv': 'nav',
5322 'ny': 'nya',
5323 'oc': 'oci',
5324 'oj': 'oji',
5325 'om': 'orm',
5326 'or': 'ori',
5327 'os': 'oss',
5328 'pa': 'pan',
5329 'pi': 'pli',
5330 'pl': 'pol',
5331 'ps': 'pus',
5332 'pt': 'por',
5333 'qu': 'que',
5334 'rm': 'roh',
5335 'rn': 'run',
5336 'ro': 'ron',
5337 'ru': 'rus',
5338 'rw': 'kin',
5339 'sa': 'san',
5340 'sc': 'srd',
5341 'sd': 'snd',
5342 'se': 'sme',
5343 'sg': 'sag',
5344 'si': 'sin',
5345 'sk': 'slk',
5346 'sl': 'slv',
5347 'sm': 'smo',
5348 'sn': 'sna',
5349 'so': 'som',
5350 'sq': 'sqi',
5351 'sr': 'srp',
5352 'ss': 'ssw',
5353 'st': 'sot',
5354 'su': 'sun',
5355 'sv': 'swe',
5356 'sw': 'swa',
5357 'ta': 'tam',
5358 'te': 'tel',
5359 'tg': 'tgk',
5360 'th': 'tha',
5361 'ti': 'tir',
5362 'tk': 'tuk',
5363 'tl': 'tgl',
5364 'tn': 'tsn',
5365 'to': 'ton',
5366 'tr': 'tur',
5367 'ts': 'tso',
5368 'tt': 'tat',
5369 'tw': 'twi',
5370 'ty': 'tah',
5371 'ug': 'uig',
5372 'uk': 'ukr',
5373 'ur': 'urd',
5374 'uz': 'uzb',
5375 've': 'ven',
5376 'vi': 'vie',
5377 'vo': 'vol',
5378 'wa': 'wln',
5379 'wo': 'wol',
5380 'xh': 'xho',
5381 'yi': 'yid',
e9a50fba 5382 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5383 'yo': 'yor',
5384 'za': 'zha',
5385 'zh': 'zho',
5386 'zu': 'zul',
5387 }
5388
5389 @classmethod
5390 def short2long(cls, code):
5391 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5392 return cls._lang_map.get(code[:2])
5393
5394 @classmethod
5395 def long2short(cls, code):
5396 """Convert language code from ISO 639-2/T to ISO 639-1"""
5397 for short_name, long_name in cls._lang_map.items():
5398 if long_name == code:
5399 return short_name
5400
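# Illustrative usage (added commentary):
#     >>> ISO639Utils.short2long('en')
#     'eng'
#     >>> ISO639Utils.long2short('deu')
#     'de'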
5401
4eb10f66
YCH
5402class ISO3166Utils(object):
5403 # From http://data.okfn.org/data/core/country-list
5404 _country_map = {
5405 'AF': 'Afghanistan',
5406 'AX': 'Åland Islands',
5407 'AL': 'Albania',
5408 'DZ': 'Algeria',
5409 'AS': 'American Samoa',
5410 'AD': 'Andorra',
5411 'AO': 'Angola',
5412 'AI': 'Anguilla',
5413 'AQ': 'Antarctica',
5414 'AG': 'Antigua and Barbuda',
5415 'AR': 'Argentina',
5416 'AM': 'Armenia',
5417 'AW': 'Aruba',
5418 'AU': 'Australia',
5419 'AT': 'Austria',
5420 'AZ': 'Azerbaijan',
5421 'BS': 'Bahamas',
5422 'BH': 'Bahrain',
5423 'BD': 'Bangladesh',
5424 'BB': 'Barbados',
5425 'BY': 'Belarus',
5426 'BE': 'Belgium',
5427 'BZ': 'Belize',
5428 'BJ': 'Benin',
5429 'BM': 'Bermuda',
5430 'BT': 'Bhutan',
5431 'BO': 'Bolivia, Plurinational State of',
5432 'BQ': 'Bonaire, Sint Eustatius and Saba',
5433 'BA': 'Bosnia and Herzegovina',
5434 'BW': 'Botswana',
5435 'BV': 'Bouvet Island',
5436 'BR': 'Brazil',
5437 'IO': 'British Indian Ocean Territory',
5438 'BN': 'Brunei Darussalam',
5439 'BG': 'Bulgaria',
5440 'BF': 'Burkina Faso',
5441 'BI': 'Burundi',
5442 'KH': 'Cambodia',
5443 'CM': 'Cameroon',
5444 'CA': 'Canada',
5445 'CV': 'Cape Verde',
5446 'KY': 'Cayman Islands',
5447 'CF': 'Central African Republic',
5448 'TD': 'Chad',
5449 'CL': 'Chile',
5450 'CN': 'China',
5451 'CX': 'Christmas Island',
5452 'CC': 'Cocos (Keeling) Islands',
5453 'CO': 'Colombia',
5454 'KM': 'Comoros',
5455 'CG': 'Congo',
5456 'CD': 'Congo, the Democratic Republic of the',
5457 'CK': 'Cook Islands',
5458 'CR': 'Costa Rica',
5459 'CI': 'Côte d\'Ivoire',
5460 'HR': 'Croatia',
5461 'CU': 'Cuba',
5462 'CW': 'Curaçao',
5463 'CY': 'Cyprus',
5464 'CZ': 'Czech Republic',
5465 'DK': 'Denmark',
5466 'DJ': 'Djibouti',
5467 'DM': 'Dominica',
5468 'DO': 'Dominican Republic',
5469 'EC': 'Ecuador',
5470 'EG': 'Egypt',
5471 'SV': 'El Salvador',
5472 'GQ': 'Equatorial Guinea',
5473 'ER': 'Eritrea',
5474 'EE': 'Estonia',
5475 'ET': 'Ethiopia',
5476 'FK': 'Falkland Islands (Malvinas)',
5477 'FO': 'Faroe Islands',
5478 'FJ': 'Fiji',
5479 'FI': 'Finland',
5480 'FR': 'France',
5481 'GF': 'French Guiana',
5482 'PF': 'French Polynesia',
5483 'TF': 'French Southern Territories',
5484 'GA': 'Gabon',
5485 'GM': 'Gambia',
5486 'GE': 'Georgia',
5487 'DE': 'Germany',
5488 'GH': 'Ghana',
5489 'GI': 'Gibraltar',
5490 'GR': 'Greece',
5491 'GL': 'Greenland',
5492 'GD': 'Grenada',
5493 'GP': 'Guadeloupe',
5494 'GU': 'Guam',
5495 'GT': 'Guatemala',
5496 'GG': 'Guernsey',
5497 'GN': 'Guinea',
5498 'GW': 'Guinea-Bissau',
5499 'GY': 'Guyana',
5500 'HT': 'Haiti',
5501 'HM': 'Heard Island and McDonald Islands',
5502 'VA': 'Holy See (Vatican City State)',
5503 'HN': 'Honduras',
5504 'HK': 'Hong Kong',
5505 'HU': 'Hungary',
5506 'IS': 'Iceland',
5507 'IN': 'India',
5508 'ID': 'Indonesia',
5509 'IR': 'Iran, Islamic Republic of',
5510 'IQ': 'Iraq',
5511 'IE': 'Ireland',
5512 'IM': 'Isle of Man',
5513 'IL': 'Israel',
5514 'IT': 'Italy',
5515 'JM': 'Jamaica',
5516 'JP': 'Japan',
5517 'JE': 'Jersey',
5518 'JO': 'Jordan',
5519 'KZ': 'Kazakhstan',
5520 'KE': 'Kenya',
5521 'KI': 'Kiribati',
5522 'KP': 'Korea, Democratic People\'s Republic of',
5523 'KR': 'Korea, Republic of',
5524 'KW': 'Kuwait',
5525 'KG': 'Kyrgyzstan',
5526 'LA': 'Lao People\'s Democratic Republic',
5527 'LV': 'Latvia',
5528 'LB': 'Lebanon',
5529 'LS': 'Lesotho',
5530 'LR': 'Liberia',
5531 'LY': 'Libya',
5532 'LI': 'Liechtenstein',
5533 'LT': 'Lithuania',
5534 'LU': 'Luxembourg',
5535 'MO': 'Macao',
5536 'MK': 'Macedonia, the Former Yugoslav Republic of',
5537 'MG': 'Madagascar',
5538 'MW': 'Malawi',
5539 'MY': 'Malaysia',
5540 'MV': 'Maldives',
5541 'ML': 'Mali',
5542 'MT': 'Malta',
5543 'MH': 'Marshall Islands',
5544 'MQ': 'Martinique',
5545 'MR': 'Mauritania',
5546 'MU': 'Mauritius',
5547 'YT': 'Mayotte',
5548 'MX': 'Mexico',
5549 'FM': 'Micronesia, Federated States of',
5550 'MD': 'Moldova, Republic of',
5551 'MC': 'Monaco',
5552 'MN': 'Mongolia',
5553 'ME': 'Montenegro',
5554 'MS': 'Montserrat',
5555 'MA': 'Morocco',
5556 'MZ': 'Mozambique',
5557 'MM': 'Myanmar',
5558 'NA': 'Namibia',
5559 'NR': 'Nauru',
5560 'NP': 'Nepal',
5561 'NL': 'Netherlands',
5562 'NC': 'New Caledonia',
5563 'NZ': 'New Zealand',
5564 'NI': 'Nicaragua',
5565 'NE': 'Niger',
5566 'NG': 'Nigeria',
5567 'NU': 'Niue',
5568 'NF': 'Norfolk Island',
5569 'MP': 'Northern Mariana Islands',
5570 'NO': 'Norway',
5571 'OM': 'Oman',
5572 'PK': 'Pakistan',
5573 'PW': 'Palau',
5574 'PS': 'Palestine, State of',
5575 'PA': 'Panama',
5576 'PG': 'Papua New Guinea',
5577 'PY': 'Paraguay',
5578 'PE': 'Peru',
5579 'PH': 'Philippines',
5580 'PN': 'Pitcairn',
5581 'PL': 'Poland',
5582 'PT': 'Portugal',
5583 'PR': 'Puerto Rico',
5584 'QA': 'Qatar',
5585 'RE': 'Réunion',
5586 'RO': 'Romania',
5587 'RU': 'Russian Federation',
5588 'RW': 'Rwanda',
5589 'BL': 'Saint Barthélemy',
5590 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5591 'KN': 'Saint Kitts and Nevis',
5592 'LC': 'Saint Lucia',
5593 'MF': 'Saint Martin (French part)',
5594 'PM': 'Saint Pierre and Miquelon',
5595 'VC': 'Saint Vincent and the Grenadines',
5596 'WS': 'Samoa',
5597 'SM': 'San Marino',
5598 'ST': 'Sao Tome and Principe',
5599 'SA': 'Saudi Arabia',
5600 'SN': 'Senegal',
5601 'RS': 'Serbia',
5602 'SC': 'Seychelles',
5603 'SL': 'Sierra Leone',
5604 'SG': 'Singapore',
5605 'SX': 'Sint Maarten (Dutch part)',
5606 'SK': 'Slovakia',
5607 'SI': 'Slovenia',
5608 'SB': 'Solomon Islands',
5609 'SO': 'Somalia',
5610 'ZA': 'South Africa',
5611 'GS': 'South Georgia and the South Sandwich Islands',
5612 'SS': 'South Sudan',
5613 'ES': 'Spain',
5614 'LK': 'Sri Lanka',
5615 'SD': 'Sudan',
5616 'SR': 'Suriname',
5617 'SJ': 'Svalbard and Jan Mayen',
5618 'SZ': 'Swaziland',
5619 'SE': 'Sweden',
5620 'CH': 'Switzerland',
5621 'SY': 'Syrian Arab Republic',
5622 'TW': 'Taiwan, Province of China',
5623 'TJ': 'Tajikistan',
5624 'TZ': 'Tanzania, United Republic of',
5625 'TH': 'Thailand',
5626 'TL': 'Timor-Leste',
5627 'TG': 'Togo',
5628 'TK': 'Tokelau',
5629 'TO': 'Tonga',
5630 'TT': 'Trinidad and Tobago',
5631 'TN': 'Tunisia',
5632 'TR': 'Turkey',
5633 'TM': 'Turkmenistan',
5634 'TC': 'Turks and Caicos Islands',
5635 'TV': 'Tuvalu',
5636 'UG': 'Uganda',
5637 'UA': 'Ukraine',
5638 'AE': 'United Arab Emirates',
5639 'GB': 'United Kingdom',
5640 'US': 'United States',
5641 'UM': 'United States Minor Outlying Islands',
5642 'UY': 'Uruguay',
5643 'UZ': 'Uzbekistan',
5644 'VU': 'Vanuatu',
5645 'VE': 'Venezuela, Bolivarian Republic of',
5646 'VN': 'Viet Nam',
5647 'VG': 'Virgin Islands, British',
5648 'VI': 'Virgin Islands, U.S.',
5649 'WF': 'Wallis and Futuna',
5650 'EH': 'Western Sahara',
5651 'YE': 'Yemen',
5652 'ZM': 'Zambia',
5653 'ZW': 'Zimbabwe',
5654 }
5655
5656 @classmethod
5657 def short2full(cls, code):
5658 """Convert an ISO 3166-2 country code to the corresponding full name"""
5659 return cls._country_map.get(code.upper())
5660
5661
773f291d
S
5662class GeoUtils(object):
5663 # Major IPv4 address blocks per country
5664 _country_ip_map = {
53896ca5 5665 'AD': '46.172.224.0/19',
773f291d
S
5666 'AE': '94.200.0.0/13',
5667 'AF': '149.54.0.0/17',
5668 'AG': '209.59.64.0/18',
5669 'AI': '204.14.248.0/21',
5670 'AL': '46.99.0.0/16',
5671 'AM': '46.70.0.0/15',
5672 'AO': '105.168.0.0/13',
53896ca5
S
5673 'AP': '182.50.184.0/21',
5674 'AQ': '23.154.160.0/24',
773f291d
S
5675 'AR': '181.0.0.0/12',
5676 'AS': '202.70.112.0/20',
53896ca5 5677 'AT': '77.116.0.0/14',
773f291d
S
5678 'AU': '1.128.0.0/11',
5679 'AW': '181.41.0.0/18',
53896ca5
S
5680 'AX': '185.217.4.0/22',
5681 'AZ': '5.197.0.0/16',
773f291d
S
5682 'BA': '31.176.128.0/17',
5683 'BB': '65.48.128.0/17',
5684 'BD': '114.130.0.0/16',
5685 'BE': '57.0.0.0/8',
53896ca5 5686 'BF': '102.178.0.0/15',
773f291d
S
5687 'BG': '95.42.0.0/15',
5688 'BH': '37.131.0.0/17',
5689 'BI': '154.117.192.0/18',
5690 'BJ': '137.255.0.0/16',
53896ca5 5691 'BL': '185.212.72.0/23',
773f291d
S
5692 'BM': '196.12.64.0/18',
5693 'BN': '156.31.0.0/16',
5694 'BO': '161.56.0.0/16',
5695 'BQ': '161.0.80.0/20',
53896ca5 5696 'BR': '191.128.0.0/12',
773f291d
S
5697 'BS': '24.51.64.0/18',
5698 'BT': '119.2.96.0/19',
5699 'BW': '168.167.0.0/16',
5700 'BY': '178.120.0.0/13',
5701 'BZ': '179.42.192.0/18',
5702 'CA': '99.224.0.0/11',
5703 'CD': '41.243.0.0/16',
53896ca5
S
5704 'CF': '197.242.176.0/21',
5705 'CG': '160.113.0.0/16',
773f291d 5706 'CH': '85.0.0.0/13',
53896ca5 5707 'CI': '102.136.0.0/14',
773f291d
S
5708 'CK': '202.65.32.0/19',
5709 'CL': '152.172.0.0/14',
53896ca5 5710 'CM': '102.244.0.0/14',
773f291d
S
5711 'CN': '36.128.0.0/10',
5712 'CO': '181.240.0.0/12',
5713 'CR': '201.192.0.0/12',
5714 'CU': '152.206.0.0/15',
5715 'CV': '165.90.96.0/19',
5716 'CW': '190.88.128.0/17',
53896ca5 5717 'CY': '31.153.0.0/16',
773f291d
S
5718 'CZ': '88.100.0.0/14',
5719 'DE': '53.0.0.0/8',
5720 'DJ': '197.241.0.0/17',
5721 'DK': '87.48.0.0/12',
5722 'DM': '192.243.48.0/20',
5723 'DO': '152.166.0.0/15',
5724 'DZ': '41.96.0.0/12',
5725 'EC': '186.68.0.0/15',
5726 'EE': '90.190.0.0/15',
5727 'EG': '156.160.0.0/11',
5728 'ER': '196.200.96.0/20',
5729 'ES': '88.0.0.0/11',
5730 'ET': '196.188.0.0/14',
5731 'EU': '2.16.0.0/13',
5732 'FI': '91.152.0.0/13',
5733 'FJ': '144.120.0.0/16',
53896ca5 5734 'FK': '80.73.208.0/21',
773f291d
S
5735 'FM': '119.252.112.0/20',
5736 'FO': '88.85.32.0/19',
5737 'FR': '90.0.0.0/9',
5738 'GA': '41.158.0.0/15',
5739 'GB': '25.0.0.0/8',
5740 'GD': '74.122.88.0/21',
5741 'GE': '31.146.0.0/16',
5742 'GF': '161.22.64.0/18',
5743 'GG': '62.68.160.0/19',
53896ca5
S
5744 'GH': '154.160.0.0/12',
5745 'GI': '95.164.0.0/16',
773f291d
S
5746 'GL': '88.83.0.0/19',
5747 'GM': '160.182.0.0/15',
5748 'GN': '197.149.192.0/18',
5749 'GP': '104.250.0.0/19',
5750 'GQ': '105.235.224.0/20',
5751 'GR': '94.64.0.0/13',
5752 'GT': '168.234.0.0/16',
5753 'GU': '168.123.0.0/16',
5754 'GW': '197.214.80.0/20',
5755 'GY': '181.41.64.0/18',
5756 'HK': '113.252.0.0/14',
5757 'HN': '181.210.0.0/16',
5758 'HR': '93.136.0.0/13',
5759 'HT': '148.102.128.0/17',
5760 'HU': '84.0.0.0/14',
5761 'ID': '39.192.0.0/10',
5762 'IE': '87.32.0.0/12',
5763 'IL': '79.176.0.0/13',
5764 'IM': '5.62.80.0/20',
5765 'IN': '117.192.0.0/10',
5766 'IO': '203.83.48.0/21',
5767 'IQ': '37.236.0.0/14',
5768 'IR': '2.176.0.0/12',
5769 'IS': '82.221.0.0/16',
5770 'IT': '79.0.0.0/10',
5771 'JE': '87.244.64.0/18',
5772 'JM': '72.27.0.0/17',
5773 'JO': '176.29.0.0/16',
53896ca5 5774 'JP': '133.0.0.0/8',
773f291d
S
5775 'KE': '105.48.0.0/12',
5776 'KG': '158.181.128.0/17',
5777 'KH': '36.37.128.0/17',
5778 'KI': '103.25.140.0/22',
5779 'KM': '197.255.224.0/20',
53896ca5 5780 'KN': '198.167.192.0/19',
773f291d
S
5781 'KP': '175.45.176.0/22',
5782 'KR': '175.192.0.0/10',
5783 'KW': '37.36.0.0/14',
5784 'KY': '64.96.0.0/15',
5785 'KZ': '2.72.0.0/13',
5786 'LA': '115.84.64.0/18',
5787 'LB': '178.135.0.0/16',
53896ca5 5788 'LC': '24.92.144.0/20',
773f291d
S
5789 'LI': '82.117.0.0/19',
5790 'LK': '112.134.0.0/15',
53896ca5 5791 'LR': '102.183.0.0/16',
773f291d
S
5792 'LS': '129.232.0.0/17',
5793 'LT': '78.56.0.0/13',
5794 'LU': '188.42.0.0/16',
5795 'LV': '46.109.0.0/16',
5796 'LY': '41.252.0.0/14',
5797 'MA': '105.128.0.0/11',
5798 'MC': '88.209.64.0/18',
5799 'MD': '37.246.0.0/16',
5800 'ME': '178.175.0.0/17',
5801 'MF': '74.112.232.0/21',
5802 'MG': '154.126.0.0/17',
5803 'MH': '117.103.88.0/21',
5804 'MK': '77.28.0.0/15',
5805 'ML': '154.118.128.0/18',
5806 'MM': '37.111.0.0/17',
5807 'MN': '49.0.128.0/17',
5808 'MO': '60.246.0.0/16',
5809 'MP': '202.88.64.0/20',
5810 'MQ': '109.203.224.0/19',
5811 'MR': '41.188.64.0/18',
5812 'MS': '208.90.112.0/22',
5813 'MT': '46.11.0.0/16',
5814 'MU': '105.16.0.0/12',
5815 'MV': '27.114.128.0/18',
53896ca5 5816 'MW': '102.70.0.0/15',
773f291d
S
5817 'MX': '187.192.0.0/11',
5818 'MY': '175.136.0.0/13',
5819 'MZ': '197.218.0.0/15',
5820 'NA': '41.182.0.0/16',
5821 'NC': '101.101.0.0/18',
5822 'NE': '197.214.0.0/18',
5823 'NF': '203.17.240.0/22',
5824 'NG': '105.112.0.0/12',
5825 'NI': '186.76.0.0/15',
5826 'NL': '145.96.0.0/11',
5827 'NO': '84.208.0.0/13',
5828 'NP': '36.252.0.0/15',
5829 'NR': '203.98.224.0/19',
5830 'NU': '49.156.48.0/22',
5831 'NZ': '49.224.0.0/14',
5832 'OM': '5.36.0.0/15',
5833 'PA': '186.72.0.0/15',
5834 'PE': '186.160.0.0/14',
5835 'PF': '123.50.64.0/18',
5836 'PG': '124.240.192.0/19',
5837 'PH': '49.144.0.0/13',
5838 'PK': '39.32.0.0/11',
5839 'PL': '83.0.0.0/11',
5840 'PM': '70.36.0.0/20',
5841 'PR': '66.50.0.0/16',
5842 'PS': '188.161.0.0/16',
5843 'PT': '85.240.0.0/13',
5844 'PW': '202.124.224.0/20',
5845 'PY': '181.120.0.0/14',
5846 'QA': '37.210.0.0/15',
53896ca5 5847 'RE': '102.35.0.0/16',
773f291d 5848 'RO': '79.112.0.0/13',
53896ca5 5849 'RS': '93.86.0.0/15',
773f291d 5850 'RU': '5.136.0.0/13',
53896ca5 5851 'RW': '41.186.0.0/16',
773f291d
S
5852 'SA': '188.48.0.0/13',
5853 'SB': '202.1.160.0/19',
5854 'SC': '154.192.0.0/11',
53896ca5 5855 'SD': '102.120.0.0/13',
773f291d 5856 'SE': '78.64.0.0/12',
53896ca5 5857 'SG': '8.128.0.0/10',
773f291d
S
5858 'SI': '188.196.0.0/14',
5859 'SK': '78.98.0.0/15',
53896ca5 5860 'SL': '102.143.0.0/17',
773f291d
S
5861 'SM': '89.186.32.0/19',
5862 'SN': '41.82.0.0/15',
53896ca5 5863 'SO': '154.115.192.0/18',
773f291d
S
5864 'SR': '186.179.128.0/17',
5865 'SS': '105.235.208.0/21',
5866 'ST': '197.159.160.0/19',
5867 'SV': '168.243.0.0/16',
5868 'SX': '190.102.0.0/20',
5869 'SY': '5.0.0.0/16',
5870 'SZ': '41.84.224.0/19',
5871 'TC': '65.255.48.0/20',
5872 'TD': '154.68.128.0/19',
5873 'TG': '196.168.0.0/14',
5874 'TH': '171.96.0.0/13',
5875 'TJ': '85.9.128.0/18',
5876 'TK': '27.96.24.0/21',
5877 'TL': '180.189.160.0/20',
5878 'TM': '95.85.96.0/19',
5879 'TN': '197.0.0.0/11',
5880 'TO': '175.176.144.0/21',
5881 'TR': '78.160.0.0/11',
5882 'TT': '186.44.0.0/15',
5883 'TV': '202.2.96.0/19',
5884 'TW': '120.96.0.0/11',
5885 'TZ': '156.156.0.0/14',
53896ca5
S
5886 'UA': '37.52.0.0/14',
5887 'UG': '102.80.0.0/13',
5888 'US': '6.0.0.0/8',
773f291d 5889 'UY': '167.56.0.0/13',
53896ca5 5890 'UZ': '84.54.64.0/18',
773f291d 5891 'VA': '212.77.0.0/19',
53896ca5 5892 'VC': '207.191.240.0/21',
773f291d 5893 'VE': '186.88.0.0/13',
53896ca5 5894 'VG': '66.81.192.0/20',
773f291d
S
5895 'VI': '146.226.0.0/16',
5896 'VN': '14.160.0.0/11',
5897 'VU': '202.80.32.0/20',
5898 'WF': '117.20.32.0/21',
5899 'WS': '202.4.32.0/19',
5900 'YE': '134.35.0.0/16',
5901 'YT': '41.242.116.0/22',
5902 'ZA': '41.0.0.0/11',
53896ca5
S
5903 'ZM': '102.144.0.0/13',
5904 'ZW': '102.177.192.0/18',
773f291d
S
5905 }
5906
5907 @classmethod
5f95927a
S
5908 def random_ipv4(cls, code_or_block):
5909 if len(code_or_block) == 2:
5910 block = cls._country_ip_map.get(code_or_block.upper())
5911 if not block:
5912 return None
5913 else:
5914 block = code_or_block
773f291d
S
5915 addr, preflen = block.split('/')
5916 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5917 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5918 return compat_str(socket.inet_ntoa(
4248dad9 5919 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
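# Example usage (illustrative; the returned address is random within the block):
#   >>> GeoUtils.random_ipv4('JP')          # some address inside 133.0.0.0/8
#   >>> GeoUtils.random_ipv4('8.8.8.0/24')  # an explicit CIDR block also works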
773f291d
S
5920
5921
91410c9b 5922class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5923 def __init__(self, proxies=None):
5924 # Set default handlers
5925 for type in ('http', 'https'):
5926 setattr(self, '%s_open' % type,
5927 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5928 meth(r, proxy, type))
38e87f6c 5929 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5930
91410c9b 5931 def proxy_open(self, req, proxy, type):
2461f79d 5932 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5933 if req_proxy is not None:
5934 proxy = req_proxy
2461f79d
PH
5935 del req.headers['Ytdl-request-proxy']
5936
5937 if proxy == '__noproxy__':
5938 return None # No Proxy
51fb4995 5939 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5940 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5941 # yt-dlp's http/https handlers do the wrapping of the socket with SOCKS
71aff188 5942 return None
91410c9b
PH
5943 return compat_urllib_request.ProxyHandler.proxy_open(
5944 self, req, proxy, type)
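# Example usage (illustrative sketch; the proxy URLs below are assumptions):
#   >>> handler = PerRequestProxyHandler({'http': 'http://proxy.example:3128'})
#   >>> opener = compat_urllib_request.build_opener(handler)
#   >>> req = compat_urllib_request.Request('http://example.com')
#   >>> req.add_header('Ytdl-request-proxy', 'socks5://127.0.0.1:1080')  # overrides the default for this request only
#   >>> opener.open(req)  # doctest: +SKIP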
5bc880b9
YCH
5945
5946
0a5445dd
YCH
5947# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5948 # released into the public domain
5949# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5950
5951def long_to_bytes(n, blocksize=0):
5952 """long_to_bytes(n:long, blocksize:int) : string
5953 Convert a long integer to a byte string.
5954
5955 If optional blocksize is given and greater than zero, pad the front of the
5956 byte string with binary zeros so that the length is a multiple of
5957 blocksize.
5958 """
5959 # after much testing, this algorithm was deemed to be the fastest
5960 s = b''
5961 n = int(n)
5962 while n > 0:
5963 s = compat_struct_pack('>I', n & 0xffffffff) + s
5964 n = n >> 32
5965 # strip off leading zeros
5966 for i in range(len(s)):
5967 if s[i] != b'\000'[0]:
5968 break
5969 else:
5970 # only happens when n == 0
5971 s = b'\000'
5972 i = 0
5973 s = s[i:]
5974 # add back some pad bytes. this could be done more efficiently w.r.t. the
5975 # de-padding being done above, but sigh...
5976 if blocksize > 0 and len(s) % blocksize:
5977 s = (blocksize - len(s) % blocksize) * b'\000' + s
5978 return s
5979
5980
5981def bytes_to_long(s):
5982 """bytes_to_long(string) : long
5983 Convert a byte string to a long integer.
5984
5985 This is (essentially) the inverse of long_to_bytes().
5986 """
5987 acc = 0
5988 length = len(s)
5989 if length % 4:
5990 extra = (4 - length % 4)
5991 s = b'\000' * extra + s
5992 length = length + extra
5993 for i in range(0, length, 4):
5994 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5995 return acc
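# Round-trip examples (illustrative):
#   >>> long_to_bytes(256)
#   b'\x01\x00'
#   >>> long_to_bytes(256, blocksize=4)
#   b'\x00\x00\x01\x00'
#   >>> bytes_to_long(b'\x01\x00')
#   256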
5996
5997
5bc880b9
YCH
5998def ohdave_rsa_encrypt(data, exponent, modulus):
5999 '''
6000 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
6001
6002 Input:
6003 data: data to encrypt, bytes-like object
6004 exponent, modulus: parameter e and N of RSA algorithm, both integer
6005 Output: hex string of encrypted data
6006
6007 Limitation: supports one block encryption only
6008 '''
6009
6010 payload = int(binascii.hexlify(data[::-1]), 16)
6011 encrypted = pow(payload, exponent, modulus)
6012 return '%x' % encrypted
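# Worked example (illustrative; toy RSA parameters e=17, N=3233, far too small for real use):
#   >>> ohdave_rsa_encrypt(b'\x02', 17, 3233)
#   '6d8'                                   # pow(2, 17, 3233) == 1752 == 0x6d8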
81bdc8fd
YCH
6013
6014
f48409c7
YCH
6015def pkcs1pad(data, length):
6016 """
6017 Pad input data using the PKCS#1 scheme
6018
6019 @param {int[]} data input data
6020 @param {int} length target length
6021 @returns {int[]} padded data
6022 """
6023 if len(data) > length - 11:
6024 raise ValueError('Input data too long for PKCS#1 padding')
6025
6026 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
6027 return [0, 2] + pseudo_random + [0] + data
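# Example (illustrative): pad a single byte to a 16-byte block.
#   >>> padded = pkcs1pad([42], 16)
#   >>> padded[:2], padded[-2:], len(padded)
#   ([0, 2], [0, 42], 16)                   # [0, 2] header, 12 pseudo-random filler bytes, [0] separator, data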
6028
6029
5eb6bdce 6030def encode_base_n(num, n, table=None):
59f898b7 6031 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
6032 if not table:
6033 table = FULL_TABLE[:n]
6034
5eb6bdce
YCH
6035 if n > len(table):
6036 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
6037
6038 if num == 0:
6039 return table[0]
6040
81bdc8fd
YCH
6041 ret = ''
6042 while num:
6043 ret = table[num % n] + ret
6044 num = num // n
6045 return ret
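# Examples (illustrative):
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(0, 2)
#   '0'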
f52354a8
YCH
6046
6047
6048def decode_packed_codes(code):
06b3fe29 6049 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 6050 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
6051 base = int(base)
6052 count = int(count)
6053 symbols = symbols.split('|')
6054 symbol_table = {}
6055
6056 while count:
6057 count -= 1
5eb6bdce 6058 base_n_count = encode_base_n(count, base)
f52354a8
YCH
6059 symbol_table[base_n_count] = symbols[count] or base_n_count
6060
6061 return re.sub(
6062 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 6063 obfuscated_code)
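# Example (illustrative; assumes the standard P.A.C.K.E.R. wrapper matched by PACKED_CODES_RE,
# which is defined earlier in this module):
#   >>> decode_packed_codes("eval(function(p,a,c,k,e,d){...}('0 1',2,2,'foo|bar'.split('|'),0,{}))")
#   'foo bar'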
e154c651 6064
6065
1ced2221
S
6066def caesar(s, alphabet, shift):
6067 if shift == 0:
6068 return s
6069 l = len(alphabet)
6070 return ''.join(
6071 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
6072 for c in s)
6073
6074
6075def rot47(s):
6076 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
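# Examples (illustrative):
#   >>> caesar('ab', 'abc', 1)
#   'bc'
#   >>> rot47('Hello')
#   'w6==@'
#   >>> rot47(rot47('Hello'))               # ROT47 is its own inverse
#   'Hello'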
6077
6078
e154c651 6079def parse_m3u8_attributes(attrib):
6080 info = {}
6081 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
6082 if val.startswith('"'):
6083 val = val[1:-1]
6084 info[key] = val
6085 return info
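# Example (illustrative attribute list):
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}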
1143535d
YCH
6086
6087
6088def urshift(val, n):
6089 return val >> n if val >= 0 else (val + 0x100000000) >> n
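# Examples (illustrative): an unsigned 32-bit right shift, like JavaScript's >>> operator.
#   >>> urshift(16, 2)
#   4
#   >>> urshift(-1, 28)
#   15                                      # (-1 + 0x100000000) == 0xffffffff, shifted right by 28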
d3f8e038
YCH
6090
6091
6092# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 6093# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
6094def decode_png(png_data):
6095 # Reference: https://www.w3.org/TR/PNG/
6096 header = png_data[8:]
6097
6098 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
6099 raise IOError('Not a valid PNG file.')
6100
6101 int_map = {1: '>B', 2: '>H', 4: '>I'}
6102 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
6103
6104 chunks = []
6105
6106 while header:
6107 length = unpack_integer(header[:4])
6108 header = header[4:]
6109
6110 chunk_type = header[:4]
6111 header = header[4:]
6112
6113 chunk_data = header[:length]
6114 header = header[length:]
6115
6116 header = header[4:] # Skip CRC
6117
6118 chunks.append({
6119 'type': chunk_type,
6120 'length': length,
6121 'data': chunk_data
6122 })
6123
6124 ihdr = chunks[0]['data']
6125
6126 width = unpack_integer(ihdr[:4])
6127 height = unpack_integer(ihdr[4:8])
6128
6129 idat = b''
6130
6131 for chunk in chunks:
6132 if chunk['type'] == b'IDAT':
6133 idat += chunk['data']
6134
6135 if not idat:
6136 raise IOError('Unable to read PNG data.')
6137
6138 decompressed_data = bytearray(zlib.decompress(idat))
6139
6140 stride = width * 3
6141 pixels = []
6142
6143 def _get_pixel(idx):
6144 x = idx % stride
6145 y = idx // stride
6146 return pixels[y][x]
6147
6148 for y in range(height):
6149 basePos = y * (1 + stride)
6150 filter_type = decompressed_data[basePos]
6151
6152 current_row = []
6153
6154 pixels.append(current_row)
6155
6156 for x in range(stride):
6157 color = decompressed_data[1 + basePos + x]
6158 basex = y * stride + x
6159 left = 0
6160 up = 0
6161
6162 if x > 2:
6163 left = _get_pixel(basex - 3)
6164 if y > 0:
6165 up = _get_pixel(basex - stride)
6166
6167 if filter_type == 1: # Sub
6168 color = (color + left) & 0xff
6169 elif filter_type == 2: # Up
6170 color = (color + up) & 0xff
6171 elif filter_type == 3: # Average
6172 color = (color + ((left + up) >> 1)) & 0xff
6173 elif filter_type == 4: # Paeth
6174 a = left
6175 b = up
6176 c = 0
6177
6178 if x > 2 and y > 0:
6179 c = _get_pixel(basex - stride - 3)
6180
6181 p = a + b - c
6182
6183 pa = abs(p - a)
6184 pb = abs(p - b)
6185 pc = abs(p - c)
6186
6187 if pa <= pb and pa <= pc:
6188 color = (color + a) & 0xff
6189 elif pb <= pc:
6190 color = (color + b) & 0xff
6191 else:
6192 color = (color + c) & 0xff
6193
6194 current_row.append(color)
6195
6196 return width, height, pixels
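# Example usage (illustrative sketch; 'frame.png' is an assumed input file):
#   >>> with open('frame.png', 'rb') as f:
#   ...     width, height, pixels = decode_png(f.read())
#   >>> r, g, b = pixels[0][0:3]            # each row holds width * 3 reconstructed byte values (R, G, B interleaved)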
efa97bdc
YCH
6197
6198
6199def write_xattr(path, key, value):
6200 # This mess below finds the best xattr tool for the job
6201 try:
6202 # try the pyxattr module...
6203 import xattr
6204
53a7e3d2
YCH
6205 if hasattr(xattr, 'set'): # pyxattr
6206 # Unicode arguments are not supported in python-pyxattr until
6207 # version 0.5.0
067aa17e 6208 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6209 pyxattr_required_version = '0.5.0'
6210 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6211 # TODO: fallback to CLI tools
6212 raise XAttrUnavailableError(
6213 'python-pyxattr is detected but is too old. '
7a5c1cfe 6214 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6215 'Falling back to other xattr implementations' % (
6216 pyxattr_required_version, xattr.__version__))
6217
6218 setxattr = xattr.set
6219 else: # xattr
6220 setxattr = xattr.setxattr
efa97bdc
YCH
6221
6222 try:
53a7e3d2 6223 setxattr(path, key, value)
efa97bdc
YCH
6224 except EnvironmentError as e:
6225 raise XAttrMetadataError(e.errno, e.strerror)
6226
6227 except ImportError:
6228 if compat_os_name == 'nt':
6229 # Write xattrs to NTFS Alternate Data Streams:
6230 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6231 assert ':' not in key
6232 assert os.path.exists(path)
6233
6234 ads_fn = path + ':' + key
6235 try:
6236 with open(ads_fn, 'wb') as f:
6237 f.write(value)
6238 except EnvironmentError as e:
6239 raise XAttrMetadataError(e.errno, e.strerror)
6240 else:
6241 user_has_setfattr = check_executable('setfattr', ['--version'])
6242 user_has_xattr = check_executable('xattr', ['-h'])
6243
6244 if user_has_setfattr or user_has_xattr:
6245
6246 value = value.decode('utf-8')
6247 if user_has_setfattr:
6248 executable = 'setfattr'
6249 opts = ['-n', key, '-v', value]
6250 elif user_has_xattr:
6251 executable = 'xattr'
6252 opts = ['-w', key, value]
6253
3089bc74
S
6254 cmd = ([encodeFilename(executable, True)]
6255 + [encodeArgument(o) for o in opts]
6256 + [encodeFilename(path, True)])
efa97bdc
YCH
6257
6258 try:
d3c93ec2 6259 p = Popen(
efa97bdc
YCH
6260 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6261 except EnvironmentError as e:
6262 raise XAttrMetadataError(e.errno, e.strerror)
d3c93ec2 6263 stdout, stderr = p.communicate_or_kill()
efa97bdc
YCH
6264 stderr = stderr.decode('utf-8', 'replace')
6265 if p.returncode != 0:
6266 raise XAttrMetadataError(p.returncode, stderr)
6267
6268 else:
6269 # On Unix, but we can't find pyxattr, setfattr, or xattr.
6270 if sys.platform.startswith('linux'):
6271 raise XAttrUnavailableError(
6272 "Couldn't find a tool to set the xattrs. "
6273 "Install either the python 'pyxattr' or 'xattr' "
6274 "modules, or the GNU 'attr' package "
6275 "(which contains the 'setfattr' tool).")
6276 else:
6277 raise XAttrUnavailableError(
6278 "Couldn't find a tool to set the xattrs. "
6279 "Install either the python 'xattr' module, "
6280 "or the 'xattr' binary.")
0c265486
YCH
6281
6282
6283def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6284 start_date = datetime.date(1950, 1, 1)
6285 end_date = datetime.date(1995, 12, 31)
6286 offset = random.randint(0, (end_date - start_date).days)
6287 random_date = start_date + datetime.timedelta(offset)
0c265486 6288 return {
aa374bc7
AS
6289 year_field: str(random_date.year),
6290 month_field: str(random_date.month),
6291 day_field: str(random_date.day),
0c265486 6292 }
732044af 6293
c76eb41b 6294
732044af 6295# Templates for internet shortcut files, which are plain text files.
6296DOT_URL_LINK_TEMPLATE = '''
6297[InternetShortcut]
6298URL=%(url)s
6299'''.lstrip()
6300
6301DOT_WEBLOC_LINK_TEMPLATE = '''
6302<?xml version="1.0" encoding="UTF-8"?>
6303<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6304<plist version="1.0">
6305<dict>
6306\t<key>URL</key>
6307\t<string>%(url)s</string>
6308</dict>
6309</plist>
6310'''.lstrip()
6311
6312DOT_DESKTOP_LINK_TEMPLATE = '''
6313[Desktop Entry]
6314Encoding=UTF-8
6315Name=%(filename)s
6316Type=Link
6317URL=%(url)s
6318Icon=text-html
6319'''.lstrip()
6320
08438d2c 6321LINK_TEMPLATES = {
6322 'url': DOT_URL_LINK_TEMPLATE,
6323 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
6324 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
6325}
6326
732044af 6327
6328def iri_to_uri(iri):
6329 """
6330 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6331
6332 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6333 """
6334
6335 iri_parts = compat_urllib_parse_urlparse(iri)
6336
6337 if '[' in iri_parts.netloc:
6338 raise ValueError('IPv6 URIs are not yet supported.')
6339 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6340
6341 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6342
6343 net_location = ''
6344 if iri_parts.username:
6345 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6346 if iri_parts.password is not None:
6347 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6348 net_location += '@'
6349
6350 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6351 # The 'idna' encoding produces ASCII text.
6352 if iri_parts.port is not None and iri_parts.port != 80:
6353 net_location += ':' + str(iri_parts.port)
6354
6355 return compat_urllib_parse_urlunparse(
6356 (iri_parts.scheme,
6357 net_location,
6358
6359 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6360
6361 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6362 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6363
6364 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6365 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6366
6367 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6368
6369 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
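# Example (illustrative IRI; the host is already ASCII, so only the path and query get percent-encoded):
#   >>> iri_to_uri('https://www.example.com/ñ?q=ä')
#   'https://www.example.com/%C3%B1?q=%C3%A4'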
6370
6371
6372def to_high_limit_path(path):
6373 if sys.platform in ['win32', 'cygwin']:
6374 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6375 return r'\\?\ '.rstrip() + os.path.abspath(path)
6376
6377 return path
76d321f6 6378
c76eb41b 6379
b868936c 6380def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6381 if field is None:
6382 val = obj if obj is not None else default
6383 else:
6384 val = obj.get(field, default)
76d321f6 6385 if func and val not in ignore:
6386 val = func(val)
6387 return template % val if val not in ignore else default
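# Examples (illustrative):
#   >>> format_field({'height': 1080}, 'height', '%sp')
#   '1080p'
#   >>> format_field({}, 'height', '%sp')   # missing/empty values fall back to `default`
#   ''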
00dd0cd5 6388
6389
6390def clean_podcast_url(url):
6391 return re.sub(r'''(?x)
6392 (?:
6393 (?:
6394 chtbl\.com/track|
6395 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6396 play\.podtrac\.com
6397 )/[^/]+|
6398 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6399 flex\.acast\.com|
6400 pd(?:
6401 cn\.co| # https://podcorn.com/analytics-prefix/
6402 st\.fm # https://podsights.com/docs/
6403 )/e
6404 )/''', '', url)
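# Example (illustrative tracking-prefixed URL; the podcast host is an assumption):
#   >>> clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/traffic.megaphone.fm/file.mp3')
#   'https://traffic.megaphone.fm/file.mp3'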
ffcb8191
THD
6405
6406
6407_HEX_TABLE = '0123456789abcdef'
6408
6409
6410def random_uuidv4():
6411 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6412
6413
6414def make_dir(path, to_screen=None):
6415 try:
6416 dn = os.path.dirname(path)
6417 if dn and not os.path.exists(dn):
6418 os.makedirs(dn)
6419 return True
6420 except (OSError, IOError) as err:
6421 if callable(to_screen):
6422 to_screen('unable to create directory ' + error_to_compat_str(err))
6423 return False
f74980cb 6424
6425
6426def get_executable_path():
c552ae88 6427 from zipimport import zipimporter
6428 if hasattr(sys, 'frozen'): # Running from PyInstaller
6429 path = os.path.dirname(sys.executable)
6430 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6431 path = os.path.join(os.path.dirname(__file__), '../..')
6432 else:
6433 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6434 return os.path.abspath(path)
6435
6436
2f567473 6437def load_plugins(name, suffix, namespace):
3ae5e797 6438 classes = {}
f74980cb 6439 try:
019a94f7
ÁS
6440 plugins_spec = importlib.util.spec_from_file_location(
6441 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
6442 plugins = importlib.util.module_from_spec(plugins_spec)
6443 sys.modules[plugins_spec.name] = plugins
6444 plugins_spec.loader.exec_module(plugins)
f74980cb 6445 for name in dir(plugins):
2f567473 6446 if name in namespace:
6447 continue
6448 if not name.endswith(suffix):
f74980cb 6449 continue
6450 klass = getattr(plugins, name)
3ae5e797 6451 classes[name] = namespace[name] = klass
019a94f7 6452 except FileNotFoundError:
f74980cb 6453 pass
f74980cb 6454 return classes
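# Example usage (illustrative sketch of how a plugin package would be loaded; the names below are assumptions):
#   >>> _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
#   Classes whose names end in 'IE' found in ytdlp_plugins/extractor/__init__.py are returned
#   and also injected into the given namespace.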
06167fbb 6455
6456
325ebc17 6457def traverse_obj(
352d63fd 6458 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6459 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6460 ''' Traverse nested list/dict/tuple
8f334380 6461 @param path_list A list of paths which are checked one by one.
6462 Each path is a list of keys where each key is a string,
2614f646 6463 a function, a tuple of strings or "...".
6464 When a function is given, it takes the key as argument and
6465 returns whether the key matches or not. When a tuple is given,
8f334380 6466 all the keys given in the tuple are traversed, and
6467 "..." traverses all the keys in the object
325ebc17 6468 @param default Default value to return
352d63fd 6469 @param expected_type Only accept final value of this type (Can also be any callable)
6470 @param get_all Return all the values obtained from a path or only the first one
324ad820 6471 @param casesense Whether to consider dictionary keys as case sensitive
6472 @param is_user_input Whether the keys are generated from user input. If True,
6473 strings are converted to int/slice if necessary
6474 @param traverse_string Whether to traverse inside strings. If True, any
6475 non-compatible object will also be converted into a string
8f334380 6476 # TODO: Write tests
324ad820 6477 '''
325ebc17 6478 if not casesense:
dbf5416a 6479 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6480 path_list = (map(_lower, variadic(path)) for path in path_list)
6481
6482 def _traverse_obj(obj, path, _current_depth=0):
6483 nonlocal depth
6484 path = tuple(variadic(path))
6485 for i, key in enumerate(path):
582fad70 6486 if obj is None:
6487 return None
8f334380 6488 if isinstance(key, (list, tuple)):
6489 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6490 key = ...
6491 if key is ...:
6492 obj = (obj.values() if isinstance(obj, dict)
6493 else obj if isinstance(obj, (list, tuple, LazyList))
6494 else str(obj) if traverse_string else [])
6495 _current_depth += 1
6496 depth = max(depth, _current_depth)
6497 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
2614f646 6498 elif callable(key):
6499 if isinstance(obj, (list, tuple, LazyList)):
6500 obj = enumerate(obj)
6501 elif isinstance(obj, dict):
6502 obj = obj.items()
6503 else:
6504 if not traverse_string:
6505 return None
6506 obj = str(obj)
6507 _current_depth += 1
6508 depth = max(depth, _current_depth)
6509 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
575e17a1 6510 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6511 obj = (obj.get(key) if casesense or (key in obj)
6512 else next((v for k, v in obj.items() if _lower(k) == key), None))
6513 else:
6514 if is_user_input:
6515 key = (int_or_none(key) if ':' not in key
6516 else slice(*map(int_or_none, key.split(':'))))
8f334380 6517 if key == slice(None):
575e17a1 6518 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6519 if not isinstance(key, (int, slice)):
9fea350f 6520 return None
8f334380 6521 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6522 if not traverse_string:
6523 return None
6524 obj = str(obj)
6525 try:
6526 obj = obj[key]
6527 except IndexError:
324ad820 6528 return None
325ebc17 6529 return obj
6530
352d63fd 6531 if isinstance(expected_type, type):
6532 type_test = lambda val: val if isinstance(val, expected_type) else None
6533 elif expected_type is not None:
6534 type_test = expected_type
6535 else:
6536 type_test = lambda val: val
6537
8f334380 6538 for path in path_list:
6539 depth = 0
6540 val = _traverse_obj(obj, path)
325ebc17 6541 if val is not None:
8f334380 6542 if depth:
6543 for _ in range(depth - 1):
6586bca9 6544 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6545 val = [v for v in map(type_test, val) if v is not None]
8f334380 6546 if val:
352d63fd 6547 return val if get_all else val[0]
6548 else:
6549 val = type_test(val)
6550 if val is not None:
8f334380 6551 return val
325ebc17 6552 return default
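# Examples (illustrative):
#   >>> traverse_obj({'a': {'b': [1, 2]}}, ('a', 'b', 0))
#   1
#   >>> traverse_obj({'formats': [{'url': 'u1'}, {}, {'url': 'u3'}]}, ('formats', ..., 'url'))
#   ['u1', 'u3']                            # `...` branches over every item; missing keys are skipped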
324ad820 6553
6554
ee8dd27a 6555# Deprecated
324ad820 6556def traverse_dict(dictn, keys, casesense=True):
ee8dd27a 6557 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
6558 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
6559 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
6606817a 6560
6561
c634ad2a 6562def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6563 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
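# Examples (illustrative):
#   >>> variadic('spam')                    # strings/bytes count as scalars, not iterables
#   ('spam',)
#   >>> variadic([1, 2])
#   [1, 2]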
bd50a52b
THD
6564
6565
49fa4d9a
N
6566 # create a JSON Web Signature (JWS) with the HS256 algorithm
6567# the resulting format is in JWS Compact Serialization
6568# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6569# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6570def jwt_encode_hs256(payload_data, key, headers={}):
6571 header_data = {
6572 'alg': 'HS256',
6573 'typ': 'JWT',
6574 }
6575 if headers:
6576 header_data.update(headers)
6577 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
6578 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
6579 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
6580 signature_b64 = base64.b64encode(h.digest())
6581 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6582 return token
819e0531 6583
6584
16b0d7e6 6585# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
6586def jwt_decode_hs256(jwt):
6587 header_b64, payload_b64, signature_b64 = jwt.split('.')
6588 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
6589 return payload_data
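# Round-trip example (illustrative; note that jwt_decode_hs256 does not verify the signature):
#   >>> token = jwt_encode_hs256({'id': 123}, 'secret')
#   >>> jwt_decode_hs256(token.decode('utf-8'))
#   {'id': 123}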
6590
6591
819e0531 6592def supports_terminal_sequences(stream):
6593 if compat_os_name == 'nt':
d1d5c08f 6594 if get_windows_version() < (10, 0, 10586):
819e0531 6595 return False
6596 elif not os.getenv('TERM'):
6597 return False
6598 try:
6599 return stream.isatty()
6600 except BaseException:
6601 return False
6602
6603
ec11a9f4 6604_terminal_sequences_re = re.compile('\033\\[[^m]+m')
6605
6606
6607def remove_terminal_sequences(string):
6608 return _terminal_sequences_re.sub('', string)
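# Example (illustrative):
#   >>> remove_terminal_sequences('\033[0;31mERROR\033[0m')
#   'ERROR'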
6609
6610
6611def number_of_digits(number):
6612 return len('%d' % number)
34921b43 6613
6614
6615def join_nonempty(*values, delim='-', from_dict=None):
6616 if from_dict is not None:
c586f9e8 6617 values = map(from_dict.get, values)
34921b43 6618 return delim.join(map(str, filter(None, values)))
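# Examples (illustrative):
#   >>> join_nonempty('1080p', None, 'dash', delim='-')
#   '1080p-dash'
#   >>> join_nonempty('acodec', 'vcodec', from_dict={'acodec': 'mp4a', 'vcodec': None})
#   'mp4a'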