cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
d77c3dfd 3
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
19import hashlib
20import hmac
019a94f7 21import importlib.util
03f9daab 22import io
79a2e94e 23import itertools
f4bfd65f 24import json
d77c3dfd 25import locale
02dbf93f 26import math
347de493 27import operator
d77c3dfd 28import os
c496ca96 29import platform
773f291d 30import random
d77c3dfd 31import re
c496ca96 32import socket
79a2e94e 33import ssl
1c088fa8 34import subprocess
d77c3dfd 35import sys
181c8655 36import tempfile
c380cc28 37import time
01951dda 38import traceback
bcf89ce6 39import xml.etree.ElementTree
d77c3dfd 40import zlib
d77c3dfd 41
8c25f81b 42from .compat import (
b4a3d461 43 compat_HTMLParseError,
8bb56eee 44 compat_HTMLParser,
201c1459 45 compat_HTTPError,
8f9312c3 46 compat_basestring,
8c25f81b 47 compat_chr,
1bab3437 48 compat_cookiejar,
d7cd9a9e 49 compat_ctypes_WINFUNCTYPE,
36e6f62c 50 compat_etree_fromstring,
51098426 51 compat_expanduser,
8c25f81b 52 compat_html_entities,
55b2f099 53 compat_html_entities_html5,
be4a824d 54 compat_http_client,
42db58ec 55 compat_integer_types,
e29663c6 56 compat_numeric_types,
c86b6142 57 compat_kwargs,
efa97bdc 58 compat_os_name,
8c25f81b 59 compat_parse_qs,
702ccf2d 60 compat_shlex_quote,
8c25f81b 61 compat_str,
edaa23f8 62 compat_struct_pack,
d3f8e038 63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
15707c7e 66 compat_urllib_parse_urlencode,
8c25f81b 67 compat_urllib_parse_urlparse,
732044af 68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
7581bfc9 71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
810c10ba 74 compat_xpath,
8c25f81b 75)
4644ac55 76
77from .socks import (
78 ProxyType,
79 sockssocket,
80)
81
4644ac55 82
83def register_socks_protocols():
84 # "Register" SOCKS protocols
85 # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
86 # URLs with protocols not in urlparse.uses_netloc are not handled correctly
87 for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
88 if scheme not in compat_urlparse.uses_netloc:
89 compat_urlparse.uses_netloc.append(scheme)
90
91
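# Illustrative usage (added for clarity, not part of the original source):
# after registration, the SOCKS schemes participate in netloc-aware URL handling:
#   >>> register_socks_protocols()
#   >>> 'socks5' in compat_urlparse.uses_netloc
#   True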
92# This is not clearly defined otherwise
93compiled_regex_type = type(re.compile(''))
94
95
96def random_user_agent():
97 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
98 _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
1675 )
1676 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
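# Illustrative check (added for clarity, not part of the original source):
# every generated value follows the desktop Chrome User-Agent template above,
# differing only in the Chrome version picked from _CHROME_VERSIONS:
#   >>> random_user_agent().startswith('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
#   True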
3e669f36 1679std_headers = {
f7a147e3 1680 'User-Agent': random_user_agent(),
1681 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1682 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1683 'Accept-Encoding': 'gzip, deflate',
1684 'Accept-Language': 'en-us,en;q=0.5',
3e669f36 1685}
f427df17 1686
5f6a1245 1687
1688USER_AGENTS = {
1689 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1690}
1691
1692
1693NO_DEFAULT = object()
1694
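# Note (added for clarity, not part of the original source): NO_DEFAULT is a
# sentinel object, so helpers below (e.g. xpath_text) can test
# "default is not NO_DEFAULT" and thereby treat an explicit default=None as a
# real fallback value rather than as "no default supplied".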
1695ENGLISH_MONTH_NAMES = [
1696 'January', 'February', 'March', 'April', 'May', 'June',
1697 'July', 'August', 'September', 'October', 'November', 'December']
1698
1699MONTH_NAMES = {
1700 'en': ENGLISH_MONTH_NAMES,
1701 'fr': [
1702 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1703 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
f6717dec 1704}
a942d6cb 1705
1706KNOWN_EXTENSIONS = (
1707 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1708 'flv', 'f4v', 'f4a', 'f4b',
1709 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1710 'mkv', 'mka', 'mk3d',
1711 'avi', 'divx',
1712 'mov',
1713 'asf', 'wmv', 'wma',
1714 '3gp', '3g2',
1715 'mp3',
1716 'flac',
1717 'ape',
1718 'wav',
1719 'f4f', 'f4m', 'm3u8', 'smil')
1720
c587cbb7 1721# needed for sanitizing filenames in restricted mode
c8827027 1722ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1723 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1724 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1725
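# Illustrative lookups (added for clarity, not part of the original source):
#   >>> ACCENT_CHARS['É'], ACCENT_CHARS['ß'], ACCENT_CHARS['œ']
#   ('E', 'ss', 'oe')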
1726DATE_FORMATS = (
1727 '%d %B %Y',
1728 '%d %b %Y',
1729 '%B %d %Y',
1730 '%B %dst %Y',
1731 '%B %dnd %Y',
9d30c213 1732 '%B %drd %Y',
cb655f34 1733 '%B %dth %Y',
46f59e89 1734 '%b %d %Y',
1735 '%b %dst %Y',
1736 '%b %dnd %Y',
9d30c213 1737 '%b %drd %Y',
cb655f34 1738 '%b %dth %Y',
1739 '%b %dst %Y %I:%M',
1740 '%b %dnd %Y %I:%M',
9d30c213 1741 '%b %drd %Y %I:%M',
1742 '%b %dth %Y %I:%M',
1743 '%Y %m %d',
1744 '%Y-%m-%d',
bccdbd22 1745 '%Y.%m.%d.',
46f59e89 1746 '%Y/%m/%d',
81c13222 1747 '%Y/%m/%d %H:%M',
46f59e89 1748 '%Y/%m/%d %H:%M:%S',
1749 '%Y%m%d%H%M',
1750 '%Y%m%d%H%M%S',
0c1c6f4b 1751 '%Y-%m-%d %H:%M',
1752 '%Y-%m-%d %H:%M:%S',
1753 '%Y-%m-%d %H:%M:%S.%f',
5014558a 1754 '%Y-%m-%d %H:%M:%S:%f',
1755 '%d.%m.%Y %H:%M',
1756 '%d.%m.%Y %H.%M',
1757 '%Y-%m-%dT%H:%M:%SZ',
1758 '%Y-%m-%dT%H:%M:%S.%fZ',
1759 '%Y-%m-%dT%H:%M:%S.%f0Z',
1760 '%Y-%m-%dT%H:%M:%S',
1761 '%Y-%m-%dT%H:%M:%S.%f',
1762 '%Y-%m-%dT%H:%M',
1763 '%b %d %Y at %H:%M',
1764 '%b %d %Y at %H:%M:%S',
1765 '%B %d %Y at %H:%M',
1766 '%B %d %Y at %H:%M:%S',
a63d9bd0 1767 '%H:%M %d-%b-%Y',
1768)
1769
1770DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1771DATE_FORMATS_DAY_FIRST.extend([
1772 '%d-%m-%Y',
1773 '%d.%m.%Y',
1774 '%d.%m.%y',
1775 '%d/%m/%Y',
1776 '%d/%m/%y',
1777 '%d/%m/%Y %H:%M:%S',
1778])
1779
1780DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1781DATE_FORMATS_MONTH_FIRST.extend([
1782 '%m-%d-%Y',
1783 '%m.%d.%Y',
1784 '%m/%d/%Y',
1785 '%m/%d/%y',
1786 '%m/%d/%Y %H:%M:%S',
1787])
1788
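# Illustrative note (added for clarity, not part of the original source): these
# format strings are intended for datetime.datetime.strptime(); the *_DAY_FIRST
# and *_MONTH_FIRST variants only differ in how ambiguous numeric dates are read:
#   >>> datetime.datetime.strptime('2021/05/30 07:15:00', '%Y/%m/%d %H:%M:%S')
#   datetime.datetime(2021, 5, 30, 7, 15)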
06b3fe29 1789PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
22f5f5c6 1790JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1791
7105440c 1792
d77c3dfd 1793def preferredencoding():
59ae15a5 1794 """Get preferred encoding.
d77c3dfd 1795
1796 Returns the best encoding scheme for the system, based on
1797 locale.getpreferredencoding() and some further tweaks.
1798 """
1799 try:
1800 pref = locale.getpreferredencoding()
28e614de 1801 'TEST'.encode(pref)
70a1165b 1802 except Exception:
59ae15a5 1803 pref = 'UTF-8'
bae611f2 1804
59ae15a5 1805 return pref
d77c3dfd 1806
f4bfd65f 1807
181c8655 1808def write_json_file(obj, fn):
1394646a 1809 """ Encode obj as JSON and write it to fn, atomically if possible """
181c8655 1810
92120217 1811 fn = encodeFilename(fn)
61ee5aeb 1812 if sys.version_info < (3, 0) and sys.platform != 'win32':
1813 encoding = get_filesystem_encoding()
1814 # os.path.basename returns a bytes object, but NamedTemporaryFile
1815 # will fail if the filename contains non ascii characters unless we
1816 # use a unicode object
1817 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1818 # the same for os.path.dirname
1819 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1820 else:
1821 path_basename = os.path.basename
1822 path_dirname = os.path.dirname
1823
1824 args = {
1825 'suffix': '.tmp',
1826 'prefix': path_basename(fn) + '.',
1827 'dir': path_dirname(fn),
1828 'delete': False,
1829 }
1830
1831 # In Python 2.x, json.dump expects a bytestream.
1832 # In Python 3.x, it writes to a character stream
1833 if sys.version_info < (3, 0):
73159f99 1834 args['mode'] = 'wb'
181c8655 1835 else:
1836 args.update({
1837 'mode': 'w',
1838 'encoding': 'utf-8',
1839 })
1840
c86b6142 1841 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1842
1843 try:
1844 with tf:
6e84b215 1845 json.dump(obj, tf)
1846 if sys.platform == 'win32':
1847 # Need to remove existing file on Windows, else os.rename raises
1848 # WindowsError or FileExistsError.
1849 try:
1850 os.unlink(fn)
1851 except OSError:
1852 pass
1853 try:
1854 mask = os.umask(0)
1855 os.umask(mask)
1856 os.chmod(tf.name, 0o666 & ~mask)
1857 except OSError:
1858 pass
181c8655 1859 os.rename(tf.name, fn)
70a1165b 1860 except Exception:
1861 try:
1862 os.remove(tf.name)
1863 except OSError:
1864 pass
1865 raise
1866
1867
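# Illustrative usage (added for clarity, not part of the original source):
# the data is first written to a NamedTemporaryFile in the target directory and
# then os.rename()d over the destination, so readers never observe a partially
# written JSON document:
#   write_json_file({'id': 'abc123', 'title': 'Example'}, 'example.info.json')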
1868if sys.version_info >= (2, 7):
ee114368 1869 def find_xpath_attr(node, xpath, key, val=None):
59ae56fa 1870 """ Find the xpath xpath[@key=val] """
5d2354f1 1871 assert re.match(r'^[a-zA-Z_-]+$', key)
ee114368 1872 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1873 return node.find(expr)
1874else:
ee114368 1875 def find_xpath_attr(node, xpath, key, val=None):
810c10ba 1876 for f in node.findall(compat_xpath(xpath)):
1877 if key not in f.attrib:
1878 continue
1879 if val is None or f.attrib.get(key) == val:
1880 return f
1881 return None
1882
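# Illustrative usage (added for clarity, not part of the original source):
#   >>> node = compat_etree_fromstring('<root><a x="1"/><a x="2"/></root>')
#   >>> find_xpath_attr(node, './/a', 'x', '2').attrib['x']
#   '2'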
1883# On python2.6 the xml.etree.ElementTree.Element methods don't support
1884# the namespace parameter
1885
1886
1887def xpath_with_ns(path, ns_map):
1888 components = [c.split(':') for c in path.split('/')]
1889 replaced = []
1890 for c in components:
1891 if len(c) == 1:
1892 replaced.append(c[0])
1893 else:
1894 ns, tag = c
1895 replaced.append('{%s}%s' % (ns_map[ns], tag))
1896 return '/'.join(replaced)
1897
d77c3dfd 1898
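# Illustrative usage (added for clarity, not part of the original source):
#   >>> xpath_with_ns('media:song/title', {'media': 'http://example.com/ns'})
#   '{http://example.com/ns}song/title'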
a41fb80c 1899def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
578c0745 1900 def _find_xpath(xpath):
810c10ba 1901 return node.find(compat_xpath(xpath))
1902
1903 if isinstance(xpath, (str, compat_str)):
1904 n = _find_xpath(xpath)
1905 else:
1906 for xp in xpath:
1907 n = _find_xpath(xp)
1908 if n is not None:
1909 break
d74bebd5 1910
8e636da4 1911 if n is None:
1912 if default is not NO_DEFAULT:
1913 return default
1914 elif fatal:
1915 name = xpath if name is None else name
1916 raise ExtractorError('Could not find XML element %s' % name)
1917 else:
1918 return None
1919 return n
1920
1921
1922def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1923 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1924 if n is None or n == default:
1925 return n
1926 if n.text is None:
1927 if default is not NO_DEFAULT:
1928 return default
1929 elif fatal:
1930 name = xpath if name is None else name
1931 raise ExtractorError('Could not find XML element\'s text %s' % name)
1932 else:
1933 return None
1934 return n.text
1935
1936
1937def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1938 n = find_xpath_attr(node, xpath, key)
1939 if n is None:
1940 if default is not NO_DEFAULT:
1941 return default
1942 elif fatal:
1943 name = '%s[@%s]' % (xpath, key) if name is None else name
1944 raise ExtractorError('Could not find XML attribute %s' % name)
1945 else:
1946 return None
1947 return n.attrib[key]
1948
1949
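# Illustrative usage (added for clarity, not part of the original source):
#   >>> doc = compat_etree_fromstring('<root><title lang="en">Video</title></root>')
#   >>> xpath_text(doc, 'title')
#   'Video'
#   >>> xpath_attr(doc, 'title', 'lang')
#   'en'
#   >>> xpath_text(doc, 'missing', default=None) is None
#   True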
9e6dd238 1950def get_element_by_id(id, html):
43e8fafd 1951 """Return the content of the tag with the specified ID in the passed HTML document"""
611c1dd9 1952 return get_element_by_attribute('id', id, html)
43e8fafd 1953
12ea2f30 1954
84c237fb 1955def get_element_by_class(class_name, html):
1956 """Return the content of the first tag with the specified class in the passed HTML document"""
1957 retval = get_elements_by_class(class_name, html)
1958 return retval[0] if retval else None
1959
1960
1961def get_element_by_attribute(attribute, value, html, escape_value=True):
1962 retval = get_elements_by_attribute(attribute, value, html, escape_value)
1963 return retval[0] if retval else None
1964
1965
1966def get_elements_by_class(class_name, html):
1967 """Return the content of all tags with the specified class in the passed HTML document as a list"""
1968 return get_elements_by_attribute(
1969 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1970 html, escape_value=False)
1971
1972
2af12ad9 1973def get_elements_by_attribute(attribute, value, html, escape_value=True):
43e8fafd 1974 """Return the content of all tags with the specified attribute in the passed HTML document as a list"""
9e6dd238 1975
1976 value = re.escape(value) if escape_value else value
1977
1978 retlist = []
1979 for m in re.finditer(r'''(?xs)
38285056 1980 <([a-zA-Z0-9:._-]+)
609ff8ca 1981 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056 1982 \s+%s=['"]?%s['"]?
609ff8ca 1983 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1984 \s*>
1985 (?P<content>.*?)
1986 </\1>
1987 ''' % (re.escape(attribute), value), html):
1988 res = m.group('content')
38285056 1989
1990 if res.startswith('"') or res.startswith("'"):
1991 res = res[1:-1]
38285056 1992
2af12ad9 1993 retlist.append(unescapeHTML(res))
a921f407 1994
2af12ad9 1995 return retlist
a921f407 1996
c5229f39 1997
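# Illustrative usage (added for clarity, not part of the original source):
#   >>> get_element_by_class('title', '<div class="title main">Some video</div>')
#   'Some video'
#   >>> get_element_by_attribute('data-id', '42', '<span data-id="42">hit</span>')
#   'hit'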
1998class HTMLAttributeParser(compat_HTMLParser):
1999 """Trivial HTML parser to gather the attributes for a single element"""
b6e0c7d2 2000
8bb56eee 2001 def __init__(self):
c5229f39 2002 self.attrs = {}
2003 compat_HTMLParser.__init__(self)
2004
2005 def handle_starttag(self, tag, attrs):
2006 self.attrs = dict(attrs)
2007
c5229f39 2008
2009class HTMLListAttrsParser(compat_HTMLParser):
2010 """HTML parser to gather the attributes for the elements of a list"""
2011
2012 def __init__(self):
2013 compat_HTMLParser.__init__(self)
2014 self.items = []
2015 self._level = 0
2016
2017 def handle_starttag(self, tag, attrs):
2018 if tag == 'li' and self._level == 0:
2019 self.items.append(dict(attrs))
2020 self._level += 1
2021
2022 def handle_endtag(self, tag):
2023 self._level -= 1
2024
2025
2026def extract_attributes(html_element):
2027 """Given a string for an HTML element such as
2028 <el
2029 a="foo" B="bar" c="&98;az" d=boz
2030 empty= noval entity="&amp;"
2031 sq='"' dq="'"
2032 >
2033 Decode and return a dictionary of attributes.
2034 {
2035 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2036 'empty': '', 'noval': None, 'entity': '&',
2037 'sq': '"', 'dq': '\''
2038 }.
2039 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2040 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2041 """
2042 parser = HTMLAttributeParser()
2043 try:
2044 parser.feed(html_element)
2045 parser.close()
2046 # Older Python may throw HTMLParseError in case of malformed HTML
2047 except compat_HTMLParseError:
2048 pass
8bb56eee 2049 return parser.attrs
9e6dd238 2050
c5229f39 2051
2052def parse_list(webpage):
 2053 """Given a string for a series of HTML <li> elements,
 2054 return a list of dictionaries with their attributes"""
2055 parser = HTMLListAttrsParser()
2056 parser.feed(webpage)
2057 parser.close()
2058 return parser.items
2059
2060
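# Illustrative usage (added for clarity, not part of the original source):
#   >>> parse_list('<li data-id="1">a</li><li data-id="2">b</li>')
#   [{'data-id': '1'}, {'data-id': '2'}]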
9e6dd238 2061def clean_html(html):
59ae15a5 2062 """Clean an HTML snippet into a readable string"""
2063
2064 if html is None: # Convenience for sanitizing descriptions etc.
2065 return html
2066
2067 # Newline vs <br />
2068 html = html.replace('\n', ' ')
2069 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2070 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2071 # Strip html tags
2072 html = re.sub('<.*?>', '', html)
2073 # Replace html entities
2074 html = unescapeHTML(html)
7decf895 2075 return html.strip()
2076
2077
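# Illustrative usage (added for clarity, not part of the original source):
#   >>> clean_html('first line<br/>second &amp; third\n')
#   'first line\nsecond & third'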
d77c3dfd 2078def sanitize_open(filename, open_mode):
2079 """Try to open the given filename, and slightly tweak it if this fails.
2080
2081 Attempts to open the given filename. If this fails, it tries to change
2082 the filename slightly, step by step, until it's either able to open it
2083 or it fails and raises a final exception, like the standard open()
2084 function.
2085
2086 It returns the tuple (stream, definitive_file_name).
2087 """
2088 try:
28e614de 2089 if filename == '-':
2090 if sys.platform == 'win32':
2091 import msvcrt
2092 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
898280a0 2093 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2094 stream = open(encodeFilename(filename), open_mode)
2095 return (stream, filename)
2096 except (IOError, OSError) as err:
2097 if err.errno in (errno.EACCES,):
2098 raise
59ae15a5 2099
f45c185f 2100 # In case of error, try to remove win32 forbidden chars
d55de57b 2101 alt_filename = sanitize_path(filename)
2102 if alt_filename == filename:
2103 raise
2104 else:
2105 # An exception here should be caught in the caller
d55de57b 2106 stream = open(encodeFilename(alt_filename), open_mode)
f45c185f 2107 return (stream, alt_filename)
2108
2109
2110def timeconvert(timestr):
2111 """Convert RFC 2822 defined time string into system timestamp"""
2112 timestamp = None
2113 timetuple = email.utils.parsedate_tz(timestr)
2114 if timetuple is not None:
2115 timestamp = email.utils.mktime_tz(timetuple)
2116 return timestamp
1c469a94 2117
5f6a1245 2118
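# Illustrative usage (added for clarity, not part of the original source):
#   >>> timeconvert('Thu, 01 Jan 1970 00:00:00 +0000')
#   0
#   >>> timeconvert('not a date') is None
#   True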
796173d0 2119def sanitize_filename(s, restricted=False, is_id=False):
2120 """Sanitizes a string so it could be used as part of a filename.
2121 If restricted is set, use a stricter subset of allowed characters.
2122 Set is_id if this is not an arbitrary string, but an ID that should be kept
2123 if possible.
2124 """
2125 def replace_insane(char):
2126 if restricted and char in ACCENT_CHARS:
2127 return ACCENT_CHARS[char]
91dd88b9 2128 elif not restricted and char == '\n':
2129 return ' '
2130 elif char == '?' or ord(char) < 32 or ord(char) == 127:
2131 return ''
2132 elif char == '"':
2133 return '' if restricted else '\''
2134 elif char == ':':
2135 return '_-' if restricted else ' -'
2136 elif char in '\\/|*<>':
2137 return '_'
627dcfff 2138 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2139 return '_'
2140 if restricted and ord(char) > 127:
2141 return '_'
2142 return char
2143
639f1cea 2144 if s == '':
2145 return ''
2146 # Handle timestamps
2147 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
28e614de 2148 result = ''.join(map(replace_insane, s))
2149 if not is_id:
2150 while '__' in result:
2151 result = result.replace('__', '_')
2152 result = result.strip('_')
2153 # Common case of "Foreign band name - English song title"
2154 if restricted and result.startswith('-_'):
2155 result = result[2:]
2156 if result.startswith('-'):
2157 result = '_' + result[len('-'):]
a7440261 2158 result = result.lstrip('.')
2159 if not result:
2160 result = '_'
59ae15a5 2161 return result
d77c3dfd 2162
5f6a1245 2163
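# Illustrative usage (added for clarity, not part of the original source):
#   >>> sanitize_filename('Foo: Bar?')
#   'Foo - Bar'
#   >>> sanitize_filename('Ångström: 1/2?', restricted=True)
#   'Angstrom_-_1_2'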
c2934512 2164def sanitize_path(s, force=False):
a2aaf4db 2165 """Sanitizes and normalizes path on Windows"""
c2934512 2166 if sys.platform == 'win32':
c4218ac3 2167 force = False
c2934512 2168 drive_or_unc, _ = os.path.splitdrive(s)
2169 if sys.version_info < (2, 7) and not drive_or_unc:
2170 drive_or_unc, _ = os.path.splitunc(s)
2171 elif force:
2172 drive_or_unc = ''
2173 else:
a2aaf4db 2174 return s
c2934512 2175
2176 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2177 if drive_or_unc:
2178 norm_path.pop(0)
2179 sanitized_path = [
ec85ded8 2180 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
a2aaf4db 2181 for path_part in norm_path]
2182 if drive_or_unc:
2183 sanitized_path.insert(0, drive_or_unc + os.path.sep)
c4218ac3 2184 elif force and s[0] == os.path.sep:
2185 sanitized_path.insert(0, os.path.sep)
2186 return os.path.join(*sanitized_path)
2187
2188
17bcc626 2189def sanitize_url(url):
2190 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2191 # the number of unwanted failures due to missing protocol
2192 if url.startswith('//'):
2193 return 'http:%s' % url
2194 # Fix some common typos seen so far
2195 COMMON_TYPOS = (
067aa17e 2196 # https://github.com/ytdl-org/youtube-dl/issues/15649
2197 (r'^httpss://', r'https://'),
2198 # https://bx1.be/lives/direct-tv/
2199 (r'^rmtp([es]?)://', r'rtmp\1://'),
2200 )
2201 for mistake, fixup in COMMON_TYPOS:
2202 if re.match(mistake, url):
2203 return re.sub(mistake, fixup, url)
bc6b9bcd 2204 return url
2205
2206
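# Illustrative usage (added for clarity, not part of the original source):
#   >>> sanitize_url('//example.com/playlist')
#   'http://example.com/playlist'
#   >>> sanitize_url('httpss://example.com/watch')
#   'https://example.com/watch'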
2207def extract_basic_auth(url):
2208 parts = compat_urlparse.urlsplit(url)
2209 if parts.username is None:
2210 return url, None
2211 url = compat_urlparse.urlunsplit(parts._replace(netloc=(
2212 parts.hostname if parts.port is None
2213 else '%s:%d' % (parts.hostname, parts.port))))
2214 auth_payload = base64.b64encode(
2215 ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
2216 return url, 'Basic ' + auth_payload.decode('utf-8')
2217
2218
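# Illustrative usage (added for clarity, not part of the original source):
# credentials embedded in the URL are moved into a Basic auth header value:
#   >>> extract_basic_auth('http://user:pass@example.com/feed')
#   ('http://example.com/feed', 'Basic dXNlcjpwYXNz')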
67dda517 2219def sanitized_Request(url, *args, **kwargs):
bc6b9bcd 2220 url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
2221 if auth_header is not None:
2222 headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
2223 headers['Authorization'] = auth_header
2224 return compat_urllib_request.Request(url, *args, **kwargs)
2225
2226
2227def expand_path(s):
2228 """Expand shell variables and ~"""
2229 return os.path.expandvars(compat_expanduser(s))
2230
2231
d77c3dfd 2232def orderedSet(iterable):
2233 """ Remove all duplicates from the input iterable """
2234 res = []
2235 for el in iterable:
2236 if el not in res:
2237 res.append(el)
2238 return res
d77c3dfd 2239
912b38b4 2240
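# Illustrative usage (added for clarity, not part of the original source):
#   >>> orderedSet(['a', 'b', 'a', 'c', 'b'])
#   ['a', 'b', 'c']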
55b2f099 2241def _htmlentity_transform(entity_with_semicolon):
4e408e47 2242 """Transforms an HTML entity to a character."""
2243 entity = entity_with_semicolon[:-1]
2244
2245 # Known non-numeric HTML entity
2246 if entity in compat_html_entities.name2codepoint:
2247 return compat_chr(compat_html_entities.name2codepoint[entity])
2248
2249 # TODO: HTML5 allows entities without a semicolon. For example,
2250 # '&Eacuteric' should be decoded as 'Éric'.
2251 if entity_with_semicolon in compat_html_entities_html5:
2252 return compat_html_entities_html5[entity_with_semicolon]
2253
91757b0f 2254 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2255 if mobj is not None:
2256 numstr = mobj.group(1)
28e614de 2257 if numstr.startswith('x'):
4e408e47 2258 base = 16
28e614de 2259 numstr = '0%s' % numstr
2260 else:
2261 base = 10
067aa17e 2262 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2263 try:
2264 return compat_chr(int(numstr, base))
2265 except ValueError:
2266 pass
2267
2268 # Unknown entity in name, return its literal representation
7a3f0c00 2269 return '&%s;' % entity
2270
2271
d77c3dfd 2272def unescapeHTML(s):
2273 if s is None:
2274 return None
2275 assert type(s) == compat_str
d77c3dfd 2276
4e408e47 2277 return re.sub(
95f3f7c2 2278 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
d77c3dfd 2279
8bf48f23 2280
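# Illustrative usage (added for clarity, not part of the original source):
#   >>> unescapeHTML('Tom &amp; Jerry &#8211; &quot;Best of&quot;')
#   'Tom & Jerry – "Best of"'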
cdb19aa4 2281def escapeHTML(text):
2282 return (
2283 text
2284 .replace('&', '&amp;')
2285 .replace('<', '&lt;')
2286 .replace('>', '&gt;')
2287 .replace('"', '&quot;')
2288 .replace("'", '&#39;')
2289 )
2290
2291
f5b1bca9 2292def process_communicate_or_kill(p, *args, **kwargs):
2293 try:
2294 return p.communicate(*args, **kwargs)
2295 except BaseException: # Including KeyboardInterrupt
2296 p.kill()
2297 p.wait()
2298 raise
2299
2300
d3c93ec2 2301class Popen(subprocess.Popen):
2302 if sys.platform == 'win32':
2303 _startupinfo = subprocess.STARTUPINFO()
2304 _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
2305 else:
2306 _startupinfo = None
2307
2308 def __init__(self, *args, **kwargs):
2309 super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
2310
2311 def communicate_or_kill(self, *args, **kwargs):
2312 return process_communicate_or_kill(self, *args, **kwargs)
2313
2314
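# Illustrative usage (added for clarity, not part of the original source):
# run a child process with the window-suppressing startupinfo applied on
# Windows, and have it killed if communicate() is interrupted:
#   proc = Popen([sys.executable, '-c', 'print("ok")'], stdout=subprocess.PIPE)
#   stdout, _ = proc.communicate_or_kill()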
2315def get_subprocess_encoding():
2316 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2317 # For subprocess calls, encode with locale encoding
2318 # Refer to http://stackoverflow.com/a/9951851/35070
2319 encoding = preferredencoding()
2320 else:
2321 encoding = sys.getfilesystemencoding()
2322 if encoding is None:
2323 encoding = 'utf-8'
2324 return encoding
2325
2326
8bf48f23 2327def encodeFilename(s, for_subprocess=False):
59ae15a5
PH
2328 """
2329 @param s The name of the file
2330 """
d77c3dfd 2331
8bf48f23 2332 assert type(s) == compat_str
d77c3dfd 2333
59ae15a5
PH
2334 # Python 3 has a Unicode API
2335 if sys.version_info >= (3, 0):
2336 return s
0f00efed 2337
aa49acd1
S
2338 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2339 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2340 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2341 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2342 return s
2343
8ee239e9
YCH
2344 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2345 if sys.platform.startswith('java'):
2346 return s
2347
aa49acd1
S
2348 return s.encode(get_subprocess_encoding(), 'ignore')
2349
2350
2351def decodeFilename(b, for_subprocess=False):
2352
2353 if sys.version_info >= (3, 0):
2354 return b
2355
2356 if not isinstance(b, bytes):
2357 return b
2358
2359 return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2360
f07b74fc
PH
2361
2362def encodeArgument(s):
2363 if not isinstance(s, compat_str):
2364 # Legacy code that uses byte strings
2365 # Uncomment the following line after fixing all post processors
7af808a5 2366 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2367 s = s.decode('ascii')
2368 return encodeFilename(s, True)
2369
2370
aa49acd1
S
2371def decodeArgument(b):
2372 return decodeFilename(b, True)
2373
2374
8271226a
PH
2375def decodeOption(optval):
2376 if optval is None:
2377 return optval
2378 if isinstance(optval, bytes):
2379 optval = optval.decode(preferredencoding())
2380
2381 assert isinstance(optval, compat_str)
2382 return optval
1c256f70 2383
5f6a1245 2384
aa7785f8 2385_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
2386
2387
2388def timetuple_from_msec(msec):
2389 secs, msec = divmod(msec, 1000)
2390 mins, secs = divmod(secs, 60)
2391 hrs, mins = divmod(mins, 60)
2392 return _timetuple(hrs, mins, secs, msec)
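# Illustrative usage:
# timetuple_from_msec(90061001) == Time(hours=25, minutes=1, seconds=1, milliseconds=1)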
2393
2394
cdb19aa4 2395def formatSeconds(secs, delim=':', msec=False):
aa7785f8 2396 time = timetuple_from_msec(secs * 1000)
2397 if time.hours:
2398 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
2399 elif time.minutes:
2400 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
4539dd30 2401 else:
aa7785f8 2402 ret = '%d' % time.seconds
2403 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
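# Illustrative usage: leading zero-valued units are omitted, e.g.
# formatSeconds(3661) == '1:01:01', formatSeconds(61) == '1:01',
# and formatSeconds(61, msec=True) == '1:01.000'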
4539dd30 2404
a0ddb8a2 2405
77562778 2406def _ssl_load_windows_store_certs(ssl_context, storename):
2407 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2408 try:
2409 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
2410 if encoding == 'x509_asn' and (
2411 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
2412 except PermissionError:
2413 return
2414 for cert in certs:
a2366922 2415 try:
77562778 2416 ssl_context.load_verify_locations(cadata=cert)
2417 except ssl.SSLError:
a2366922
PH
2418 pass
2419
77562778 2420
2421def make_HTTPS_handler(params, **kwargs):
2422 opts_check_certificate = not params.get('nocheckcertificate')
2423 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2424 context.check_hostname = opts_check_certificate
2425 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
2426 if opts_check_certificate:
4e3d1898 2427 try:
2428 context.load_default_certs()
2429 # Work around the issue in load_default_certs when there are bad certificates. See:
2430 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2431 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2432 except ssl.SSLError:
2433 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2434 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
2435 # Create a new context to discard any certificates that were already loaded
2436 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2437 context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
2438 for storename in ('CA', 'ROOT'):
2439 _ssl_load_windows_store_certs(context, storename)
2440 context.set_default_verify_paths()
77562778 2441 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2442
732ea2f0 2443
5873d4cc 2444def bug_reports_message(before=';'):
08f2a92c 2445 if ytdl_is_updateable():
7a5c1cfe 2446 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2447 else:
7a5c1cfe 2448 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2449 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2450 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2451 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2452
2453 before = before.rstrip()
2454 if not before or before.endswith(('.', '!', '?')):
2455 msg = msg[0].title() + msg[1:]
2456
2457 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2458
2459
bf5b9d85
PM
2460class YoutubeDLError(Exception):
2461 """Base exception for YoutubeDL errors."""
aa9369a2 2462 msg = None
2463
2464 def __init__(self, msg=None):
2465 if msg is not None:
2466 self.msg = msg
2467 elif self.msg is None:
2468 self.msg = type(self).__name__
2469 super().__init__(self.msg)
bf5b9d85
PM
2470
2471
3158150c 2472network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2473if hasattr(ssl, 'CertificateError'):
2474 network_exceptions.append(ssl.CertificateError)
2475network_exceptions = tuple(network_exceptions)
2476
2477
bf5b9d85 2478class ExtractorError(YoutubeDLError):
1c256f70 2479 """Error during info extraction."""
5f6a1245 2480
1151c407 2481 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2482 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2483 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2484 """
3158150c 2485 if sys.exc_info()[0] in network_exceptions:
9a82b238 2486 expected = True
d5979c5d 2487
526d74ec 2488 self.msg = str(msg)
1c256f70 2489 self.traceback = tb
1151c407 2490 self.expected = expected
2eabb802 2491 self.cause = cause
d11271dd 2492 self.video_id = video_id
1151c407 2493 self.ie = ie
2494 self.exc_info = sys.exc_info() # preserve original exception
2495
2496 super(ExtractorError, self).__init__(''.join((
2497 format_field(ie, template='[%s] '),
2498 format_field(video_id, template='%s: '),
526d74ec 2499 self.msg,
1151c407 2500 format_field(cause, template=' (caused by %r)'),
2501 '' if expected else bug_reports_message())))
1c256f70 2502
01951dda
PH
2503 def format_traceback(self):
2504 if self.traceback is None:
2505 return None
28e614de 2506 return ''.join(traceback.format_tb(self.traceback))
01951dda 2507
1c256f70 2508
416c7fcb
PH
2509class UnsupportedError(ExtractorError):
2510 def __init__(self, url):
2511 super(UnsupportedError, self).__init__(
2512 'Unsupported URL: %s' % url, expected=True)
2513 self.url = url
2514
2515
55b3e45b
JMF
2516class RegexNotFoundError(ExtractorError):
2517 """Error when a regex didn't match"""
2518 pass
2519
2520
773f291d
S
2521class GeoRestrictedError(ExtractorError):
2522 """Geographic restriction Error exception.
2523
2524 This exception may be thrown when a video is not available from your
2525 geographic location due to geographic restrictions imposed by a website.
2526 """
b6e0c7d2 2527
0db3bae8 2528 def __init__(self, msg, countries=None, **kwargs):
2529 kwargs['expected'] = True
2530 super(GeoRestrictedError, self).__init__(msg, **kwargs)
773f291d
S
2531 self.countries = countries
2532
2533
bf5b9d85 2534class DownloadError(YoutubeDLError):
59ae15a5 2535 """Download Error exception.
d77c3dfd 2536
59ae15a5
PH
2537 This exception may be thrown by FileDownloader objects if they are not
2538 configured to continue on errors. They will contain the appropriate
2539 error message.
2540 """
5f6a1245 2541
8cc83b8d
FV
2542 def __init__(self, msg, exc_info=None):
2543 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2544 super(DownloadError, self).__init__(msg)
2545 self.exc_info = exc_info
d77c3dfd
FV
2546
2547
498f5606 2548class EntryNotInPlaylist(YoutubeDLError):
2549 """Entry not in playlist exception.
2550
2551 This exception will be thrown by YoutubeDL when a requested entry
2552 is not found in the playlist info_dict
2553 """
aa9369a2 2554 msg = 'Entry not found in info'
498f5606 2555
2556
bf5b9d85 2557class SameFileError(YoutubeDLError):
59ae15a5 2558 """Same File exception.
d77c3dfd 2559
59ae15a5
PH
2560 This exception will be thrown by FileDownloader objects if they detect
2561 multiple files would have to be downloaded to the same file on disk.
2562 """
aa9369a2 2563 msg = 'Fixed output name but more than one file to download'
2564
2565 def __init__(self, filename=None):
2566 if filename is not None:
2567 self.msg += f': {filename}'
2568 super().__init__(self.msg)
d77c3dfd
FV
2569
2570
bf5b9d85 2571class PostProcessingError(YoutubeDLError):
59ae15a5 2572 """Post Processing exception.
d77c3dfd 2573
59ae15a5
PH
2574 This exception may be raised by PostProcessor's .run() method to
2575 indicate an error in the postprocessing task.
2576 """
5f6a1245 2577
5f6a1245 2578
48f79687 2579class DownloadCancelled(YoutubeDLError):
2580 """ Exception raised when the download queue should be interrupted """
2581 msg = 'The download was cancelled'
8b0d7497 2582
8b0d7497 2583
48f79687 2584class ExistingVideoReached(DownloadCancelled):
2585 """ --break-on-existing triggered """
2586 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
8b0d7497 2587
48f79687 2588
2589class RejectedVideoReached(DownloadCancelled):
2590 """ --break-on-reject triggered """
2591 msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
51d9739f 2592
2593
48f79687 2594class MaxDownloadsReached(DownloadCancelled):
59ae15a5 2595 """ --max-downloads limit has been reached. """
48f79687 2596 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2597
2598
f2ebc5c7 2599class ReExtractInfo(YoutubeDLError):
2600 """ Video info needs to be re-extracted. """
2601
2602 def __init__(self, msg, expected=False):
2603 super().__init__(msg)
2604 self.expected = expected
2605
2606
2607class ThrottledDownload(ReExtractInfo):
48f79687 2608 """ Download speed below --throttled-rate. """
aa9369a2 2609 msg = 'The download speed is below throttle limit'
d77c3dfd 2610
43b22906 2611 def __init__(self):
2612 super().__init__(self.msg, expected=False)
f2ebc5c7 2613
d77c3dfd 2614
bf5b9d85 2615class UnavailableVideoError(YoutubeDLError):
59ae15a5 2616 """Unavailable Format exception.
d77c3dfd 2617
59ae15a5
PH
2618 This exception will be thrown when a video is requested
2619 in a format that is not available for that video.
2620 """
aa9369a2 2621 msg = 'Unable to download video'
2622
2623 def __init__(self, err=None):
2624 if err is not None:
2625 self.msg += f': {err}'
2626 super().__init__(self.msg)
d77c3dfd
FV
2627
2628
bf5b9d85 2629class ContentTooShortError(YoutubeDLError):
59ae15a5 2630 """Content Too Short exception.
d77c3dfd 2631
59ae15a5
PH
2632 This exception may be raised by FileDownloader objects when a file they
2633 download is too small for what the server announced first, indicating
2634 the connection was probably interrupted.
2635 """
d77c3dfd 2636
59ae15a5 2637 def __init__(self, downloaded, expected):
bf5b9d85
PM
2638 super(ContentTooShortError, self).__init__(
2639 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2640 )
2c7ed247 2641 # Both in bytes
59ae15a5
PH
2642 self.downloaded = downloaded
2643 self.expected = expected
d77c3dfd 2644
5f6a1245 2645
bf5b9d85 2646class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2647 def __init__(self, code=None, msg='Unknown error'):
2648 super(XAttrMetadataError, self).__init__(msg)
2649 self.code = code
bd264412 2650 self.msg = msg
efa97bdc
YCH
2651
2652 # Parsing code and msg
3089bc74 2653 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2654 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2655 self.reason = 'NO_SPACE'
2656 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2657 self.reason = 'VALUE_TOO_LONG'
2658 else:
2659 self.reason = 'NOT_SUPPORTED'
2660
2661
bf5b9d85 2662class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2663 pass
2664
2665
c5a59d93 2666def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2667 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2668 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2669 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2670 if sys.version_info < (3, 0):
65220c3b
S
2671 kwargs['strict'] = True
2672 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2673 source_address = ydl_handler._params.get('source_address')
8959018a 2674
be4a824d 2675 if source_address is not None:
8959018a
AU
2676 # This is a workaround for _create_connection() from socket, which will try all
2677 # address data from getaddrinfo() including IPv6. This filters the result from
2678 # getaddrinfo() based on the source_address value.
2679 # This is based on the cpython socket.create_connection() function.
2680 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2681 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2682 host, port = address
2683 err = None
2684 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2685 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2686 ip_addrs = [addr for addr in addrs if addr[0] == af]
2687 if addrs and not ip_addrs:
2688 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2689 raise socket.error(
2690 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2691 % (ip_version, source_address[0]))
8959018a
AU
2692 for res in ip_addrs:
2693 af, socktype, proto, canonname, sa = res
2694 sock = None
2695 try:
2696 sock = socket.socket(af, socktype, proto)
2697 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2698 sock.settimeout(timeout)
2699 sock.bind(source_address)
2700 sock.connect(sa)
2701 err = None # Explicitly break reference cycle
2702 return sock
2703 except socket.error as _:
2704 err = _
2705 if sock is not None:
2706 sock.close()
2707 if err is not None:
2708 raise err
2709 else:
9e21e6d9
S
2710 raise socket.error('getaddrinfo returns an empty list')
2711 if hasattr(hc, '_create_connection'):
2712 hc._create_connection = _create_connection
be4a824d
PH
2713 sa = (source_address, 0)
2714 if hasattr(hc, 'source_address'): # Python 2.7+
2715 hc.source_address = sa
2716 else: # Python 2.6
2717 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2718 sock = _create_connection(
be4a824d
PH
2719 (self.host, self.port), self.timeout, sa)
2720 if is_https:
d7932313
PH
2721 self.sock = ssl.wrap_socket(
2722 sock, self.key_file, self.cert_file,
2723 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2724 else:
2725 self.sock = sock
2726 hc.connect = functools.partial(_hc_connect, hc)
2727
2728 return hc
2729
2730
87f0e62d 2731def handle_youtubedl_headers(headers):
992fc9d6
YCH
2732 filtered_headers = headers
2733
2734 if 'Youtubedl-no-compression' in filtered_headers:
2735 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2736 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2737
992fc9d6 2738 return filtered_headers
87f0e62d
YCH
2739
2740
acebc9cd 2741class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2742 """Handler for HTTP requests and responses.
2743
2744 This class, when installed with an OpenerDirector, automatically adds
2745 the standard headers to every HTTP request and handles gzipped and
2746 deflated responses from web servers. If compression is to be avoided in
2747 a particular request, the original request in the program code only has
0424ec30 2748 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2749 removed before making the real request.
2750
2751 Part of this code was copied from:
2752
2753 http://techknack.net/python-urllib2-handlers/
2754
2755 Andrew Rowls, the author of that code, agreed to release it to the
2756 public domain.
2757 """
2758
be4a824d
PH
2759 def __init__(self, params, *args, **kwargs):
2760 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2761 self._params = params
2762
2763 def http_open(self, req):
71aff188
YCH
2764 conn_class = compat_http_client.HTTPConnection
2765
2766 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2767 if socks_proxy:
2768 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2769 del req.headers['Ytdl-socks-proxy']
2770
be4a824d 2771 return self.do_open(functools.partial(
71aff188 2772 _create_http_connection, self, conn_class, False),
be4a824d
PH
2773 req)
2774
59ae15a5
PH
2775 @staticmethod
2776 def deflate(data):
fc2119f2 2777 if not data:
2778 return data
59ae15a5
PH
2779 try:
2780 return zlib.decompress(data, -zlib.MAX_WBITS)
2781 except zlib.error:
2782 return zlib.decompress(data)
2783
acebc9cd 2784 def http_request(self, req):
51f267d9
S
2785 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
2786 # always respected by websites, and some give out URLs with non-percent-encoded
2787 # non-ASCII characters (see telemb.py, ard.py [#3412])
2788 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2789 # To work around aforementioned issue we will replace request's original URL with
2790 # percent-encoded one
2791 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2792 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2793 url = req.get_full_url()
2794 url_escaped = escape_url(url)
2795
2796 # Substitute URL if any change after escaping
2797 if url != url_escaped:
15d260eb 2798 req = update_Request(req, url=url_escaped)
51f267d9 2799
33ac271b 2800 for h, v in std_headers.items():
3d5f7a39
JK
2801 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2802 # The dict keys are capitalized because of this bug by urllib
2803 if h.capitalize() not in req.headers:
33ac271b 2804 req.add_header(h, v)
87f0e62d
YCH
2805
2806 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2807
2808 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2809 # Python 2.6 is brain-dead when it comes to fragments
2810 req._Request__original = req._Request__original.partition('#')[0]
2811 req._Request__r_type = req._Request__r_type.partition('#')[0]
2812
59ae15a5
PH
2813 return req
2814
acebc9cd 2815 def http_response(self, req, resp):
59ae15a5
PH
2816 old_resp = resp
2817 # gzip
2818 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2819 content = resp.read()
2820 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2821 try:
2822 uncompressed = io.BytesIO(gz.read())
2823 except IOError as original_ioerror:
2824 # There may be junk at the end of the file
2825 # See http://stackoverflow.com/q/4928560/35070 for details
2826 for i in range(1, 1024):
2827 try:
2828 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2829 uncompressed = io.BytesIO(gz.read())
2830 except IOError:
2831 continue
2832 break
2833 else:
2834 raise original_ioerror
b407d853 2835 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2836 resp.msg = old_resp.msg
c047270c 2837 del resp.headers['Content-encoding']
59ae15a5
PH
2838 # deflate
2839 if resp.headers.get('Content-encoding', '') == 'deflate':
2840 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2841 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2842 resp.msg = old_resp.msg
c047270c 2843 del resp.headers['Content-encoding']
ad729172 2844 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2845 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2846 if 300 <= resp.code < 400:
2847 location = resp.headers.get('Location')
2848 if location:
2849 # As of RFC 2616, the default charset is iso-8859-1, which is respected by Python 3
2850 if sys.version_info >= (3, 0):
2851 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2852 else:
2853 location = location.decode('utf-8')
5a4d9ddb
S
2854 location_escaped = escape_url(location)
2855 if location != location_escaped:
2856 del resp.headers['Location']
9a4aec8b
YCH
2857 if sys.version_info < (3, 0):
2858 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2859 resp.headers['Location'] = location_escaped
59ae15a5 2860 return resp
0f8d03f8 2861
acebc9cd
PH
2862 https_request = http_request
2863 https_response = http_response
bf50b038 2864
5de90176 2865
71aff188
YCH
2866def make_socks_conn_class(base_class, socks_proxy):
2867 assert issubclass(base_class, (
2868 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2869
2870 url_components = compat_urlparse.urlparse(socks_proxy)
2871 if url_components.scheme.lower() == 'socks5':
2872 socks_type = ProxyType.SOCKS5
2873 elif url_components.scheme.lower() in ('socks', 'socks4'):
2874 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2875 elif url_components.scheme.lower() == 'socks4a':
2876 socks_type = ProxyType.SOCKS4A
71aff188 2877
cdd94c2e
YCH
2878 def unquote_if_non_empty(s):
2879 if not s:
2880 return s
2881 return compat_urllib_parse_unquote_plus(s)
2882
71aff188
YCH
2883 proxy_args = (
2884 socks_type,
2885 url_components.hostname, url_components.port or 1080,
2886 True, # Remote DNS
cdd94c2e
YCH
2887 unquote_if_non_empty(url_components.username),
2888 unquote_if_non_empty(url_components.password),
71aff188
YCH
2889 )
2890
2891 class SocksConnection(base_class):
2892 def connect(self):
2893 self.sock = sockssocket()
2894 self.sock.setproxy(*proxy_args)
2895 if type(self.timeout) in (int, float):
2896 self.sock.settimeout(self.timeout)
2897 self.sock.connect((self.host, self.port))
2898
2899 if isinstance(self, compat_http_client.HTTPSConnection):
2900 if hasattr(self, '_context'): # Python > 2.6
2901 self.sock = self._context.wrap_socket(
2902 self.sock, server_hostname=self.host)
2903 else:
2904 self.sock = ssl.wrap_socket(self.sock)
2905
2906 return SocksConnection
2907
2908
be4a824d
PH
2909class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2910 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2911 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2912 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2913 self._params = params
2914
2915 def https_open(self, req):
4f264c02 2916 kwargs = {}
71aff188
YCH
2917 conn_class = self._https_conn_class
2918
4f264c02
JMF
2919 if hasattr(self, '_context'): # python > 2.6
2920 kwargs['context'] = self._context
2921 if hasattr(self, '_check_hostname'): # python 3.x
2922 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2923
2924 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2925 if socks_proxy:
2926 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2927 del req.headers['Ytdl-socks-proxy']
2928
be4a824d 2929 return self.do_open(functools.partial(
71aff188 2930 _create_http_connection, self, conn_class, True),
4f264c02 2931 req, **kwargs)
be4a824d
PH
2932
2933
1bab3437 2934class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2935 """
2936 See [1] for cookie file format.
2937
2938 1. https://curl.haxx.se/docs/http-cookies.html
2939 """
e7e62441 2940 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2941 _ENTRY_LEN = 7
2942 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2943# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2944
2945'''
2946 _CookieFileEntry = collections.namedtuple(
2947 'CookieFileEntry',
2948 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2949
1bab3437 2950 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2951 """
2952 Save cookies to a file.
2953
2954 Most of the code is taken from CPython 3.8 and slightly adapted
2955 to support cookie files with UTF-8 in both python 2 and 3.
2956 """
2957 if filename is None:
2958 if self.filename is not None:
2959 filename = self.filename
2960 else:
2961 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2962
1bab3437
S
2963 # Store session cookies with `expires` set to 0 instead of an empty
2964 # string
2965 for cookie in self:
2966 if cookie.expires is None:
2967 cookie.expires = 0
c380cc28
S
2968
2969 with io.open(filename, 'w', encoding='utf-8') as f:
2970 f.write(self._HEADER)
2971 now = time.time()
2972 for cookie in self:
2973 if not ignore_discard and cookie.discard:
2974 continue
2975 if not ignore_expires and cookie.is_expired(now):
2976 continue
2977 if cookie.secure:
2978 secure = 'TRUE'
2979 else:
2980 secure = 'FALSE'
2981 if cookie.domain.startswith('.'):
2982 initial_dot = 'TRUE'
2983 else:
2984 initial_dot = 'FALSE'
2985 if cookie.expires is not None:
2986 expires = compat_str(cookie.expires)
2987 else:
2988 expires = ''
2989 if cookie.value is None:
2990 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2991 # with no name, whereas http.cookiejar regards it as a
2992 # cookie with no value.
2993 name = ''
2994 value = cookie.name
2995 else:
2996 name = cookie.name
2997 value = cookie.value
2998 f.write(
2999 '\t'.join([cookie.domain, initial_dot, cookie.path,
3000 secure, expires, name, value]) + '\n')
1bab3437
S
3001
3002 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 3003 """Load cookies from a file."""
3004 if filename is None:
3005 if self.filename is not None:
3006 filename = self.filename
3007 else:
3008 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
3009
c380cc28
S
3010 def prepare_line(line):
3011 if line.startswith(self._HTTPONLY_PREFIX):
3012 line = line[len(self._HTTPONLY_PREFIX):]
3013 # comments and empty lines are fine
3014 if line.startswith('#') or not line.strip():
3015 return line
3016 cookie_list = line.split('\t')
3017 if len(cookie_list) != self._ENTRY_LEN:
3018 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
3019 cookie = self._CookieFileEntry(*cookie_list)
3020 if cookie.expires_at and not cookie.expires_at.isdigit():
3021 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
3022 return line
3023
e7e62441 3024 cf = io.StringIO()
c380cc28 3025 with io.open(filename, encoding='utf-8') as f:
e7e62441 3026 for line in f:
c380cc28
S
3027 try:
3028 cf.write(prepare_line(line))
3029 except compat_cookiejar.LoadError as e:
3030 write_string(
3031 'WARNING: skipping cookie file entry due to %s: %r\n'
3032 % (e, line), sys.stderr)
3033 continue
e7e62441 3034 cf.seek(0)
3035 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
3036 # Session cookies are denoted by either `expires` field set to
3037 # an empty string or 0. MozillaCookieJar only recognizes the former
3038 # (see [1]). So we need to force the latter to be recognized as session
3039 # cookies on our own.
3040 # Session cookies may be important for cookies-based authentication,
3041 # e.g. usually, when user does not check 'Remember me' check box while
3042 # logging in on a site, some important cookies are stored as session
3043 # cookies so that not recognizing them will result in failed login.
3044 # 1. https://bugs.python.org/issue17164
3045 for cookie in self:
3046 # Treat `expires=0` cookies as session cookies
3047 if cookie.expires == 0:
3048 cookie.expires = None
3049 cookie.discard = True
3050
3051
a6420bf5
S
3052class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
3053 def __init__(self, cookiejar=None):
3054 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
3055
3056 def http_response(self, request, response):
3057 # Python 2 will choke on next HTTP request in row if there are non-ASCII
3058 # characters in Set-Cookie HTTP header of last response (see
067aa17e 3059 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
3060 # In order to at least prevent crashing we will percent encode Set-Cookie
3061 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
3062 # if sys.version_info < (3, 0) and response.headers:
3063 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
3064 # set_cookie = response.headers.get(set_cookie_header)
3065 # if set_cookie:
3066 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
3067 # if set_cookie != set_cookie_escaped:
3068 # del response.headers[set_cookie_header]
3069 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
3070 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
3071
f5fa042c 3072 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
3073 https_response = http_response
3074
3075
fca6dba8 3076class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 3077 """YoutubeDL redirect handler
3078
3079 The code is based on HTTPRedirectHandler implementation from CPython [1].
3080
3081 This redirect handler solves two issues:
3082 - ensures redirect URL is always unicode under python 2
3083 - introduces support for experimental HTTP response status code
3084 308 Permanent Redirect [2] used by some sites [3]
3085
3086 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
3087 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3088 3. https://github.com/ytdl-org/youtube-dl/issues/28768
3089 """
3090
3091 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
3092
3093 def redirect_request(self, req, fp, code, msg, headers, newurl):
3094 """Return a Request or None in response to a redirect.
3095
3096 This is called by the http_error_30x methods when a
3097 redirection response is received. If a redirection should
3098 take place, return a new Request to allow http_error_30x to
3099 perform the redirect. Otherwise, raise HTTPError if no-one
3100 else should try to handle this url. Return None if you can't
3101 but another Handler might.
3102 """
3103 m = req.get_method()
3104 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3105 or code in (301, 302, 303) and m == "POST")):
3106 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3107 # Strictly (according to RFC 2616), 301 or 302 in response to
3108 # a POST MUST NOT cause a redirection without confirmation
3109 # from the user (of urllib.request, in this case). In practice,
3110 # essentially all clients do redirect in this case, so we do
3111 # the same.
3112
3113 # On python 2 urlh.geturl() may sometimes return redirect URL
3114 # as a byte string instead of unicode. This workaround forces
3115 # it to always return unicode.
3116 if sys.version_info[0] < 3:
3117 newurl = compat_str(newurl)
3118
3119 # Be conciliant with URIs containing a space. This is mainly
3120 # redundant with the more complete encoding done in http_error_302(),
3121 # but it is kept for compatibility with other callers.
3122 newurl = newurl.replace(' ', '%20')
3123
3124 CONTENT_HEADERS = ("content-length", "content-type")
3125 # NB: don't use dict comprehension for python 2.6 compatibility
3126 newheaders = dict((k, v) for k, v in req.headers.items()
3127 if k.lower() not in CONTENT_HEADERS)
3128 return compat_urllib_request.Request(
3129 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3130 unverifiable=True)
fca6dba8
S
3131
3132
46f59e89
S
3133def extract_timezone(date_str):
3134 m = re.search(
f137e4c2 3135 r'''(?x)
3136 ^.{8,}? # >=8 char non-TZ prefix, if present
3137 (?P<tz>Z| # just the UTC Z, or
3138 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3139 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3140 [ ]? # optional space
3141 (?P<sign>\+|-) # +/-
3142 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3143 $)
3144 ''', date_str)
46f59e89
S
3145 if not m:
3146 timezone = datetime.timedelta()
3147 else:
3148 date_str = date_str[:-len(m.group('tz'))]
3149 if not m.group('sign'):
3150 timezone = datetime.timedelta()
3151 else:
3152 sign = 1 if m.group('sign') == '+' else -1
3153 timezone = datetime.timedelta(
3154 hours=sign * int(m.group('hours')),
3155 minutes=sign * int(m.group('minutes')))
3156 return timezone, date_str
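# Illustrative usage: extract_timezone('2021-05-01T12:00:00+05:30') returns a
# datetime.timedelta of +5h30m together with '2021-05-01T12:00:00', i.e. the
# UTC offset is parsed out and stripped from the returned date string.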
3157
3158
08b38d54 3159def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3160 """ Return a UNIX timestamp from the given date """
3161
3162 if date_str is None:
3163 return None
3164
52c3a6e4
S
3165 date_str = re.sub(r'\.[0-9]+', '', date_str)
3166
08b38d54 3167 if timezone is None:
46f59e89
S
3168 timezone, date_str = extract_timezone(date_str)
3169
52c3a6e4
S
3170 try:
3171 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3172 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3173 return calendar.timegm(dt.timetuple())
3174 except ValueError:
3175 pass
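# Illustrative usage:
# parse_iso8601('1970-01-02T00:00:00Z') == 86400
# parse_iso8601('1970-01-02 00:00:00Z', delimiter=' ') == 86400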
912b38b4
PH
3176
3177
46f59e89
S
3178def date_formats(day_first=True):
3179 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3180
3181
42bdd9d0 3182def unified_strdate(date_str, day_first=True):
bf50b038 3183 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3184
3185 if date_str is None:
3186 return None
bf50b038 3187 upload_date = None
5f6a1245 3188 # Replace commas
026fcc04 3189 date_str = date_str.replace(',', ' ')
42bdd9d0 3190 # Remove AM/PM + timezone
9bb8e0a3 3191 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3192 _, date_str = extract_timezone(date_str)
42bdd9d0 3193
46f59e89 3194 for expression in date_formats(day_first):
bf50b038
JMF
3195 try:
3196 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3197 except ValueError:
bf50b038 3198 pass
42393ce2
PH
3199 if upload_date is None:
3200 timetuple = email.utils.parsedate_tz(date_str)
3201 if timetuple:
c6b9cf05
S
3202 try:
3203 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3204 except ValueError:
3205 pass
6a750402
JMF
3206 if upload_date is not None:
3207 return compat_str(upload_date)
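# Illustrative usage (assuming the DATE_FORMATS tables defined earlier in this
# module include the usual ISO and long-month formats):
# unified_strdate('1968-12-10') == '19681210'
# unified_strdate('December 21, 2010') == '20101221'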
bf50b038 3208
5f6a1245 3209
46f59e89
S
3210def unified_timestamp(date_str, day_first=True):
3211 if date_str is None:
3212 return None
3213
2ae2ffda 3214 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3215
7dc2a74e 3216 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3217 timezone, date_str = extract_timezone(date_str)
3218
3219 # Remove AM/PM + timezone
3220 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3221
deef3195
S
3222 # Remove unrecognized timezones from ISO 8601 alike timestamps
3223 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3224 if m:
3225 date_str = date_str[:-len(m.group('tz'))]
3226
f226880c
PH
3227 # Python only supports microseconds, so remove nanoseconds
3228 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3229 if m:
3230 date_str = m.group(1)
3231
46f59e89
S
3232 for expression in date_formats(day_first):
3233 try:
7dc2a74e 3234 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3235 return calendar.timegm(dt.timetuple())
3236 except ValueError:
3237 pass
3238 timetuple = email.utils.parsedate_tz(date_str)
3239 if timetuple:
7dc2a74e 3240 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3241
3242
28e614de 3243def determine_ext(url, default_ext='unknown_video'):
85750f89 3244 if url is None or '.' not in url:
f4776371 3245 return default_ext
9cb9a5df 3246 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3247 if re.match(r'^[A-Za-z0-9]+$', guess):
3248 return guess
a7aaa398
S
3249 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3250 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3251 return guess.rstrip('/')
73e79f2a 3252 else:
cbdbb766 3253 return default_ext
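# Illustrative usage:
# determine_ext('http://example.com/foo/video.mp4?download=1') == 'mp4'
# determine_ext('http://example.com/page') == 'unknown_video'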
73e79f2a 3254
5f6a1245 3255
824fa511
S
3256def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3257 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3258
5f6a1245 3259
9e62f283 3260def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3261 """
3262 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3263 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3264
3265 format: string date format used to return datetime object from
3266 precision: round the time portion of a datetime object.
3267 auto|microsecond|second|minute|hour|day.
3268 auto: round to the unit provided in date_str (if applicable).
3269 """
3270 auto_precision = False
3271 if precision == 'auto':
3272 auto_precision = True
3273 precision = 'microsecond'
3274 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3275 if date_str in ('now', 'today'):
37254abc 3276 return today
f8795e10
PH
3277 if date_str == 'yesterday':
3278 return today - datetime.timedelta(days=1)
9e62f283 3279 match = re.match(
3280 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3281 date_str)
37254abc 3282 if match is not None:
9e62f283 3283 start_time = datetime_from_str(match.group('start'), precision, format)
3284 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3285 unit = match.group('unit')
9e62f283 3286 if unit == 'month' or unit == 'year':
3287 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3288 unit = 'day'
9e62f283 3289 else:
3290 if unit == 'week':
3291 unit = 'day'
3292 time *= 7
3293 delta = datetime.timedelta(**{unit + 's': time})
3294 new_date = start_time + delta
3295 if auto_precision:
3296 return datetime_round(new_date, unit)
3297 return new_date
3298
3299 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3300
3301
3302def date_from_str(date_str, format='%Y%m%d'):
3303 """
3304 Return a datetime object from a string in the format YYYYMMDD or
3305 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3306
3307 format: string date format used to return datetime object from
3308 """
3309 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3310
3311
3312def datetime_add_months(dt, months):
3313 """Increment/Decrement a datetime object by months."""
3314 month = dt.month + months - 1
3315 year = dt.year + month // 12
3316 month = month % 12 + 1
3317 day = min(dt.day, calendar.monthrange(year, month)[1])
3318 return dt.replace(year, month, day)
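# Illustrative usage: the day is clamped to the length of the target month, e.g.
# datetime_add_months(datetime.datetime(2021, 1, 31), 1) == datetime.datetime(2021, 2, 28)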
3319
3320
3321def datetime_round(dt, precision='day'):
3322 """
3323 Round a datetime object's time to a specific precision
3324 """
3325 if precision == 'microsecond':
3326 return dt
3327
3328 unit_seconds = {
3329 'day': 86400,
3330 'hour': 3600,
3331 'minute': 60,
3332 'second': 1,
3333 }
3334 roundto = lambda x, n: ((x + n / 2) // n) * n
3335 timestamp = calendar.timegm(dt.timetuple())
3336 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3337
3338
e63fc1be 3339def hyphenate_date(date_str):
3340 """
3341 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3342 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3343 if match is not None:
3344 return '-'.join(match.groups())
3345 else:
3346 return date_str
3347
5f6a1245 3348
bd558525
JMF
3349class DateRange(object):
3350 """Represents a time interval between two dates"""
5f6a1245 3351
bd558525
JMF
3352 def __init__(self, start=None, end=None):
3353 """start and end must be strings in the format accepted by date"""
3354 if start is not None:
3355 self.start = date_from_str(start)
3356 else:
3357 self.start = datetime.datetime.min.date()
3358 if end is not None:
3359 self.end = date_from_str(end)
3360 else:
3361 self.end = datetime.datetime.max.date()
37254abc 3362 if self.start > self.end:
bd558525 3363 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3364
bd558525
JMF
3365 @classmethod
3366 def day(cls, day):
3367 """Returns a range that only contains the given day"""
5f6a1245
JW
3368 return cls(day, day)
3369
bd558525
JMF
3370 def __contains__(self, date):
3371 """Check if the date is in the range"""
37254abc
JMF
3372 if not isinstance(date, datetime.date):
3373 date = date_from_str(date)
3374 return self.start <= date <= self.end
5f6a1245 3375
bd558525 3376 def __str__(self):
5f6a1245 3377 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
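# Illustrative usage:
# '20200115' in DateRange('20200101', '20200201')  # True
# '20200115' in DateRange.day('20200114')  # False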
c496ca96
PH
3378
3379
3380def platform_name():
3381 """ Returns the platform name as a compat_str """
3382 res = platform.platform()
3383 if isinstance(res, bytes):
3384 res = res.decode(preferredencoding())
3385
3386 assert isinstance(res, compat_str)
3387 return res
c257baff
PH
3388
3389
49fa4d9a
N
3390def get_windows_version():
3391 ''' Get Windows version. None if it's not running on Windows '''
3392 if compat_os_name == 'nt':
3393 return version_tuple(platform.win32_ver()[1])
3394 else:
3395 return None
3396
3397
b58ddb32
PH
3398def _windows_write_string(s, out):
3399 """ Returns True if the string was written using special methods,
3400 False if it has yet to be written out."""
3401 # Adapted from http://stackoverflow.com/a/3259271/35070
3402
3403 import ctypes
3404 import ctypes.wintypes
3405
3406 WIN_OUTPUT_IDS = {
3407 1: -11,
3408 2: -12,
3409 }
3410
a383a98a
PH
3411 try:
3412 fileno = out.fileno()
3413 except AttributeError:
3414 # If the output stream doesn't have a fileno, it's virtual
3415 return False
aa42e873
PH
3416 except io.UnsupportedOperation:
3417 # Some strange Windows pseudo files?
3418 return False
b58ddb32
PH
3419 if fileno not in WIN_OUTPUT_IDS:
3420 return False
3421
d7cd9a9e 3422 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3423 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3424 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3425 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3426
d7cd9a9e 3427 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3428 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3429 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3430 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3431 written = ctypes.wintypes.DWORD(0)
3432
d7cd9a9e 3433 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3434 FILE_TYPE_CHAR = 0x0002
3435 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3436 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3437 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3438 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3439 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3440 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3441
3442 def not_a_console(handle):
3443 if handle == INVALID_HANDLE_VALUE or handle is None:
3444 return True
3089bc74
S
3445 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3446 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3447
3448 if not_a_console(h):
3449 return False
3450
d1b9c912
PH
3451 def next_nonbmp_pos(s):
3452 try:
3453 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3454 except StopIteration:
3455 return len(s)
3456
3457 while s:
3458 count = min(next_nonbmp_pos(s), 1024)
3459
b58ddb32 3460 ret = WriteConsoleW(
d1b9c912 3461 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3462 if ret == 0:
3463 raise OSError('Failed to write string')
d1b9c912
PH
3464 if not count: # We just wrote a non-BMP character
3465 assert written.value == 2
3466 s = s[1:]
3467 else:
3468 assert written.value > 0
3469 s = s[written.value:]
b58ddb32
PH
3470 return True
3471
3472
734f90bb 3473def write_string(s, out=None, encoding=None):
7459e3a2
PH
3474 if out is None:
3475 out = sys.stderr
8bf48f23 3476 assert type(s) == compat_str
7459e3a2 3477
b58ddb32
PH
3478 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3479 if _windows_write_string(s, out):
3480 return
3481
3089bc74
S
3482 if ('b' in getattr(out, 'mode', '')
3483 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3484 byt = s.encode(encoding or preferredencoding(), 'ignore')
3485 out.write(byt)
3486 elif hasattr(out, 'buffer'):
3487 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3488 byt = s.encode(enc, 'ignore')
3489 out.buffer.write(byt)
3490 else:
8bf48f23 3491 out.write(s)
7459e3a2
PH
3492 out.flush()
3493
3494
48ea9cea
PH
3495def bytes_to_intlist(bs):
3496 if not bs:
3497 return []
3498 if isinstance(bs[0], int): # Python 3
3499 return list(bs)
3500 else:
3501 return [ord(c) for c in bs]
3502
c257baff 3503
cba892fa 3504def intlist_to_bytes(xs):
3505 if not xs:
3506 return b''
edaa23f8 3507 return compat_struct_pack('%dB' % len(xs), *xs)
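# Illustrative round trip:
# bytes_to_intlist(b'yt') == [121, 116]
# intlist_to_bytes([121, 116]) == b'yt'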
c38b1e77
PH
3508
3509
c1c9a79c
PH
3510# Cross-platform file locking
3511if sys.platform == 'win32':
3512 import ctypes.wintypes
3513 import msvcrt
3514
3515 class OVERLAPPED(ctypes.Structure):
3516 _fields_ = [
3517 ('Internal', ctypes.wintypes.LPVOID),
3518 ('InternalHigh', ctypes.wintypes.LPVOID),
3519 ('Offset', ctypes.wintypes.DWORD),
3520 ('OffsetHigh', ctypes.wintypes.DWORD),
3521 ('hEvent', ctypes.wintypes.HANDLE),
3522 ]
3523
3524 kernel32 = ctypes.windll.kernel32
3525 LockFileEx = kernel32.LockFileEx
3526 LockFileEx.argtypes = [
3527 ctypes.wintypes.HANDLE, # hFile
3528 ctypes.wintypes.DWORD, # dwFlags
3529 ctypes.wintypes.DWORD, # dwReserved
3530 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3531 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3532 ctypes.POINTER(OVERLAPPED) # Overlapped
3533 ]
3534 LockFileEx.restype = ctypes.wintypes.BOOL
3535 UnlockFileEx = kernel32.UnlockFileEx
3536 UnlockFileEx.argtypes = [
3537 ctypes.wintypes.HANDLE, # hFile
3538 ctypes.wintypes.DWORD, # dwReserved
3539 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3540 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3541 ctypes.POINTER(OVERLAPPED) # Overlapped
3542 ]
3543 UnlockFileEx.restype = ctypes.wintypes.BOOL
3544 whole_low = 0xffffffff
3545 whole_high = 0x7fffffff
3546
3547 def _lock_file(f, exclusive):
3548 overlapped = OVERLAPPED()
3549 overlapped.Offset = 0
3550 overlapped.OffsetHigh = 0
3551 overlapped.hEvent = 0
3552 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3553 handle = msvcrt.get_osfhandle(f.fileno())
3554 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3555 whole_low, whole_high, f._lock_file_overlapped_p):
3556 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3557
3558 def _unlock_file(f):
3559 assert f._lock_file_overlapped_p
3560 handle = msvcrt.get_osfhandle(f.fileno())
3561 if not UnlockFileEx(handle, 0,
3562 whole_low, whole_high, f._lock_file_overlapped_p):
3563 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3564
3565else:
399a76e6
YCH
3566 # Some platforms, such as Jython, are missing fcntl
3567 try:
3568 import fcntl
c1c9a79c 3569
399a76e6
YCH
3570 def _lock_file(f, exclusive):
3571 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3572
399a76e6
YCH
3573 def _unlock_file(f):
3574 fcntl.flock(f, fcntl.LOCK_UN)
3575 except ImportError:
3576 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3577
3578 def _lock_file(f, exclusive):
3579 raise IOError(UNSUPPORTED_MSG)
3580
3581 def _unlock_file(f):
3582 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3583
3584
3585class locked_file(object):
3586 def __init__(self, filename, mode, encoding=None):
3587 assert mode in ['r', 'a', 'w']
3588 self.f = io.open(filename, mode, encoding=encoding)
3589 self.mode = mode
3590
3591 def __enter__(self):
3592 exclusive = self.mode != 'r'
3593 try:
3594 _lock_file(self.f, exclusive)
3595 except IOError:
3596 self.f.close()
3597 raise
3598 return self
3599
3600 def __exit__(self, etype, value, traceback):
3601 try:
3602 _unlock_file(self.f)
3603 finally:
3604 self.f.close()
3605
3606 def __iter__(self):
3607 return iter(self.f)
3608
3609 def write(self, *args):
3610 return self.f.write(*args)
3611
3612 def read(self, *args):
3613 return self.f.read(*args)
4eb7f1d1
JMF
3614
3615
4644ac55
S
3616def get_filesystem_encoding():
3617 encoding = sys.getfilesystemencoding()
3618 return encoding if encoding is not None else 'utf-8'
3619
3620
4eb7f1d1 3621def shell_quote(args):
a6a173c2 3622 quoted_args = []
4644ac55 3623 encoding = get_filesystem_encoding()
a6a173c2
JMF
3624 for a in args:
3625 if isinstance(a, bytes):
3626 # We may get a filename encoded with 'encodeFilename'
3627 a = a.decode(encoding)
aefce8e6 3628 quoted_args.append(compat_shlex_quote(a))
28e614de 3629 return ' '.join(quoted_args)
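# Illustrative usage (assuming compat_shlex_quote behaves like shlex.quote):
# shell_quote(['echo', 'two words']) == "echo 'two words'"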
9d4660ca
PH
3630
3631
3632def smuggle_url(url, data):
3633 """ Pass additional data in a URL for internal use. """
3634
81953d1a
RA
3635 url, idata = unsmuggle_url(url, {})
3636 data.update(idata)
15707c7e 3637 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3638 {'__youtubedl_smuggle': json.dumps(data)})
3639 return url + '#' + sdata
9d4660ca
PH
3640
3641
79f82953 3642def unsmuggle_url(smug_url, default=None):
83e865a3 3643 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3644 return smug_url, default
28e614de
PH
3645 url, _, sdata = smug_url.rpartition('#')
3646 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3647 data = json.loads(jsond)
3648 return url, data
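# Illustrative round trip:
# url = smuggle_url('http://example.com/video', {'referrer': 'http://example.com'})
# unsmuggle_url(url) == ('http://example.com/video', {'referrer': 'http://example.com'})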
02dbf93f
PH
3649
3650
02dbf93f
PH
3651def format_bytes(bytes):
3652 if bytes is None:
28e614de 3653 return 'N/A'
02dbf93f
PH
3654 if type(bytes) is str:
3655 bytes = float(bytes)
3656 if bytes == 0.0:
3657 exponent = 0
3658 else:
3659 exponent = int(math.log(bytes, 1024.0))
28e614de 3660 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3661 converted = float(bytes) / float(1024 ** exponent)
28e614de 3662 return '%.2f%s' % (converted, suffix)
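# Illustrative usage:
# format_bytes(1536) == '1.50KiB'
# format_bytes(None) == 'N/A'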
f53c966a 3663
1c088fa8 3664
fb47597b
S
3665def lookup_unit_table(unit_table, s):
3666 units_re = '|'.join(re.escape(u) for u in unit_table)
3667 m = re.match(
782b1b5b 3668 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3669 if not m:
3670 return None
3671 num_str = m.group('num').replace(',', '.')
3672 mult = unit_table[m.group('unit')]
3673 return int(float(num_str) * mult)
3674
3675
be64b5b0
PH
3676def parse_filesize(s):
3677 if s is None:
3678 return None
3679
dfb1b146 3680 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3681 # but we support those too
3682 _UNIT_TABLE = {
3683 'B': 1,
3684 'b': 1,
70852b47 3685 'bytes': 1,
be64b5b0
PH
3686 'KiB': 1024,
3687 'KB': 1000,
3688 'kB': 1024,
3689 'Kb': 1000,
13585d76 3690 'kb': 1000,
70852b47
YCH
3691 'kilobytes': 1000,
3692 'kibibytes': 1024,
be64b5b0
PH
3693 'MiB': 1024 ** 2,
3694 'MB': 1000 ** 2,
3695 'mB': 1024 ** 2,
3696 'Mb': 1000 ** 2,
13585d76 3697 'mb': 1000 ** 2,
70852b47
YCH
3698 'megabytes': 1000 ** 2,
3699 'mebibytes': 1024 ** 2,
be64b5b0
PH
3700 'GiB': 1024 ** 3,
3701 'GB': 1000 ** 3,
3702 'gB': 1024 ** 3,
3703 'Gb': 1000 ** 3,
13585d76 3704 'gb': 1000 ** 3,
70852b47
YCH
3705 'gigabytes': 1000 ** 3,
3706 'gibibytes': 1024 ** 3,
be64b5b0
PH
3707 'TiB': 1024 ** 4,
3708 'TB': 1000 ** 4,
3709 'tB': 1024 ** 4,
3710 'Tb': 1000 ** 4,
13585d76 3711 'tb': 1000 ** 4,
70852b47
YCH
3712 'terabytes': 1000 ** 4,
3713 'tebibytes': 1024 ** 4,
be64b5b0
PH
3714 'PiB': 1024 ** 5,
3715 'PB': 1000 ** 5,
3716 'pB': 1024 ** 5,
3717 'Pb': 1000 ** 5,
13585d76 3718 'pb': 1000 ** 5,
70852b47
YCH
3719 'petabytes': 1000 ** 5,
3720 'pebibytes': 1024 ** 5,
be64b5b0
PH
3721 'EiB': 1024 ** 6,
3722 'EB': 1000 ** 6,
3723 'eB': 1024 ** 6,
3724 'Eb': 1000 ** 6,
13585d76 3725 'eb': 1000 ** 6,
70852b47
YCH
3726 'exabytes': 1000 ** 6,
3727 'exbibytes': 1024 ** 6,
be64b5b0
PH
3728 'ZiB': 1024 ** 7,
3729 'ZB': 1000 ** 7,
3730 'zB': 1024 ** 7,
3731 'Zb': 1000 ** 7,
13585d76 3732 'zb': 1000 ** 7,
70852b47
YCH
3733 'zettabytes': 1000 ** 7,
3734 'zebibytes': 1024 ** 7,
be64b5b0
PH
3735 'YiB': 1024 ** 8,
3736 'YB': 1000 ** 8,
3737 'yB': 1024 ** 8,
3738 'Yb': 1000 ** 8,
13585d76 3739 'yb': 1000 ** 8,
70852b47
YCH
3740 'yottabytes': 1000 ** 8,
3741 'yobibytes': 1024 ** 8,
be64b5b0
PH
3742 }
3743
fb47597b
S
3744 return lookup_unit_table(_UNIT_TABLE, s)
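# Illustrative usage: both decimal and binary units are recognized, e.g.
# parse_filesize('5 GB') == 5000000000
# parse_filesize('1.2MiB') == 1258291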
3745
3746
3747def parse_count(s):
3748 if s is None:
be64b5b0
PH
3749 return None
3750
fb47597b
S
3751 s = s.strip()
3752
3753 if re.match(r'^[\d,.]+$', s):
3754 return str_to_int(s)
3755
3756 _UNIT_TABLE = {
3757 'k': 1000,
3758 'K': 1000,
3759 'm': 1000 ** 2,
3760 'M': 1000 ** 2,
3761 'kk': 1000 ** 2,
3762 'KK': 1000 ** 2,
3763 }
be64b5b0 3764
fb47597b 3765 return lookup_unit_table(_UNIT_TABLE, s)
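# Illustrative usage:
# parse_count('1,000') == 1000
# parse_count('15k') == 15000
# parse_count('1.2M') == 1200000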
be64b5b0 3766
2f7ae819 3767
b871d7e9
S
3768def parse_resolution(s):
3769 if s is None:
3770 return {}
3771
17ec8bcf 3772 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
b871d7e9
S
3773 if mobj:
3774 return {
3775 'width': int(mobj.group('w')),
3776 'height': int(mobj.group('h')),
3777 }
3778
17ec8bcf 3779 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
b871d7e9
S
3780 if mobj:
3781 return {'height': int(mobj.group(1))}
3782
3783 mobj = re.search(r'\b([48])[kK]\b', s)
3784 if mobj:
3785 return {'height': int(mobj.group(1)) * 540}
3786
3787 return {}
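# Illustrative usage:
# parse_resolution('1920x1080') == {'width': 1920, 'height': 1080}
# parse_resolution('720p') == {'height': 720}
# parse_resolution('4K') == {'height': 2160}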
3788
3789
0dc41787
S
3790def parse_bitrate(s):
3791 if not isinstance(s, compat_str):
3792 return
3793 mobj = re.search(r'\b(\d+)\s*kbps', s)
3794 if mobj:
3795 return int(mobj.group(1))
3796
3797
a942d6cb 3798def month_by_name(name, lang='en'):
caefb1de
PH
3799 """ Return the number of a month by (locale-independently) English name """
3800
f6717dec 3801 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3802
caefb1de 3803 try:
f6717dec 3804 return month_names.index(name) + 1
7105440c
YCH
3805 except ValueError:
3806 return None
3807
3808
3809def month_by_abbreviation(abbrev):
3810 """ Return the number of a month by (locale-independently) English
3811 abbreviations """
3812
3813 try:
3814 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3815 except ValueError:
3816 return None
18258362
JMF
3817
3818
5aafe895 3819def fix_xml_ampersands(xml_str):
18258362 3820 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3821 return re.sub(
3822 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3823 '&amp;',
5aafe895 3824 xml_str)
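# Illustrative usage: only bare ampersands are escaped, e.g.
# fix_xml_ampersands('<a>Tom & Jerry &amp; co</a>') == '<a>Tom &amp; Jerry &amp; co</a>'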
e3946f98
PH
3825
3826
3827def setproctitle(title):
8bf48f23 3828 assert isinstance(title, compat_str)
c1c05c67
YCH
3829
3830 # ctypes in Jython is not complete
3831 # http://bugs.jython.org/issue2148
3832 if sys.platform.startswith('java'):
3833 return
3834
e3946f98 3835 try:
611c1dd9 3836 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3837 except OSError:
3838 return
2f49bcd6
RC
3839 except TypeError:
3840 # LoadLibrary in Windows Python 2.7.13 only expects
3841 # a bytestring, but since unicode_literals turns
3842 # every string into a unicode string, it fails.
3843 return
6eefe533
PH
3844 title_bytes = title.encode('utf-8')
3845 buf = ctypes.create_string_buffer(len(title_bytes))
3846 buf.value = title_bytes
e3946f98 3847 try:
6eefe533 3848 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3849 except AttributeError:
3850 return # Strange libc, just skip this
d7dda168
PH
3851
3852
3853def remove_start(s, start):
46bc9b7d 3854 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3855
3856
2b9faf55 3857def remove_end(s, end):
46bc9b7d 3858 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3859
3860
31b2051e
S
3861def remove_quotes(s):
3862 if s is None or len(s) < 2:
3863 return s
3864 for quote in ('"', "'", ):
3865 if s[0] == quote and s[-1] == quote:
3866 return s[1:-1]
3867 return s
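# Illustrative usage:
# remove_start('https://www.example.com', 'https://') == 'www.example.com'
# remove_end('video.mp4', '.mp4') == 'video'
# remove_quotes('"hello"') == 'hello'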
3868
3869
b6e0c7d2
U
3870def get_domain(url):
3871 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3872 return domain.group('domain') if domain else None
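# Illustrative usage:
# get_domain('https://www.example.com/path?query=1') == 'example.com'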
3873
3874
29eb5174 3875def url_basename(url):
9b8aaeed 3876 path = compat_urlparse.urlparse(url).path
28e614de 3877 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3878
3879
02dc0a36
S
3880def base_url(url):
3881 return re.match(r'https?://[^?#&]+/', url).group()
3882
3883
e34c3361 3884def urljoin(base, path):
4b5de77b
S
3885 if isinstance(path, bytes):
3886 path = path.decode('utf-8')
e34c3361
S
3887 if not isinstance(path, compat_str) or not path:
3888 return None
fad4ceb5 3889 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3890 return path
4b5de77b
S
3891 if isinstance(base, bytes):
3892 base = base.decode('utf-8')
3893 if not isinstance(base, compat_str) or not re.match(
3894 r'^(?:https?:)?//', base):
e34c3361
S
3895 return None
3896 return compat_urlparse.urljoin(base, path)
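# Illustrative behaviour of the URL helpers above (example URLs are hypothetical):
#   >>> get_domain('https://www.youtube.com/watch?v=abc')
#   'youtube.com'
#   >>> url_basename('https://example.com/path/to/video.mp4?foo=bar')
#   'video.mp4'
#   >>> base_url('https://example.com/path/to/manifest.mpd?sig=1')
#   'https://example.com/path/to/'
#   >>> urljoin('https://example.com/base/', 'video.mp4')
#   'https://example.com/base/video.mp4'
#   >>> urljoin(None, 'video.mp4') is None
#   True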
3897
3898
aa94a6d3
PH
3899class HEADRequest(compat_urllib_request.Request):
3900 def get_method(self):
611c1dd9 3901 return 'HEAD'
7217e148
PH
3902
3903
95cf60e8
S
3904class PUTRequest(compat_urllib_request.Request):
3905 def get_method(self):
3906 return 'PUT'
3907
3908
9732d77e 3909def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3910 if get_attr:
3911 if v is not None:
3912 v = getattr(v, get_attr, None)
9572013d
PH
3913 if v == '':
3914 v = None
1812afb7
S
3915 if v is None:
3916 return default
3917 try:
3918 return int(v) * invscale // scale
31c49255 3919 except (ValueError, TypeError, OverflowError):
af98f8ff 3920 return default
9732d77e 3921
9572013d 3922
40a90862
JMF
3923def str_or_none(v, default=None):
3924 return default if v is None else compat_str(v)
3925
9732d77e
PH
3926
3927def str_to_int(int_str):
48d4681e 3928 """ A more relaxed version of int_or_none """
42db58ec 3929 if isinstance(int_str, compat_integer_types):
348c6bf1 3930 return int_str
42db58ec
S
3931 elif isinstance(int_str, compat_str):
3932 int_str = re.sub(r'[,\.\+]', '', int_str)
3933 return int_or_none(int_str)
608d11f5
PH
3934
3935
9732d77e 3936def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3937 if v is None:
3938 return default
3939 try:
3940 return float(v) * invscale / scale
5e1271c5 3941 except (ValueError, TypeError):
caf80631 3942 return default
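# Expected behaviour of the lenient numeric coercion helpers above (sketch):
#   >>> int_or_none('42')
#   42
#   >>> int_or_none('') is None
#   True
#   >>> int_or_none(2500, scale=1000)
#   2
#   >>> str_to_int('123,456')
#   123456
#   >>> float_or_none('1.5', invscale=1000)
#   1500.0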
43f775e4
PH
3943
3944
c7e327c4
S
3945def bool_or_none(v, default=None):
3946 return v if isinstance(v, bool) else default
3947
3948
53cd37ba
S
3949def strip_or_none(v, default=None):
3950 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3951
3952
af03000a
S
3953def url_or_none(url):
3954 if not url or not isinstance(url, compat_str):
3955 return None
3956 url = url.strip()
29f7c58a 3957 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3958
3959
e29663c6 3960def strftime_or_none(timestamp, date_format, default=None):
3961 datetime_object = None
3962 try:
3963 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3964 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3965 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3966 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3967 return datetime_object.strftime(date_format)
3968 except (ValueError, TypeError, AttributeError):
3969 return default
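# Usage sketch: both Unix timestamps and YYYYMMDD strings are accepted:
#   >>> strftime_or_none(0, '%Y-%m-%d')
#   '1970-01-01'
#   >>> strftime_or_none('20211231', '%Y-%m-%d')
#   '2021-12-31'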
3970
3971
608d11f5 3972def parse_duration(s):
8f9312c3 3973 if not isinstance(s, compat_basestring):
608d11f5 3974 return None
ca7b3246 3975 s = s.strip()
38d79fd1 3976 if not s:
3977 return None
ca7b3246 3978
acaff495 3979 days, hours, mins, secs, ms = [None] * 5
15846398 3980 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3981 if m:
3982 days, hours, mins, secs, ms = m.groups()
3983 else:
3984 m = re.match(
056653bb
S
3985 r'''(?ix)(?:P?
3986 (?:
3987 [0-9]+\s*y(?:ears?)?\s*
3988 )?
3989 (?:
3990 [0-9]+\s*m(?:onths?)?\s*
3991 )?
3992 (?:
3993 [0-9]+\s*w(?:eeks?)?\s*
3994 )?
8f4b58d7 3995 (?:
acaff495 3996 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3997 )?
056653bb 3998 T)?
acaff495 3999 (?:
4000 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
4001 )?
4002 (?:
4003 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
4004 )?
4005 (?:
4006 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 4007 )?Z?$''', s)
acaff495 4008 if m:
4009 days, hours, mins, secs, ms = m.groups()
4010 else:
15846398 4011 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 4012 if m:
4013 hours, mins = m.groups()
4014 else:
4015 return None
4016
4017 duration = 0
4018 if secs:
4019 duration += float(secs)
4020 if mins:
4021 duration += float(mins) * 60
4022 if hours:
4023 duration += float(hours) * 60 * 60
4024 if days:
4025 duration += float(days) * 24 * 60 * 60
4026 if ms:
4027 duration += float(ms)
4028 return duration
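# Illustrative results for the duration formats handled above (sketch):
#   >>> parse_duration('1:23:45')
#   5025.0
#   >>> parse_duration('PT1H30M')
#   5400.0
#   >>> parse_duration('3 min')
#   180.0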
91d7d0b3
JMF
4029
4030
e65e4c88 4031def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 4032 name, real_ext = os.path.splitext(filename)
e65e4c88
S
4033 return (
4034 '{0}.{1}{2}'.format(name, ext, real_ext)
4035 if not expected_real_ext or real_ext[1:] == expected_real_ext
4036 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
4037
4038
b3ed15b7
S
4039def replace_extension(filename, ext, expected_real_ext=None):
4040 name, real_ext = os.path.splitext(filename)
4041 return '{0}.{1}'.format(
4042 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
4043 ext)
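# Expected results of the filename-extension helpers above (sketch):
#   >>> prepend_extension('video.mp4', 'temp')
#   'video.temp.mp4'
#   >>> prepend_extension('video.mp4', 'temp', expected_real_ext='webm')
#   'video.mp4.temp'
#   >>> replace_extension('video.mp4', 'mkv')
#   'video.mkv'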
4044
4045
d70ad093
PH
4046def check_executable(exe, args=[]):
4047 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
4048 args can be a list of arguments for a short output (like -version) """
4049 try:
d3c93ec2 4050 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
d70ad093
PH
4051 except OSError:
4052 return False
4053 return exe
b7ab0590
PH
4054
4055
9af98e17 4056def _get_exe_version_output(exe, args):
95807118 4057 try:
b64d04c1 4058 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 4059 # SIGTTOU if yt-dlp is run in the background.
067aa17e 4060 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
d3c93ec2 4061 out, _ = Popen(
4062 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
4063 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
95807118
PH
4064 except OSError:
4065 return False
cae97f65
PH
4066 if isinstance(out, bytes): # Python 2.x
4067 out = out.decode('ascii', 'ignore')
9af98e17 4068 return out
cae97f65
PH
4069
4070
4071def detect_exe_version(output, version_re=None, unrecognized='present'):
4072 assert isinstance(output, compat_str)
4073 if version_re is None:
4074 version_re = r'version\s+([-0-9._a-zA-Z]+)'
4075 m = re.search(version_re, output)
95807118
PH
4076 if m:
4077 return m.group(1)
4078 else:
4079 return unrecognized
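# A sketch of how version banners are parsed (banner strings below are hypothetical):
#   >>> detect_exe_version('foo version 1.2.3 Copyright (c) 2021')
#   '1.2.3'
#   >>> detect_exe_version('banner without the keyword')
#   'present'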
4080
4081
9af98e17 4082def get_exe_version(exe, args=['--version'],
4083 version_re=None, unrecognized='present'):
4084 """ Returns the version of the specified executable,
4085 or False if the executable is not present """
4086 out = _get_exe_version_output(exe, args)
4087 return detect_exe_version(out, version_re, unrecognized) if out else False
4088
4089
cb89cfc1 4090class LazyList(collections.abc.Sequence):
483336e7 4091 ''' Lazy immutable list from an iterable
4092 Note that slices of a LazyList are lists and not LazyList'''
4093
8e5fecc8 4094 class IndexError(IndexError):
4095 pass
4096
282f5709 4097 def __init__(self, iterable, *, reverse=False, _cache=None):
483336e7 4098 self.__iterable = iter(iterable)
282f5709 4099 self.__cache = [] if _cache is None else _cache
4100 self.__reversed = reverse
483336e7 4101
4102 def __iter__(self):
28419ca2 4103 if self.__reversed:
4104 # We need to consume the entire iterable to iterate in reverse
981052c9 4105 yield from self.exhaust()
28419ca2 4106 return
4107 yield from self.__cache
483336e7 4108 for item in self.__iterable:
4109 self.__cache.append(item)
4110 yield item
4111
981052c9 4112 def __exhaust(self):
483336e7 4113 self.__cache.extend(self.__iterable)
9f1a1c36 4114 # Discard the emptied iterable to make it pickle-able
4115 self.__iterable = []
28419ca2 4116 return self.__cache
4117
981052c9 4118 def exhaust(self):
4119 ''' Evaluate the entire iterable '''
4120 return self.__exhaust()[::-1 if self.__reversed else 1]
4121
28419ca2 4122 @staticmethod
981052c9 4123 def __reverse_index(x):
e0f2b4b4 4124 return None if x is None else -(x + 1)
483336e7 4125
4126 def __getitem__(self, idx):
4127 if isinstance(idx, slice):
28419ca2 4128 if self.__reversed:
e0f2b4b4 4129 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4130 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4131 elif isinstance(idx, int):
28419ca2 4132 if self.__reversed:
981052c9 4133 idx = self.__reverse_index(idx)
e0f2b4b4 4134 start, stop, step = idx, idx, 0
483336e7 4135 else:
4136 raise TypeError('indices must be integers or slices')
e0f2b4b4 4137 if ((start or 0) < 0 or (stop or 0) < 0
4138 or (start is None and step < 0)
4139 or (stop is None and step > 0)):
483336e7 4140 # We need to consume the entire iterable to be able to slice from the end
4141 # Obviously, never use this with infinite iterables
8e5fecc8 4142 self.__exhaust()
4143 try:
4144 return self.__cache[idx]
4145 except IndexError as e:
4146 raise self.IndexError(e) from e
e0f2b4b4 4147 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4148 if n > 0:
4149 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4150 try:
4151 return self.__cache[idx]
4152 except IndexError as e:
4153 raise self.IndexError(e) from e
483336e7 4154
4155 def __bool__(self):
4156 try:
28419ca2 4157 self[-1] if self.__reversed else self[0]
8e5fecc8 4158 except self.IndexError:
483336e7 4159 return False
4160 return True
4161
4162 def __len__(self):
8e5fecc8 4163 self.__exhaust()
483336e7 4164 return len(self.__cache)
4165
282f5709 4166 def __reversed__(self):
4167 return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
4168
4169 def __copy__(self):
4170 return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
4171
4172 def __deepcopy__(self, memo):
4173 # FIXME: This is actually just a shallow copy
4174 id_ = id(self)
4175 memo[id_] = self.__copy__()
4176 return memo[id_]
28419ca2 4177
4178 def __repr__(self):
4179 # repr and str should mimic a list. So we exhaust the iterable
4180 return repr(self.exhaust())
4181
4182 def __str__(self):
4183 return repr(self.exhaust())
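# Usage sketch: items are pulled from the iterable only as they are needed, so even
# infinite iterators are safe as long as the whole list is never realized:
#   >>> lst = LazyList(itertools.count())
#   >>> lst[5]
#   5
#   >>> lst[:3]          # slices are plain lists
#   [0, 1, 2]
#   >>> list(reversed(LazyList(range(3))))
#   [2, 1, 0]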
4184
483336e7 4185
7be9ccff 4186class PagedList:
c07a39ae 4187
4188 class IndexError(IndexError):
4189 pass
4190
dd26ced1
PH
4191 def __len__(self):
4192 # This is only useful for tests
4193 return len(self.getslice())
4194
7be9ccff 4195 def __init__(self, pagefunc, pagesize, use_cache=True):
4196 self._pagefunc = pagefunc
4197 self._pagesize = pagesize
4198 self._use_cache = use_cache
4199 self._cache = {}
4200
4201 def getpage(self, pagenum):
d8cf8d97 4202 page_results = self._cache.get(pagenum)
4203 if page_results is None:
4204 page_results = list(self._pagefunc(pagenum))
7be9ccff 4205 if self._use_cache:
4206 self._cache[pagenum] = page_results
4207 return page_results
4208
4209 def getslice(self, start=0, end=None):
4210 return list(self._getslice(start, end))
4211
4212 def _getslice(self, start, end):
55575225 4213 raise NotImplementedError('This method must be implemented by subclasses')
4214
4215 def __getitem__(self, idx):
7be9ccff 4216 # NOTE: cache must be enabled if this is used
55575225 4217 if not isinstance(idx, int) or idx < 0:
4218 raise TypeError('indices must be non-negative integers')
4219 entries = self.getslice(idx, idx + 1)
d8cf8d97 4220 if not entries:
c07a39ae 4221 raise self.IndexError()
d8cf8d97 4222 return entries[0]
55575225 4223
9c44d242
PH
4224
4225class OnDemandPagedList(PagedList):
7be9ccff 4226 def _getslice(self, start, end):
b7ab0590
PH
4227 for pagenum in itertools.count(start // self._pagesize):
4228 firstid = pagenum * self._pagesize
4229 nextfirstid = pagenum * self._pagesize + self._pagesize
4230 if start >= nextfirstid:
4231 continue
4232
b7ab0590
PH
4233 startv = (
4234 start % self._pagesize
4235 if firstid <= start < nextfirstid
4236 else 0)
b7ab0590
PH
4237 endv = (
4238 ((end - 1) % self._pagesize) + 1
4239 if (end is not None and firstid <= end <= nextfirstid)
4240 else None)
4241
7be9ccff 4242 page_results = self.getpage(pagenum)
b7ab0590
PH
4243 if startv != 0 or endv is not None:
4244 page_results = page_results[startv:endv]
7be9ccff 4245 yield from page_results
b7ab0590
PH
4246
4247 # A little optimization - if the current page is not "full", i.e. does
4248 # not contain page_size videos, then we can assume that this page
4249 # is the last one - there are no more ids on further pages -
4250 # i.e. no need to query again.
4251 if len(page_results) + startv < self._pagesize:
4252 break
4253
4254 # If we got the whole page, but the next page is not interesting,
4255 # break out early as well
4256 if end == nextfirstid:
4257 break
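# Usage sketch: pages are fetched lazily and only as far as the requested slice
# requires (the page function and page size below are hypothetical):
#   >>> pl = OnDemandPagedList(lambda n: range(n * 3, n * 3 + 3), 3)
#   >>> pl.getslice(2, 5)
#   [2, 3, 4]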
81c2f20b
PH
4258
4259
9c44d242
PH
4260class InAdvancePagedList(PagedList):
4261 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4262 self._pagecount = pagecount
7be9ccff 4263 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4264
7be9ccff 4265 def _getslice(self, start, end):
9c44d242
PH
4266 start_page = start // self._pagesize
4267 end_page = (
4268 self._pagecount if end is None else (end // self._pagesize + 1))
4269 skip_elems = start - start_page * self._pagesize
4270 only_more = None if end is None else end - start
4271 for pagenum in range(start_page, end_page):
7be9ccff 4272 page_results = self.getpage(pagenum)
9c44d242 4273 if skip_elems:
7be9ccff 4274 page_results = page_results[skip_elems:]
9c44d242
PH
4275 skip_elems = None
4276 if only_more is not None:
7be9ccff 4277 if len(page_results) < only_more:
4278 only_more -= len(page_results)
9c44d242 4279 else:
7be9ccff 4280 yield from page_results[:only_more]
9c44d242 4281 break
7be9ccff 4282 yield from page_results
9c44d242
PH
4283
4284
81c2f20b 4285def uppercase_escape(s):
676eb3f2 4286 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4287 return re.sub(
a612753d 4288 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4289 lambda m: unicode_escape(m.group(0))[0],
4290 s)
0fe2ff78
YCH
4291
4292
4293def lowercase_escape(s):
4294 unicode_escape = codecs.getdecoder('unicode_escape')
4295 return re.sub(
4296 r'\\u[0-9a-fA-F]{4}',
4297 lambda m: unicode_escape(m.group(0))[0],
4298 s)
b53466e1 4299
d05cfe06
S
4300
4301def escape_rfc3986(s):
4302 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4303 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4304 s = s.encode('utf-8')
ecc0c5ee 4305 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4306
4307
4308def escape_url(url):
4309 """Escape URL as suggested by RFC 3986"""
4310 url_parsed = compat_urllib_parse_urlparse(url)
4311 return url_parsed._replace(
efbed08d 4312 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4313 path=escape_rfc3986(url_parsed.path),
4314 params=escape_rfc3986(url_parsed.params),
4315 query=escape_rfc3986(url_parsed.query),
4316 fragment=escape_rfc3986(url_parsed.fragment)
4317 ).geturl()
4318
62e609ab 4319
4dfbf869 4320def parse_qs(url):
4321 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4322
4323
62e609ab
PH
4324def read_batch_urls(batch_fd):
4325 def fixup(url):
4326 if not isinstance(url, compat_str):
4327 url = url.decode('utf-8', 'replace')
8c04f0be 4328 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4329 for bom in BOM_UTF8:
4330 if url.startswith(bom):
4331 url = url[len(bom):]
4332 url = url.lstrip()
4333 if not url or url.startswith(('#', ';', ']')):
62e609ab 4334 return False
8c04f0be 4335 # "#" cannot be stripped out since it is part of the URI
4336 # However, it can be safely stripped out if following a whitespace
4337 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4338
4339 with contextlib.closing(batch_fd) as fd:
4340 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4341
4342
4343def urlencode_postdata(*args, **kargs):
15707c7e 4344 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4345
4346
38f9ef31 4347def update_url_query(url, query):
cacd9966
YCH
4348 if not query:
4349 return url
38f9ef31 4350 parsed_url = compat_urlparse.urlparse(url)
4351 qs = compat_parse_qs(parsed_url.query)
4352 qs.update(query)
4353 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4354 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4355
8e60dc75 4356
ed0291d1
S
4357def update_Request(req, url=None, data=None, headers={}, query={}):
4358 req_headers = req.headers.copy()
4359 req_headers.update(headers)
4360 req_data = data or req.data
4361 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4362 req_get_method = req.get_method()
4363 if req_get_method == 'HEAD':
4364 req_type = HEADRequest
4365 elif req_get_method == 'PUT':
4366 req_type = PUTRequest
4367 else:
4368 req_type = compat_urllib_request.Request
ed0291d1
S
4369 new_req = req_type(
4370 req_url, data=req_data, headers=req_headers,
4371 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4372 if hasattr(req, 'timeout'):
4373 new_req.timeout = req.timeout
4374 return new_req
4375
4376
10c87c15 4377def _multipart_encode_impl(data, boundary):
0c265486
YCH
4378 content_type = 'multipart/form-data; boundary=%s' % boundary
4379
4380 out = b''
4381 for k, v in data.items():
4382 out += b'--' + boundary.encode('ascii') + b'\r\n'
4383 if isinstance(k, compat_str):
4384 k = k.encode('utf-8')
4385 if isinstance(v, compat_str):
4386 v = v.encode('utf-8')
4387 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4388 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4389 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4390 if boundary.encode('ascii') in content:
4391 raise ValueError('Boundary overlaps with data')
4392 out += content
4393
4394 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4395
4396 return out, content_type
4397
4398
4399def multipart_encode(data, boundary=None):
4400 '''
4401 Encode a dict to RFC 7578-compliant form-data
4402
4403 data:
4404 A dict where keys and values can be either Unicode or bytes-like
4405 objects.
4406 boundary:
4407 If specified, a Unicode object to use as the boundary. Otherwise
4408 a random boundary is generated.
4409
4410 Reference: https://tools.ietf.org/html/rfc7578
4411 '''
4412 has_specified_boundary = boundary is not None
4413
4414 while True:
4415 if boundary is None:
4416 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4417
4418 try:
10c87c15 4419 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4420 break
4421 except ValueError:
4422 if has_specified_boundary:
4423 raise
4424 boundary = None
4425
4426 return out, content_type
4427
4428
86296ad2 4429def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4430 if isinstance(key_or_keys, (list, tuple)):
4431 for key in key_or_keys:
86296ad2
S
4432 if key not in d or d[key] is None or skip_false_values and not d[key]:
4433 continue
4434 return d[key]
cbecc9b9
S
4435 return default
4436 return d.get(key_or_keys, default)
4437
4438
329ca3be 4439def try_get(src, getter, expected_type=None):
6606817a 4440 for get in variadic(getter):
a32a9a7e
S
4441 try:
4442 v = get(src)
4443 except (AttributeError, KeyError, TypeError, IndexError):
4444 pass
4445 else:
4446 if expected_type is None or isinstance(v, expected_type):
4447 return v
329ca3be
S
4448
4449
6cc62232
S
4450def merge_dicts(*dicts):
4451 merged = {}
4452 for a_dict in dicts:
4453 for k, v in a_dict.items():
4454 if v is None:
4455 continue
3089bc74
S
4456 if (k not in merged
4457 or (isinstance(v, compat_str) and v
4458 and isinstance(merged[k], compat_str)
4459 and not merged[k])):
6cc62232
S
4460 merged[k] = v
4461 return merged
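# Expected behaviour of the dict helpers above (sketch):
#   >>> dict_get({'a': None, 'b': '', 'c': 'x'}, ('a', 'b', 'c'))
#   'x'
#   >>> try_get({'a': [1, 2]}, lambda x: x['a'][1], int)
#   2
#   >>> try_get({}, lambda x: x['missing']) is None
#   True
#   >>> merge_dicts({'a': 1, 'b': ''}, {'a': 2, 'b': 'x', 'c': 3})
#   {'a': 1, 'b': 'x', 'c': 3}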
4462
4463
8e60dc75
S
4464def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4465 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4466
16392824 4467
a1a530b0
PH
4468US_RATINGS = {
4469 'G': 0,
4470 'PG': 10,
4471 'PG-13': 13,
4472 'R': 16,
4473 'NC': 18,
4474}
fac55558
PH
4475
4476
a8795327 4477TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4478 'TV-Y': 0,
4479 'TV-Y7': 7,
4480 'TV-G': 0,
4481 'TV-PG': 0,
4482 'TV-14': 14,
4483 'TV-MA': 17,
a8795327
S
4484}
4485
4486
146c80e2 4487def parse_age_limit(s):
a8795327
S
4488 if type(s) == int:
4489 return s if 0 <= s <= 21 else None
4490 if not isinstance(s, compat_basestring):
d838b1bd 4491 return None
146c80e2 4492 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4493 if m:
4494 return int(m.group('age'))
5c5fae6d 4495 s = s.upper()
a8795327
S
4496 if s in US_RATINGS:
4497 return US_RATINGS[s]
5a16c9d9 4498 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4499 if m:
5a16c9d9 4500 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4501 return None
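# Illustrative mappings (sketch):
#   >>> parse_age_limit('PG-13')
#   13
#   >>> parse_age_limit('TV-MA')
#   17
#   >>> parse_age_limit('18+')
#   18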
146c80e2
S
4502
4503
fac55558 4504def strip_jsonp(code):
609a61e3 4505 return re.sub(
5552c9eb 4506 r'''(?sx)^
e9c671d5 4507 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4508 (?:\s*&&\s*(?P=func_name))?
4509 \s*\(\s*(?P<callback_data>.*)\);?
4510 \s*?(?://[^\n]*)*$''',
4511 r'\g<callback_data>', code)
478c2c61
PH
4512
4513
5c610515 4514def js_to_json(code, vars={}):
4515 # vars is a dict of var, val pairs to substitute
c843e685 4516 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4517 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4518 INTEGER_TABLE = (
4519 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4520 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4521 )
4522
e05f6939 4523 def fix_kv(m):
e7b6d122
PH
4524 v = m.group(0)
4525 if v in ('true', 'false', 'null'):
4526 return v
421ddcb8
C
4527 elif v in ('undefined', 'void 0'):
4528 return 'null'
8bdd16b4 4529 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4530 return ""
4531
4532 if v[0] in ("'", '"'):
4533 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4534 '"': '\\"',
bd1e4844 4535 "\\'": "'",
4536 '\\\n': '',
4537 '\\x': '\\u00',
4538 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4539 else:
4540 for regex, base in INTEGER_TABLE:
4541 im = re.match(regex, v)
4542 if im:
4543 i = int(im.group(1), base)
4544 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4545
5c610515 4546 if v in vars:
4547 return vars[v]
4548
e7b6d122 4549 return '"%s"' % v
e05f6939 4550
bd1e4844 4551 return re.sub(r'''(?sx)
4552 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4553 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4554 {comment}|,(?={skip}[\]}}])|
421ddcb8 4555 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4556 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4557 [0-9]+(?={skip}:)|
4558 !+
4195096e 4559 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
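# A small sketch of what the conversion handles: unquoted keys, single quotes,
# trailing commas and JavaScript-only literals:
#   >>> js_to_json("{'a': 1, b: 'c',}")
#   '{"a": 1, "b": "c"}'
#   >>> js_to_json('{x: undefined}')
#   '{"x": null}'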
e05f6939
PH
4560
4561
478c2c61
PH
4562def qualities(quality_ids):
4563 """ Get a numeric quality value out of a list of possible values """
4564 def q(qid):
4565 try:
4566 return quality_ids.index(qid)
4567 except ValueError:
4568 return -1
4569 return q
4570
acd69589 4571
de6000d9 4572DEFAULT_OUTTMPL = {
4573 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4574 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4575}
4576OUTTMPL_TYPES = {
72755351 4577 'chapter': None,
de6000d9 4578 'subtitle': None,
4579 'thumbnail': None,
4580 'description': 'description',
4581 'annotation': 'annotations.xml',
4582 'infojson': 'info.json',
08438d2c 4583 'link': None,
5112f26a 4584 'pl_thumbnail': None,
de6000d9 4585 'pl_description': 'description',
4586 'pl_infojson': 'info.json',
4587}
0a871f68 4588
143db31d 4589# As of [1], the format syntax is:
4590# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4591# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4592STR_FORMAT_RE_TMPL = r'''(?x)
4593 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4594 %
524e2e4f 4595 (?P<has_key>\((?P<key>{0})\))?
752cda38 4596 (?P<format>
524e2e4f 4597 (?P<conversion>[#0\-+ ]+)?
4598 (?P<min_width>\d+)?
4599 (?P<precision>\.\d+)?
4600 (?P<len_mod>[hlL])? # unused in python
901130bb 4601 {1} # conversion type
752cda38 4602 )
143db31d 4603'''
4604
7d1eb38a 4605
901130bb 4606STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4607
7d1eb38a 4608
a020a0dc
PH
4609def limit_length(s, length):
4610 """ Add ellipses to overly long strings """
4611 if s is None:
4612 return None
4613 ELLIPSES = '...'
4614 if len(s) > length:
4615 return s[:length - len(ELLIPSES)] + ELLIPSES
4616 return s
48844745
PH
4617
4618
4619def version_tuple(v):
5f9b8394 4620 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4621
4622
4623def is_outdated_version(version, limit, assume_new=True):
4624 if not version:
4625 return not assume_new
4626 try:
4627 return version_tuple(version) < version_tuple(limit)
4628 except ValueError:
4629 return not assume_new
732ea2f0
PH
4630
4631
4632def ytdl_is_updateable():
7a5c1cfe 4633 """ Returns if yt-dlp can be updated with -U """
735d865e 4634
5d535b4a 4635 from .update import is_non_updateable
732ea2f0 4636
5d535b4a 4637 return not is_non_updateable()
7d4111ed
PH
4638
4639
4640def args_to_str(args):
4641 # Get a short string representation for a subprocess command
702ccf2d 4642 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4643
4644
9b9c5355 4645def error_to_compat_str(err):
fdae2358
S
4646 err_str = str(err)
4647 # On python 2 error byte string must be decoded with proper
4648 # encoding rather than ascii
4649 if sys.version_info[0] < 3:
4650 err_str = err_str.decode(preferredencoding())
4651 return err_str
4652
4653
c460bdd5 4654def mimetype2ext(mt):
eb9ee194
S
4655 if mt is None:
4656 return None
4657
9359f3d4
F
4658 mt, _, params = mt.partition(';')
4659 mt = mt.strip()
4660
4661 FULL_MAP = {
765ac263 4662 'audio/mp4': 'm4a',
6c33d24b
YCH
4663 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4664 # it's the most popular one
4665 'audio/mpeg': 'mp3',
ba39289d 4666 'audio/x-wav': 'wav',
9359f3d4
F
4667 'audio/wav': 'wav',
4668 'audio/wave': 'wav',
4669 }
4670
4671 ext = FULL_MAP.get(mt)
765ac263
JMF
4672 if ext is not None:
4673 return ext
4674
9359f3d4 4675 SUBTYPE_MAP = {
f6861ec9 4676 '3gpp': '3gp',
cafcf657 4677 'smptett+xml': 'tt',
cafcf657 4678 'ttaf+xml': 'dfxp',
a0d8d704 4679 'ttml+xml': 'ttml',
f6861ec9 4680 'x-flv': 'flv',
a0d8d704 4681 'x-mp4-fragmented': 'mp4',
d4f05d47 4682 'x-ms-sami': 'sami',
a0d8d704 4683 'x-ms-wmv': 'wmv',
b4173f15
RA
4684 'mpegurl': 'm3u8',
4685 'x-mpegurl': 'm3u8',
4686 'vnd.apple.mpegurl': 'm3u8',
4687 'dash+xml': 'mpd',
b4173f15 4688 'f4m+xml': 'f4m',
f164b971 4689 'hds+xml': 'f4m',
e910fe2f 4690 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4691 'quicktime': 'mov',
98ce1a3f 4692 'mp2t': 'ts',
39e7107d 4693 'x-wav': 'wav',
9359f3d4
F
4694 'filmstrip+json': 'fs',
4695 'svg+xml': 'svg',
4696 }
4697
4698 _, _, subtype = mt.rpartition('/')
4699 ext = SUBTYPE_MAP.get(subtype.lower())
4700 if ext is not None:
4701 return ext
4702
4703 SUFFIX_MAP = {
4704 'json': 'json',
4705 'xml': 'xml',
4706 'zip': 'zip',
4707 'gzip': 'gz',
4708 }
4709
4710 _, _, suffix = subtype.partition('+')
4711 ext = SUFFIX_MAP.get(suffix)
4712 if ext is not None:
4713 return ext
4714
4715 return subtype.replace('+', '.')
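# Expected mappings for a few representative MIME types (sketch):
#   >>> mimetype2ext('audio/mp4')
#   'm4a'
#   >>> mimetype2ext('application/x-mpegurl')
#   'm3u8'
#   >>> mimetype2ext('application/dash+xml')
#   'mpd'
#   >>> mimetype2ext('video/mp4; codecs="avc1.64001f"')
#   'mp4'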
c460bdd5
PH
4716
4717
4f3c5e06 4718def parse_codecs(codecs_str):
4719 # http://tools.ietf.org/html/rfc6381
4720 if not codecs_str:
4721 return {}
a0566bbf 4722 split_codecs = list(filter(None, map(
dbf5416a 4723 str.strip, codecs_str.strip().strip(',').split(','))))
176f1866 4724 vcodec, acodec, hdr = None, None, None
a0566bbf 4725 for full_codec in split_codecs:
9bd979ca 4726 parts = full_codec.split('.')
4727 codec = parts[0].replace('0', '')
4728 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
4729 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
4f3c5e06 4730 if not vcodec:
9bd979ca 4731 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
176f1866 4732 if codec in ('dvh1', 'dvhe'):
4733 hdr = 'DV'
9bd979ca 4734 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
4735 hdr = 'HDR10'
4736 elif full_codec.replace('0', '').startswith('vp9.2'):
176f1866 4737 hdr = 'HDR10'
60f5c9fb 4738 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4739 if not acodec:
4740 acodec = full_codec
4741 else:
60f5c9fb 4742 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4743 if not vcodec and not acodec:
a0566bbf 4744 if len(split_codecs) == 2:
4f3c5e06 4745 return {
a0566bbf 4746 'vcodec': split_codecs[0],
4747 'acodec': split_codecs[1],
4f3c5e06 4748 }
4749 else:
4750 return {
4751 'vcodec': vcodec or 'none',
4752 'acodec': acodec or 'none',
176f1866 4753 'dynamic_range': hdr,
4f3c5e06 4754 }
4755 return {}
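# Illustrative results for typical codecs strings (sketch):
#   >>> parse_codecs('avc1.64001f, mp4a.40.2')
#   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
#   >>> parse_codecs('vp9.2')
#   {'vcodec': 'vp9.2', 'acodec': 'none', 'dynamic_range': 'HDR10'}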
4756
4757
2ccd1b10 4758def urlhandle_detect_ext(url_handle):
79298173 4759 getheader = url_handle.headers.get
2ccd1b10 4760
b55ee18f
PH
4761 cd = getheader('Content-Disposition')
4762 if cd:
4763 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4764 if m:
4765 e = determine_ext(m.group('filename'), default_ext=None)
4766 if e:
4767 return e
4768
c460bdd5 4769 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4770
4771
1e399778
YCH
4772def encode_data_uri(data, mime_type):
4773 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4774
4775
05900629 4776def age_restricted(content_limit, age_limit):
6ec6cb4e 4777 """ Returns True iff the content should be blocked """
05900629
PH
4778
4779 if age_limit is None: # No limit set
4780 return False
4781 if content_limit is None:
4782 return False # Content available for everyone
4783 return age_limit < content_limit
61ca9a80
PH
4784
4785
4786def is_html(first_bytes):
4787 """ Detect whether a file contains HTML by examining its first bytes. """
4788
4789 BOMS = [
4790 (b'\xef\xbb\xbf', 'utf-8'),
4791 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4792 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4793 (b'\xff\xfe', 'utf-16-le'),
4794 (b'\xfe\xff', 'utf-16-be'),
4795 ]
4796 for bom, enc in BOMS:
4797 if first_bytes.startswith(bom):
4798 s = first_bytes[len(bom):].decode(enc, 'replace')
4799 break
4800 else:
4801 s = first_bytes.decode('utf-8', 'replace')
4802
4803 return re.match(r'^\s*<', s)
a055469f
PH
4804
4805
4806def determine_protocol(info_dict):
4807 protocol = info_dict.get('protocol')
4808 if protocol is not None:
4809 return protocol
4810
7de837a5 4811 url = sanitize_url(info_dict['url'])
a055469f
PH
4812 if url.startswith('rtmp'):
4813 return 'rtmp'
4814 elif url.startswith('mms'):
4815 return 'mms'
4816 elif url.startswith('rtsp'):
4817 return 'rtsp'
4818
4819 ext = determine_ext(url)
4820 if ext == 'm3u8':
4821 return 'm3u8'
4822 elif ext == 'f4m':
4823 return 'f4m'
4824
4825 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4826
4827
c5e3f849 4828def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
4829 """ Render a list of rows, each as a list of values.
4830 Text after a \t will be right aligned """
ec11a9f4 4831 def width(string):
c5e3f849 4832 return len(remove_terminal_sequences(string).replace('\t', ''))
76d321f6 4833
4834 def get_max_lens(table):
ec11a9f4 4835 return [max(width(str(v)) for v in col) for col in zip(*table)]
76d321f6 4836
4837 def filter_using_list(row, filterArray):
4838 return [col for (take, col) in zip(filterArray, row) if take]
4839
c5e3f849 4840 if hide_empty:
76d321f6 4841 max_lens = get_max_lens(data)
4842 header_row = filter_using_list(header_row, max_lens)
4843 data = [filter_using_list(row, max_lens) for row in data]
4844
cfb56d1a 4845 table = [header_row] + data
76d321f6 4846 max_lens = get_max_lens(table)
c5e3f849 4847 extra_gap += 1
76d321f6 4848 if delim:
c5e3f849 4849 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
4850 table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter
ec11a9f4 4851 for row in table:
4852 for pos, text in enumerate(map(str, row)):
c5e3f849 4853 if '\t' in text:
4854 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
4855 else:
4856 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
4857 ret = '\n'.join(''.join(row).rstrip() for row in table)
ec11a9f4 4858 return ret
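# A small sketch of the plain rendering (no delimiter row, default gap):
#   >>> print(render_table(['id', 'name'], [['1', 'foo'], ['22', 'bar']]))
#   id name
#   1  foo
#   22 bar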
347de493
PH
4859
4860
8f18aca8 4861def _match_one(filter_part, dct, incomplete):
77b87f05 4862 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4863 STRING_OPERATORS = {
4864 '*=': operator.contains,
4865 '^=': lambda attr, value: attr.startswith(value),
4866 '$=': lambda attr, value: attr.endswith(value),
4867 '~=': lambda attr, value: re.search(value, attr),
4868 }
347de493 4869 COMPARISON_OPERATORS = {
a047eeb6 4870 **STRING_OPERATORS,
4871 '<=': operator.le, # "<=" must be defined above "<"
347de493 4872 '<': operator.lt,
347de493 4873 '>=': operator.ge,
a047eeb6 4874 '>': operator.gt,
347de493 4875 '=': operator.eq,
347de493 4876 }
a047eeb6 4877
347de493
PH
4878 operator_rex = re.compile(r'''(?x)\s*
4879 (?P<key>[a-z_]+)
77b87f05 4880 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493 4881 (?:
a047eeb6 4882 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4883 (?P<strval>.+?)
347de493
PH
4884 )
4885 \s*$
4886 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4887 m = operator_rex.search(filter_part)
4888 if m:
18f96d12 4889 m = m.groupdict()
4890 unnegated_op = COMPARISON_OPERATORS[m['op']]
4891 if m['negation']:
77b87f05
MT
4892 op = lambda attr, value: not unnegated_op(attr, value)
4893 else:
4894 op = unnegated_op
18f96d12 4895 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4896 if m['quote']:
4897 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4898 actual_value = dct.get(m['key'])
4899 numeric_comparison = None
4900 if isinstance(actual_value, compat_numeric_types):
e5a088dc
S
4901 # If the original field is a string and the matching comparison value is
4902 # a number, we should respect the origin of the original field
4903 # and process comparison value as a string (see
18f96d12 4904 # https://github.com/ytdl-org/youtube-dl/issues/11082)
347de493 4905 try:
18f96d12 4906 numeric_comparison = int(comparison_value)
347de493 4907 except ValueError:
18f96d12 4908 numeric_comparison = parse_filesize(comparison_value)
4909 if numeric_comparison is None:
4910 numeric_comparison = parse_filesize(f'{comparison_value}B')
4911 if numeric_comparison is None:
4912 numeric_comparison = parse_duration(comparison_value)
4913 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4914 raise ValueError('Operator %s only supports string values!' % m['op'])
347de493 4915 if actual_value is None:
18f96d12 4916 return incomplete or m['none_inclusive']
4917 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
347de493
PH
4918
4919 UNARY_OPERATORS = {
1cc47c66
S
4920 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4921 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4922 }
4923 operator_rex = re.compile(r'''(?x)\s*
4924 (?P<op>%s)\s*(?P<key>[a-z_]+)
4925 \s*$
4926 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4927 m = operator_rex.search(filter_part)
4928 if m:
4929 op = UNARY_OPERATORS[m.group('op')]
4930 actual_value = dct.get(m.group('key'))
8f18aca8 4931 if incomplete and actual_value is None:
4932 return True
347de493
PH
4933 return op(actual_value)
4934
4935 raise ValueError('Invalid filter part %r' % filter_part)
4936
4937
8f18aca8 4938def match_str(filter_str, dct, incomplete=False):
4939 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False.
4940 When incomplete, all conditions pass on missing fields.
4941 """
347de493 4942 return all(
8f18aca8 4943 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4944 for filter_part in re.split(r'(?<!\\)&', filter_str))
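# Usage sketch of the filter syntax handled by _match_one/match_str
# (the field names below are hypothetical info-dict keys):
#   >>> match_str('duration > 60 & uploader = "foo"', {'duration': 90, 'uploader': 'foo'})
#   True
#   >>> match_str('like_count > 100', {}, incomplete=True)
#   True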
347de493
PH
4945
4946
4947def match_filter_func(filter_str):
8f18aca8 4948 def _match_func(info_dict, *args, **kwargs):
4949 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4950 return None
4951 else:
4952 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4953 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4954 return _match_func
91410c9b
PH
4955
4956
bf6427d2
YCH
4957def parse_dfxp_time_expr(time_expr):
4958 if not time_expr:
d631d5f9 4959 return
bf6427d2
YCH
4960
4961 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4962 if mobj:
4963 return float(mobj.group('time_offset'))
4964
db2fe38b 4965 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4966 if mobj:
db2fe38b 4967 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4968
4969
c1c924ab 4970def srt_subtitles_timecode(seconds):
aa7785f8 4971 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4972
4973
4974def ass_subtitles_timecode(seconds):
4975 time = timetuple_from_msec(seconds * 1000)
4976 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
bf6427d2
YCH
4977
4978
4979def dfxp2srt(dfxp_data):
3869028f
YCH
4980 '''
4981 @param dfxp_data A bytes-like object containing DFXP data
4982 @returns A unicode object containing converted SRT data
4983 '''
5b995f71 4984 LEGACY_NAMESPACES = (
3869028f
YCH
4985 (b'http://www.w3.org/ns/ttml', [
4986 b'http://www.w3.org/2004/11/ttaf1',
4987 b'http://www.w3.org/2006/04/ttaf1',
4988 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4989 ]),
3869028f
YCH
4990 (b'http://www.w3.org/ns/ttml#styling', [
4991 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4992 ]),
4993 )
4994
4995 SUPPORTED_STYLING = [
4996 'color',
4997 'fontFamily',
4998 'fontSize',
4999 'fontStyle',
5000 'fontWeight',
5001 'textDecoration'
5002 ]
5003
4e335771 5004 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 5005 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 5006 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 5007 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 5008 })
bf6427d2 5009
5b995f71
RA
5010 styles = {}
5011 default_style = {}
5012
87de7069 5013 class TTMLPElementParser(object):
5b995f71
RA
5014 _out = ''
5015 _unclosed_elements = []
5016 _applied_styles = []
bf6427d2 5017
2b14cb56 5018 def start(self, tag, attrib):
5b995f71
RA
5019 if tag in (_x('ttml:br'), 'br'):
5020 self._out += '\n'
5021 else:
5022 unclosed_elements = []
5023 style = {}
5024 element_style_id = attrib.get('style')
5025 if default_style:
5026 style.update(default_style)
5027 if element_style_id:
5028 style.update(styles.get(element_style_id, {}))
5029 for prop in SUPPORTED_STYLING:
5030 prop_val = attrib.get(_x('tts:' + prop))
5031 if prop_val:
5032 style[prop] = prop_val
5033 if style:
5034 font = ''
5035 for k, v in sorted(style.items()):
5036 if self._applied_styles and self._applied_styles[-1].get(k) == v:
5037 continue
5038 if k == 'color':
5039 font += ' color="%s"' % v
5040 elif k == 'fontSize':
5041 font += ' size="%s"' % v
5042 elif k == 'fontFamily':
5043 font += ' face="%s"' % v
5044 elif k == 'fontWeight' and v == 'bold':
5045 self._out += '<b>'
5046 unclosed_elements.append('b')
5047 elif k == 'fontStyle' and v == 'italic':
5048 self._out += '<i>'
5049 unclosed_elements.append('i')
5050 elif k == 'textDecoration' and v == 'underline':
5051 self._out += '<u>'
5052 unclosed_elements.append('u')
5053 if font:
5054 self._out += '<font' + font + '>'
5055 unclosed_elements.append('font')
5056 applied_style = {}
5057 if self._applied_styles:
5058 applied_style.update(self._applied_styles[-1])
5059 applied_style.update(style)
5060 self._applied_styles.append(applied_style)
5061 self._unclosed_elements.append(unclosed_elements)
bf6427d2 5062
2b14cb56 5063 def end(self, tag):
5b995f71
RA
5064 if tag not in (_x('ttml:br'), 'br'):
5065 unclosed_elements = self._unclosed_elements.pop()
5066 for element in reversed(unclosed_elements):
5067 self._out += '</%s>' % element
5068 if unclosed_elements and self._applied_styles:
5069 self._applied_styles.pop()
bf6427d2 5070
2b14cb56 5071 def data(self, data):
5b995f71 5072 self._out += data
2b14cb56 5073
5074 def close(self):
5b995f71 5075 return self._out.strip()
2b14cb56 5076
5077 def parse_node(node):
5078 target = TTMLPElementParser()
5079 parser = xml.etree.ElementTree.XMLParser(target=target)
5080 parser.feed(xml.etree.ElementTree.tostring(node))
5081 return parser.close()
bf6427d2 5082
5b995f71
RA
5083 for k, v in LEGACY_NAMESPACES:
5084 for ns in v:
5085 dfxp_data = dfxp_data.replace(ns, k)
5086
3869028f 5087 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 5088 out = []
5b995f71 5089 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
5090
5091 if not paras:
5092 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 5093
5b995f71
RA
5094 repeat = False
5095 while True:
5096 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
5097 style_id = style.get('id') or style.get(_x('xml:id'))
5098 if not style_id:
5099 continue
5b995f71
RA
5100 parent_style_id = style.get('style')
5101 if parent_style_id:
5102 if parent_style_id not in styles:
5103 repeat = True
5104 continue
5105 styles[style_id] = styles[parent_style_id].copy()
5106 for prop in SUPPORTED_STYLING:
5107 prop_val = style.get(_x('tts:' + prop))
5108 if prop_val:
5109 styles.setdefault(style_id, {})[prop] = prop_val
5110 if repeat:
5111 repeat = False
5112 else:
5113 break
5114
5115 for p in ('body', 'div'):
5116 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5117 if ele is None:
5118 continue
5119 style = styles.get(ele.get('style'))
5120 if not style:
5121 continue
5122 default_style.update(style)
5123
bf6427d2 5124 for para, index in zip(paras, itertools.count(1)):
d631d5f9 5125 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 5126 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
5127 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5128 if begin_time is None:
5129 continue
7dff0363 5130 if not end_time:
d631d5f9
YCH
5131 if not dur:
5132 continue
5133 end_time = begin_time + dur
bf6427d2
YCH
5134 out.append('%d\n%s --> %s\n%s\n\n' % (
5135 index,
c1c924ab
YCH
5136 srt_subtitles_timecode(begin_time),
5137 srt_subtitles_timecode(end_time),
bf6427d2
YCH
5138 parse_node(para)))
5139
5140 return ''.join(out)
5141
5142
66e289ba
S
5143def cli_option(params, command_option, param):
5144 param = params.get(param)
98e698f1
RA
5145 if param:
5146 param = compat_str(param)
66e289ba
S
5147 return [command_option, param] if param is not None else []
5148
5149
5150def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
5151 param = params.get(param)
5b232f46
S
5152 if param is None:
5153 return []
66e289ba
S
5154 assert isinstance(param, bool)
5155 if separator:
5156 return [command_option + separator + (true_value if param else false_value)]
5157 return [command_option, true_value if param else false_value]
5158
5159
5160def cli_valueless_option(params, command_option, param, expected_value=True):
5161 param = params.get(param)
5162 return [command_option] if param == expected_value else []
5163
5164
e92caff5 5165def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 5166 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 5167 if use_compat:
5b1ecbb3 5168 return argdict
5169 else:
5170 argdict = None
eab9b2bc 5171 if argdict is None:
5b1ecbb3 5172 return default
eab9b2bc 5173 assert isinstance(argdict, dict)
5174
e92caff5 5175 assert isinstance(keys, (list, tuple))
5176 for key_list in keys:
e92caff5 5177 arg_list = list(filter(
5178 lambda x: x is not None,
6606817a 5179 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5180 if arg_list:
5181 return [arg for args in arg_list for arg in args]
5182 return default
66e289ba 5183
6251555f 5184
330690a2 5185def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5186 main_key, exe = main_key.lower(), exe.lower()
5187 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5188 keys = [f'{root_key}{k}' for k in (keys or [''])]
5189 if root_key in keys:
5190 if main_key != exe:
5191 keys.append((main_key, exe))
5192 keys.append('default')
5193 else:
5194 use_compat = False
5195 return cli_configuration_args(argdict, keys, default, use_compat)
5196
66e289ba 5197
39672624
YCH
5198class ISO639Utils(object):
5199 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5200 _lang_map = {
5201 'aa': 'aar',
5202 'ab': 'abk',
5203 'ae': 'ave',
5204 'af': 'afr',
5205 'ak': 'aka',
5206 'am': 'amh',
5207 'an': 'arg',
5208 'ar': 'ara',
5209 'as': 'asm',
5210 'av': 'ava',
5211 'ay': 'aym',
5212 'az': 'aze',
5213 'ba': 'bak',
5214 'be': 'bel',
5215 'bg': 'bul',
5216 'bh': 'bih',
5217 'bi': 'bis',
5218 'bm': 'bam',
5219 'bn': 'ben',
5220 'bo': 'bod',
5221 'br': 'bre',
5222 'bs': 'bos',
5223 'ca': 'cat',
5224 'ce': 'che',
5225 'ch': 'cha',
5226 'co': 'cos',
5227 'cr': 'cre',
5228 'cs': 'ces',
5229 'cu': 'chu',
5230 'cv': 'chv',
5231 'cy': 'cym',
5232 'da': 'dan',
5233 'de': 'deu',
5234 'dv': 'div',
5235 'dz': 'dzo',
5236 'ee': 'ewe',
5237 'el': 'ell',
5238 'en': 'eng',
5239 'eo': 'epo',
5240 'es': 'spa',
5241 'et': 'est',
5242 'eu': 'eus',
5243 'fa': 'fas',
5244 'ff': 'ful',
5245 'fi': 'fin',
5246 'fj': 'fij',
5247 'fo': 'fao',
5248 'fr': 'fra',
5249 'fy': 'fry',
5250 'ga': 'gle',
5251 'gd': 'gla',
5252 'gl': 'glg',
5253 'gn': 'grn',
5254 'gu': 'guj',
5255 'gv': 'glv',
5256 'ha': 'hau',
5257 'he': 'heb',
b7acc835 5258 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5259 'hi': 'hin',
5260 'ho': 'hmo',
5261 'hr': 'hrv',
5262 'ht': 'hat',
5263 'hu': 'hun',
5264 'hy': 'hye',
5265 'hz': 'her',
5266 'ia': 'ina',
5267 'id': 'ind',
b7acc835 5268 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5269 'ie': 'ile',
5270 'ig': 'ibo',
5271 'ii': 'iii',
5272 'ik': 'ipk',
5273 'io': 'ido',
5274 'is': 'isl',
5275 'it': 'ita',
5276 'iu': 'iku',
5277 'ja': 'jpn',
5278 'jv': 'jav',
5279 'ka': 'kat',
5280 'kg': 'kon',
5281 'ki': 'kik',
5282 'kj': 'kua',
5283 'kk': 'kaz',
5284 'kl': 'kal',
5285 'km': 'khm',
5286 'kn': 'kan',
5287 'ko': 'kor',
5288 'kr': 'kau',
5289 'ks': 'kas',
5290 'ku': 'kur',
5291 'kv': 'kom',
5292 'kw': 'cor',
5293 'ky': 'kir',
5294 'la': 'lat',
5295 'lb': 'ltz',
5296 'lg': 'lug',
5297 'li': 'lim',
5298 'ln': 'lin',
5299 'lo': 'lao',
5300 'lt': 'lit',
5301 'lu': 'lub',
5302 'lv': 'lav',
5303 'mg': 'mlg',
5304 'mh': 'mah',
5305 'mi': 'mri',
5306 'mk': 'mkd',
5307 'ml': 'mal',
5308 'mn': 'mon',
5309 'mr': 'mar',
5310 'ms': 'msa',
5311 'mt': 'mlt',
5312 'my': 'mya',
5313 'na': 'nau',
5314 'nb': 'nob',
5315 'nd': 'nde',
5316 'ne': 'nep',
5317 'ng': 'ndo',
5318 'nl': 'nld',
5319 'nn': 'nno',
5320 'no': 'nor',
5321 'nr': 'nbl',
5322 'nv': 'nav',
5323 'ny': 'nya',
5324 'oc': 'oci',
5325 'oj': 'oji',
5326 'om': 'orm',
5327 'or': 'ori',
5328 'os': 'oss',
5329 'pa': 'pan',
5330 'pi': 'pli',
5331 'pl': 'pol',
5332 'ps': 'pus',
5333 'pt': 'por',
5334 'qu': 'que',
5335 'rm': 'roh',
5336 'rn': 'run',
5337 'ro': 'ron',
5338 'ru': 'rus',
5339 'rw': 'kin',
5340 'sa': 'san',
5341 'sc': 'srd',
5342 'sd': 'snd',
5343 'se': 'sme',
5344 'sg': 'sag',
5345 'si': 'sin',
5346 'sk': 'slk',
5347 'sl': 'slv',
5348 'sm': 'smo',
5349 'sn': 'sna',
5350 'so': 'som',
5351 'sq': 'sqi',
5352 'sr': 'srp',
5353 'ss': 'ssw',
5354 'st': 'sot',
5355 'su': 'sun',
5356 'sv': 'swe',
5357 'sw': 'swa',
5358 'ta': 'tam',
5359 'te': 'tel',
5360 'tg': 'tgk',
5361 'th': 'tha',
5362 'ti': 'tir',
5363 'tk': 'tuk',
5364 'tl': 'tgl',
5365 'tn': 'tsn',
5366 'to': 'ton',
5367 'tr': 'tur',
5368 'ts': 'tso',
5369 'tt': 'tat',
5370 'tw': 'twi',
5371 'ty': 'tah',
5372 'ug': 'uig',
5373 'uk': 'ukr',
5374 'ur': 'urd',
5375 'uz': 'uzb',
5376 've': 'ven',
5377 'vi': 'vie',
5378 'vo': 'vol',
5379 'wa': 'wln',
5380 'wo': 'wol',
5381 'xh': 'xho',
5382 'yi': 'yid',
e9a50fba 5383 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5384 'yo': 'yor',
5385 'za': 'zha',
5386 'zh': 'zho',
5387 'zu': 'zul',
5388 }
5389
5390 @classmethod
5391 def short2long(cls, code):
5392 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5393 return cls._lang_map.get(code[:2])
5394
5395 @classmethod
5396 def long2short(cls, code):
5397 """Convert language code from ISO 639-2/T to ISO 639-1"""
5398 for short_name, long_name in cls._lang_map.items():
5399 if long_name == code:
5400 return short_name
5401
5402
4eb10f66
YCH
5403class ISO3166Utils(object):
5404 # From http://data.okfn.org/data/core/country-list
5405 _country_map = {
5406 'AF': 'Afghanistan',
5407 'AX': 'Åland Islands',
5408 'AL': 'Albania',
5409 'DZ': 'Algeria',
5410 'AS': 'American Samoa',
5411 'AD': 'Andorra',
5412 'AO': 'Angola',
5413 'AI': 'Anguilla',
5414 'AQ': 'Antarctica',
5415 'AG': 'Antigua and Barbuda',
5416 'AR': 'Argentina',
5417 'AM': 'Armenia',
5418 'AW': 'Aruba',
5419 'AU': 'Australia',
5420 'AT': 'Austria',
5421 'AZ': 'Azerbaijan',
5422 'BS': 'Bahamas',
5423 'BH': 'Bahrain',
5424 'BD': 'Bangladesh',
5425 'BB': 'Barbados',
5426 'BY': 'Belarus',
5427 'BE': 'Belgium',
5428 'BZ': 'Belize',
5429 'BJ': 'Benin',
5430 'BM': 'Bermuda',
5431 'BT': 'Bhutan',
5432 'BO': 'Bolivia, Plurinational State of',
5433 'BQ': 'Bonaire, Sint Eustatius and Saba',
5434 'BA': 'Bosnia and Herzegovina',
5435 'BW': 'Botswana',
5436 'BV': 'Bouvet Island',
5437 'BR': 'Brazil',
5438 'IO': 'British Indian Ocean Territory',
5439 'BN': 'Brunei Darussalam',
5440 'BG': 'Bulgaria',
5441 'BF': 'Burkina Faso',
5442 'BI': 'Burundi',
5443 'KH': 'Cambodia',
5444 'CM': 'Cameroon',
5445 'CA': 'Canada',
5446 'CV': 'Cape Verde',
5447 'KY': 'Cayman Islands',
5448 'CF': 'Central African Republic',
5449 'TD': 'Chad',
5450 'CL': 'Chile',
5451 'CN': 'China',
5452 'CX': 'Christmas Island',
5453 'CC': 'Cocos (Keeling) Islands',
5454 'CO': 'Colombia',
5455 'KM': 'Comoros',
5456 'CG': 'Congo',
5457 'CD': 'Congo, the Democratic Republic of the',
5458 'CK': 'Cook Islands',
5459 'CR': 'Costa Rica',
5460 'CI': 'Côte d\'Ivoire',
5461 'HR': 'Croatia',
5462 'CU': 'Cuba',
5463 'CW': 'Curaçao',
5464 'CY': 'Cyprus',
5465 'CZ': 'Czech Republic',
5466 'DK': 'Denmark',
5467 'DJ': 'Djibouti',
5468 'DM': 'Dominica',
5469 'DO': 'Dominican Republic',
5470 'EC': 'Ecuador',
5471 'EG': 'Egypt',
5472 'SV': 'El Salvador',
5473 'GQ': 'Equatorial Guinea',
5474 'ER': 'Eritrea',
5475 'EE': 'Estonia',
5476 'ET': 'Ethiopia',
5477 'FK': 'Falkland Islands (Malvinas)',
5478 'FO': 'Faroe Islands',
5479 'FJ': 'Fiji',
5480 'FI': 'Finland',
5481 'FR': 'France',
5482 'GF': 'French Guiana',
5483 'PF': 'French Polynesia',
5484 'TF': 'French Southern Territories',
5485 'GA': 'Gabon',
5486 'GM': 'Gambia',
5487 'GE': 'Georgia',
5488 'DE': 'Germany',
5489 'GH': 'Ghana',
5490 'GI': 'Gibraltar',
5491 'GR': 'Greece',
5492 'GL': 'Greenland',
5493 'GD': 'Grenada',
5494 'GP': 'Guadeloupe',
5495 'GU': 'Guam',
5496 'GT': 'Guatemala',
5497 'GG': 'Guernsey',
5498 'GN': 'Guinea',
5499 'GW': 'Guinea-Bissau',
5500 'GY': 'Guyana',
5501 'HT': 'Haiti',
5502 'HM': 'Heard Island and McDonald Islands',
5503 'VA': 'Holy See (Vatican City State)',
5504 'HN': 'Honduras',
5505 'HK': 'Hong Kong',
5506 'HU': 'Hungary',
5507 'IS': 'Iceland',
5508 'IN': 'India',
5509 'ID': 'Indonesia',
5510 'IR': 'Iran, Islamic Republic of',
5511 'IQ': 'Iraq',
5512 'IE': 'Ireland',
5513 'IM': 'Isle of Man',
5514 'IL': 'Israel',
5515 'IT': 'Italy',
5516 'JM': 'Jamaica',
5517 'JP': 'Japan',
5518 'JE': 'Jersey',
5519 'JO': 'Jordan',
5520 'KZ': 'Kazakhstan',
5521 'KE': 'Kenya',
5522 'KI': 'Kiribati',
5523 'KP': 'Korea, Democratic People\'s Republic of',
5524 'KR': 'Korea, Republic of',
5525 'KW': 'Kuwait',
5526 'KG': 'Kyrgyzstan',
5527 'LA': 'Lao People\'s Democratic Republic',
5528 'LV': 'Latvia',
5529 'LB': 'Lebanon',
5530 'LS': 'Lesotho',
5531 'LR': 'Liberia',
5532 'LY': 'Libya',
5533 'LI': 'Liechtenstein',
5534 'LT': 'Lithuania',
5535 'LU': 'Luxembourg',
5536 'MO': 'Macao',
5537 'MK': 'Macedonia, the Former Yugoslav Republic of',
5538 'MG': 'Madagascar',
5539 'MW': 'Malawi',
5540 'MY': 'Malaysia',
5541 'MV': 'Maldives',
5542 'ML': 'Mali',
5543 'MT': 'Malta',
5544 'MH': 'Marshall Islands',
5545 'MQ': 'Martinique',
5546 'MR': 'Mauritania',
5547 'MU': 'Mauritius',
5548 'YT': 'Mayotte',
5549 'MX': 'Mexico',
5550 'FM': 'Micronesia, Federated States of',
5551 'MD': 'Moldova, Republic of',
5552 'MC': 'Monaco',
5553 'MN': 'Mongolia',
5554 'ME': 'Montenegro',
5555 'MS': 'Montserrat',
5556 'MA': 'Morocco',
5557 'MZ': 'Mozambique',
5558 'MM': 'Myanmar',
5559 'NA': 'Namibia',
5560 'NR': 'Nauru',
5561 'NP': 'Nepal',
5562 'NL': 'Netherlands',
5563 'NC': 'New Caledonia',
5564 'NZ': 'New Zealand',
5565 'NI': 'Nicaragua',
5566 'NE': 'Niger',
5567 'NG': 'Nigeria',
5568 'NU': 'Niue',
5569 'NF': 'Norfolk Island',
5570 'MP': 'Northern Mariana Islands',
5571 'NO': 'Norway',
5572 'OM': 'Oman',
5573 'PK': 'Pakistan',
5574 'PW': 'Palau',
5575 'PS': 'Palestine, State of',
5576 'PA': 'Panama',
5577 'PG': 'Papua New Guinea',
5578 'PY': 'Paraguay',
5579 'PE': 'Peru',
5580 'PH': 'Philippines',
5581 'PN': 'Pitcairn',
5582 'PL': 'Poland',
5583 'PT': 'Portugal',
5584 'PR': 'Puerto Rico',
5585 'QA': 'Qatar',
5586 'RE': 'Réunion',
5587 'RO': 'Romania',
5588 'RU': 'Russian Federation',
5589 'RW': 'Rwanda',
5590 'BL': 'Saint Barthélemy',
5591 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5592 'KN': 'Saint Kitts and Nevis',
5593 'LC': 'Saint Lucia',
5594 'MF': 'Saint Martin (French part)',
5595 'PM': 'Saint Pierre and Miquelon',
5596 'VC': 'Saint Vincent and the Grenadines',
5597 'WS': 'Samoa',
5598 'SM': 'San Marino',
5599 'ST': 'Sao Tome and Principe',
5600 'SA': 'Saudi Arabia',
5601 'SN': 'Senegal',
5602 'RS': 'Serbia',
5603 'SC': 'Seychelles',
5604 'SL': 'Sierra Leone',
5605 'SG': 'Singapore',
5606 'SX': 'Sint Maarten (Dutch part)',
5607 'SK': 'Slovakia',
5608 'SI': 'Slovenia',
5609 'SB': 'Solomon Islands',
5610 'SO': 'Somalia',
5611 'ZA': 'South Africa',
5612 'GS': 'South Georgia and the South Sandwich Islands',
5613 'SS': 'South Sudan',
5614 'ES': 'Spain',
5615 'LK': 'Sri Lanka',
5616 'SD': 'Sudan',
5617 'SR': 'Suriname',
5618 'SJ': 'Svalbard and Jan Mayen',
5619 'SZ': 'Swaziland',
5620 'SE': 'Sweden',
5621 'CH': 'Switzerland',
5622 'SY': 'Syrian Arab Republic',
5623 'TW': 'Taiwan, Province of China',
5624 'TJ': 'Tajikistan',
5625 'TZ': 'Tanzania, United Republic of',
5626 'TH': 'Thailand',
5627 'TL': 'Timor-Leste',
5628 'TG': 'Togo',
5629 'TK': 'Tokelau',
5630 'TO': 'Tonga',
5631 'TT': 'Trinidad and Tobago',
5632 'TN': 'Tunisia',
5633 'TR': 'Turkey',
5634 'TM': 'Turkmenistan',
5635 'TC': 'Turks and Caicos Islands',
5636 'TV': 'Tuvalu',
5637 'UG': 'Uganda',
5638 'UA': 'Ukraine',
5639 'AE': 'United Arab Emirates',
5640 'GB': 'United Kingdom',
5641 'US': 'United States',
5642 'UM': 'United States Minor Outlying Islands',
5643 'UY': 'Uruguay',
5644 'UZ': 'Uzbekistan',
5645 'VU': 'Vanuatu',
5646 'VE': 'Venezuela, Bolivarian Republic of',
5647 'VN': 'Viet Nam',
5648 'VG': 'Virgin Islands, British',
5649 'VI': 'Virgin Islands, U.S.',
5650 'WF': 'Wallis and Futuna',
5651 'EH': 'Western Sahara',
5652 'YE': 'Yemen',
5653 'ZM': 'Zambia',
5654 'ZW': 'Zimbabwe',
5655 }
5656
5657 @classmethod
5658 def short2full(cls, code):
5659 """Convert an ISO 3166-2 country code to the corresponding full name"""
5660 return cls._country_map.get(code.upper())
5661
5662
773f291d
S
5663class GeoUtils(object):
5664 # Major IPv4 address blocks per country
5665 _country_ip_map = {
53896ca5 5666 'AD': '46.172.224.0/19',
773f291d
S
5667 'AE': '94.200.0.0/13',
5668 'AF': '149.54.0.0/17',
5669 'AG': '209.59.64.0/18',
5670 'AI': '204.14.248.0/21',
5671 'AL': '46.99.0.0/16',
5672 'AM': '46.70.0.0/15',
5673 'AO': '105.168.0.0/13',
53896ca5
S
5674 'AP': '182.50.184.0/21',
5675 'AQ': '23.154.160.0/24',
773f291d
S
5676 'AR': '181.0.0.0/12',
5677 'AS': '202.70.112.0/20',
53896ca5 5678 'AT': '77.116.0.0/14',
773f291d
S
5679 'AU': '1.128.0.0/11',
5680 'AW': '181.41.0.0/18',
53896ca5
S
5681 'AX': '185.217.4.0/22',
5682 'AZ': '5.197.0.0/16',
773f291d
S
5683 'BA': '31.176.128.0/17',
5684 'BB': '65.48.128.0/17',
5685 'BD': '114.130.0.0/16',
5686 'BE': '57.0.0.0/8',
53896ca5 5687 'BF': '102.178.0.0/15',
773f291d
S
5688 'BG': '95.42.0.0/15',
5689 'BH': '37.131.0.0/17',
5690 'BI': '154.117.192.0/18',
5691 'BJ': '137.255.0.0/16',
53896ca5 5692 'BL': '185.212.72.0/23',
773f291d
S
5693 'BM': '196.12.64.0/18',
5694 'BN': '156.31.0.0/16',
5695 'BO': '161.56.0.0/16',
5696 'BQ': '161.0.80.0/20',
53896ca5 5697 'BR': '191.128.0.0/12',
773f291d
S
5698 'BS': '24.51.64.0/18',
5699 'BT': '119.2.96.0/19',
5700 'BW': '168.167.0.0/16',
5701 'BY': '178.120.0.0/13',
5702 'BZ': '179.42.192.0/18',
5703 'CA': '99.224.0.0/11',
5704 'CD': '41.243.0.0/16',
53896ca5
S
5705 'CF': '197.242.176.0/21',
5706 'CG': '160.113.0.0/16',
773f291d 5707 'CH': '85.0.0.0/13',
53896ca5 5708 'CI': '102.136.0.0/14',
773f291d
S
5709 'CK': '202.65.32.0/19',
5710 'CL': '152.172.0.0/14',
53896ca5 5711 'CM': '102.244.0.0/14',
773f291d
S
5712 'CN': '36.128.0.0/10',
5713 'CO': '181.240.0.0/12',
5714 'CR': '201.192.0.0/12',
5715 'CU': '152.206.0.0/15',
5716 'CV': '165.90.96.0/19',
5717 'CW': '190.88.128.0/17',
53896ca5 5718 'CY': '31.153.0.0/16',
773f291d
S
5719 'CZ': '88.100.0.0/14',
5720 'DE': '53.0.0.0/8',
5721 'DJ': '197.241.0.0/17',
5722 'DK': '87.48.0.0/12',
5723 'DM': '192.243.48.0/20',
5724 'DO': '152.166.0.0/15',
5725 'DZ': '41.96.0.0/12',
5726 'EC': '186.68.0.0/15',
5727 'EE': '90.190.0.0/15',
5728 'EG': '156.160.0.0/11',
5729 'ER': '196.200.96.0/20',
5730 'ES': '88.0.0.0/11',
5731 'ET': '196.188.0.0/14',
5732 'EU': '2.16.0.0/13',
5733 'FI': '91.152.0.0/13',
5734 'FJ': '144.120.0.0/16',
53896ca5 5735 'FK': '80.73.208.0/21',
773f291d
S
5736 'FM': '119.252.112.0/20',
5737 'FO': '88.85.32.0/19',
5738 'FR': '90.0.0.0/9',
5739 'GA': '41.158.0.0/15',
5740 'GB': '25.0.0.0/8',
5741 'GD': '74.122.88.0/21',
5742 'GE': '31.146.0.0/16',
5743 'GF': '161.22.64.0/18',
5744 'GG': '62.68.160.0/19',
53896ca5
S
5745 'GH': '154.160.0.0/12',
5746 'GI': '95.164.0.0/16',
773f291d
S
5747 'GL': '88.83.0.0/19',
5748 'GM': '160.182.0.0/15',
5749 'GN': '197.149.192.0/18',
5750 'GP': '104.250.0.0/19',
5751 'GQ': '105.235.224.0/20',
5752 'GR': '94.64.0.0/13',
5753 'GT': '168.234.0.0/16',
5754 'GU': '168.123.0.0/16',
5755 'GW': '197.214.80.0/20',
5756 'GY': '181.41.64.0/18',
5757 'HK': '113.252.0.0/14',
5758 'HN': '181.210.0.0/16',
5759 'HR': '93.136.0.0/13',
5760 'HT': '148.102.128.0/17',
5761 'HU': '84.0.0.0/14',
5762 'ID': '39.192.0.0/10',
5763 'IE': '87.32.0.0/12',
5764 'IL': '79.176.0.0/13',
5765 'IM': '5.62.80.0/20',
5766 'IN': '117.192.0.0/10',
5767 'IO': '203.83.48.0/21',
5768 'IQ': '37.236.0.0/14',
5769 'IR': '2.176.0.0/12',
5770 'IS': '82.221.0.0/16',
5771 'IT': '79.0.0.0/10',
5772 'JE': '87.244.64.0/18',
5773 'JM': '72.27.0.0/17',
5774 'JO': '176.29.0.0/16',
53896ca5 5775 'JP': '133.0.0.0/8',
773f291d
S
5776 'KE': '105.48.0.0/12',
5777 'KG': '158.181.128.0/17',
5778 'KH': '36.37.128.0/17',
5779 'KI': '103.25.140.0/22',
5780 'KM': '197.255.224.0/20',
53896ca5 5781 'KN': '198.167.192.0/19',
773f291d
S
5782 'KP': '175.45.176.0/22',
5783 'KR': '175.192.0.0/10',
5784 'KW': '37.36.0.0/14',
5785 'KY': '64.96.0.0/15',
5786 'KZ': '2.72.0.0/13',
5787 'LA': '115.84.64.0/18',
5788 'LB': '178.135.0.0/16',
53896ca5 5789 'LC': '24.92.144.0/20',
773f291d
S
5790 'LI': '82.117.0.0/19',
5791 'LK': '112.134.0.0/15',
53896ca5 5792 'LR': '102.183.0.0/16',
773f291d
S
5793 'LS': '129.232.0.0/17',
5794 'LT': '78.56.0.0/13',
5795 'LU': '188.42.0.0/16',
5796 'LV': '46.109.0.0/16',
5797 'LY': '41.252.0.0/14',
5798 'MA': '105.128.0.0/11',
5799 'MC': '88.209.64.0/18',
5800 'MD': '37.246.0.0/16',
5801 'ME': '178.175.0.0/17',
5802 'MF': '74.112.232.0/21',
5803 'MG': '154.126.0.0/17',
5804 'MH': '117.103.88.0/21',
5805 'MK': '77.28.0.0/15',
5806 'ML': '154.118.128.0/18',
5807 'MM': '37.111.0.0/17',
5808 'MN': '49.0.128.0/17',
5809 'MO': '60.246.0.0/16',
5810 'MP': '202.88.64.0/20',
5811 'MQ': '109.203.224.0/19',
5812 'MR': '41.188.64.0/18',
5813 'MS': '208.90.112.0/22',
5814 'MT': '46.11.0.0/16',
5815 'MU': '105.16.0.0/12',
5816 'MV': '27.114.128.0/18',
53896ca5 5817 'MW': '102.70.0.0/15',
773f291d
S
5818 'MX': '187.192.0.0/11',
5819 'MY': '175.136.0.0/13',
5820 'MZ': '197.218.0.0/15',
5821 'NA': '41.182.0.0/16',
5822 'NC': '101.101.0.0/18',
5823 'NE': '197.214.0.0/18',
5824 'NF': '203.17.240.0/22',
5825 'NG': '105.112.0.0/12',
5826 'NI': '186.76.0.0/15',
5827 'NL': '145.96.0.0/11',
5828 'NO': '84.208.0.0/13',
5829 'NP': '36.252.0.0/15',
5830 'NR': '203.98.224.0/19',
5831 'NU': '49.156.48.0/22',
5832 'NZ': '49.224.0.0/14',
5833 'OM': '5.36.0.0/15',
5834 'PA': '186.72.0.0/15',
5835 'PE': '186.160.0.0/14',
5836 'PF': '123.50.64.0/18',
5837 'PG': '124.240.192.0/19',
5838 'PH': '49.144.0.0/13',
5839 'PK': '39.32.0.0/11',
5840 'PL': '83.0.0.0/11',
5841 'PM': '70.36.0.0/20',
5842 'PR': '66.50.0.0/16',
5843 'PS': '188.161.0.0/16',
5844 'PT': '85.240.0.0/13',
5845 'PW': '202.124.224.0/20',
5846 'PY': '181.120.0.0/14',
5847 'QA': '37.210.0.0/15',
53896ca5 5848 'RE': '102.35.0.0/16',
773f291d 5849 'RO': '79.112.0.0/13',
53896ca5 5850 'RS': '93.86.0.0/15',
773f291d 5851 'RU': '5.136.0.0/13',
53896ca5 5852 'RW': '41.186.0.0/16',
773f291d
S
5853 'SA': '188.48.0.0/13',
5854 'SB': '202.1.160.0/19',
5855 'SC': '154.192.0.0/11',
53896ca5 5856 'SD': '102.120.0.0/13',
773f291d 5857 'SE': '78.64.0.0/12',
53896ca5 5858 'SG': '8.128.0.0/10',
773f291d
S
5859 'SI': '188.196.0.0/14',
5860 'SK': '78.98.0.0/15',
53896ca5 5861 'SL': '102.143.0.0/17',
773f291d
S
5862 'SM': '89.186.32.0/19',
5863 'SN': '41.82.0.0/15',
53896ca5 5864 'SO': '154.115.192.0/18',
773f291d
S
5865 'SR': '186.179.128.0/17',
5866 'SS': '105.235.208.0/21',
5867 'ST': '197.159.160.0/19',
5868 'SV': '168.243.0.0/16',
5869 'SX': '190.102.0.0/20',
5870 'SY': '5.0.0.0/16',
5871 'SZ': '41.84.224.0/19',
5872 'TC': '65.255.48.0/20',
5873 'TD': '154.68.128.0/19',
5874 'TG': '196.168.0.0/14',
5875 'TH': '171.96.0.0/13',
5876 'TJ': '85.9.128.0/18',
5877 'TK': '27.96.24.0/21',
5878 'TL': '180.189.160.0/20',
5879 'TM': '95.85.96.0/19',
5880 'TN': '197.0.0.0/11',
5881 'TO': '175.176.144.0/21',
5882 'TR': '78.160.0.0/11',
5883 'TT': '186.44.0.0/15',
5884 'TV': '202.2.96.0/19',
5885 'TW': '120.96.0.0/11',
5886 'TZ': '156.156.0.0/14',
53896ca5
S
5887 'UA': '37.52.0.0/14',
5888 'UG': '102.80.0.0/13',
5889 'US': '6.0.0.0/8',
773f291d 5890 'UY': '167.56.0.0/13',
53896ca5 5891 'UZ': '84.54.64.0/18',
773f291d 5892 'VA': '212.77.0.0/19',
53896ca5 5893 'VC': '207.191.240.0/21',
773f291d 5894 'VE': '186.88.0.0/13',
53896ca5 5895 'VG': '66.81.192.0/20',
773f291d
S
5896 'VI': '146.226.0.0/16',
5897 'VN': '14.160.0.0/11',
5898 'VU': '202.80.32.0/20',
5899 'WF': '117.20.32.0/21',
5900 'WS': '202.4.32.0/19',
5901 'YE': '134.35.0.0/16',
5902 'YT': '41.242.116.0/22',
5903 'ZA': '41.0.0.0/11',
53896ca5
S
5904 'ZM': '102.144.0.0/13',
5905 'ZW': '102.177.192.0/18',
773f291d
S
5906 }
5907
5908 @classmethod
5f95927a
S
5909 def random_ipv4(cls, code_or_block):
5910 if len(code_or_block) == 2:
5911 block = cls._country_ip_map.get(code_or_block.upper())
5912 if not block:
5913 return None
5914 else:
5915 block = code_or_block
773f291d
S
5916 addr, preflen = block.split('/')
5917 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5918 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5919 return compat_str(socket.inet_ntoa(
4248dad9 5920 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5921
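# Illustrative usage (hypothetical helper, never called): a two-letter country code is
# looked up in the table above, while anything else is treated as an explicit CIDR block.
def _example_random_ipv4():
    addr = GeoUtils.random_ipv4('DE')  # 'DE' maps to 53.0.0.0/8 above
    assert addr is not None and addr.startswith('53.')
    assert GeoUtils.random_ipv4('ZZ') is None  # unknown country code
    assert GeoUtils.random_ipv4('192.0.2.0/24').startswith('192.0.2.')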
5922
91410c9b 5923class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5924 def __init__(self, proxies=None):
5925 # Set default handlers
5926 for type in ('http', 'https'):
5927 setattr(self, '%s_open' % type,
5928 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5929 meth(r, proxy, type))
38e87f6c 5930 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5931
91410c9b 5932 def proxy_open(self, req, proxy, type):
2461f79d 5933 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5934 if req_proxy is not None:
5935 proxy = req_proxy
2461f79d
PH
5936 del req.headers['Ytdl-request-proxy']
5937
5938 if proxy == '__noproxy__':
5939 return None # No Proxy
51fb4995 5940 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5941 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5942 # yt-dlp's http/https handlers do the actual wrapping of the socket with SOCKS
71aff188 5943 return None
91410c9b
PH
5944 return compat_urllib_request.ProxyHandler.proxy_open(
5945 self, req, proxy, type)
5bc880b9
YCH
5946
5947
0a5445dd
YCH
5948# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
 5949# released into the public domain
5950# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5951
5952def long_to_bytes(n, blocksize=0):
5953 """long_to_bytes(n:long, blocksize:int) : string
5954 Convert a long integer to a byte string.
5955
5956 If optional blocksize is given and greater than zero, pad the front of the
5957 byte string with binary zeros so that the length is a multiple of
5958 blocksize.
5959 """
5960 # after much testing, this algorithm was deemed to be the fastest
5961 s = b''
5962 n = int(n)
5963 while n > 0:
5964 s = compat_struct_pack('>I', n & 0xffffffff) + s
5965 n = n >> 32
5966 # strip off leading zeros
5967 for i in range(len(s)):
5968 if s[i] != b'\000'[0]:
5969 break
5970 else:
5971 # only happens when n == 0
5972 s = b'\000'
5973 i = 0
5974 s = s[i:]
5975 # add back some pad bytes. this could be done more efficiently w.r.t. the
5976 # de-padding being done above, but sigh...
5977 if blocksize > 0 and len(s) % blocksize:
5978 s = (blocksize - len(s) % blocksize) * b'\000' + s
5979 return s
5980
5981
5982def bytes_to_long(s):
5983 """bytes_to_long(string) : long
5984 Convert a byte string to a long integer.
5985
5986 This is (essentially) the inverse of long_to_bytes().
5987 """
5988 acc = 0
5989 length = len(s)
5990 if length % 4:
5991 extra = (4 - length % 4)
5992 s = b'\000' * extra + s
5993 length = length + extra
5994 for i in range(0, length, 4):
5995 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5996 return acc
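# Quick round-trip sanity check (hypothetical helper, never called):
def _example_long_bytes_conversion():
    assert long_to_bytes(65537) == b'\x01\x00\x01'
    assert long_to_bytes(65537, blocksize=4) == b'\x00\x01\x00\x01'  # front-padded to a multiple of 4
    assert bytes_to_long(b'\x01\x00\x01') == 65537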
5997
5998
5bc880b9
YCH
5999def ohdave_rsa_encrypt(data, exponent, modulus):
6000 '''
6001 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
6002
6003 Input:
6004 data: data to encrypt, bytes-like object
6005 exponent, modulus: parameter e and N of RSA algorithm, both integer
6006 Output: hex string of encrypted data
6007
6008 Limitation: supports one block encryption only
6009 '''
6010
6011 payload = int(binascii.hexlify(data[::-1]), 16)
6012 encrypted = pow(payload, exponent, modulus)
6013 return '%x' % encrypted
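# Illustrative call (hypothetical helper, never called) with textbook-sized parameters;
# real sites use much larger moduli. The payload is the input read as a little-endian integer.
def _example_ohdave_rsa_encrypt():
    assert ohdave_rsa_encrypt(b'ab', 65537, 3233) == '%x' % pow(0x6261, 65537, 3233)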
81bdc8fd
YCH
6014
6015
f48409c7
YCH
6016def pkcs1pad(data, length):
6017 """
6018 Padding input data with PKCS#1 scheme
6019
6020 @param {int[]} data input data
6021 @param {int} length target length
6022 @returns {int[]} padded data
6023 """
6024 if len(data) > length - 11:
6025 raise ValueError('Input data too long for PKCS#1 padding')
6026
6027 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
6028 return [0, 2] + pseudo_random + [0] + data
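# Illustrative layout check (hypothetical helper, never called):
def _example_pkcs1pad():
    padded = pkcs1pad([1, 2, 3], 16)  # [0, 2, <10 random ints in 0..254>, 0, 1, 2, 3]
    assert len(padded) == 16
    assert padded[:2] == [0, 2] and padded[-4:] == [0, 1, 2, 3]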
6029
6030
5eb6bdce 6031def encode_base_n(num, n, table=None):
59f898b7 6032 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
6033 if not table:
6034 table = FULL_TABLE[:n]
6035
5eb6bdce
YCH
6036 if n > len(table):
6037 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
6038
6039 if num == 0:
6040 return table[0]
6041
81bdc8fd
YCH
6042 ret = ''
6043 while num:
6044 ret = table[num % n] + ret
6045 num = num // n
6046 return ret
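# Illustrative conversions (hypothetical helper, never called):
def _example_encode_base_n():
    assert encode_base_n(255, 16) == 'ff'        # the default table makes base 16 lowercase hex
    assert encode_base_n(255, 2) == '11111111'
    assert encode_base_n(0, 36) == '0'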
f52354a8
YCH
6047
6048
6049def decode_packed_codes(code):
06b3fe29 6050 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 6051 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
6052 base = int(base)
6053 count = int(count)
6054 symbols = symbols.split('|')
6055 symbol_table = {}
6056
6057 while count:
6058 count -= 1
5eb6bdce 6059 base_n_count = encode_base_n(count, base)
f52354a8
YCH
6060 symbol_table[base_n_count] = symbols[count] or base_n_count
6061
6062 return re.sub(
6063 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 6064 obfuscated_code)
e154c651 6065
6066
1ced2221
S
6067def caesar(s, alphabet, shift):
6068 if shift == 0:
6069 return s
6070 l = len(alphabet)
6071 return ''.join(
6072 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
6073 for c in s)
6074
6075
6076def rot47(s):
6077 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
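# Illustrative usage (hypothetical helper, never called):
def _example_caesar_rot47():
    assert caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 3) == 'def'
    assert caesar('a-b', 'abcdefghijklmnopqrstuvwxyz', 3) == 'd-e'  # non-alphabet chars pass through
    assert rot47(rot47('abc')) == 'abc'                             # rot47 is its own inverse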
6078
6079
e154c651 6080def parse_m3u8_attributes(attrib):
6081 info = {}
6082 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
6083 if val.startswith('"'):
6084 val = val[1:-1]
6085 info[key] = val
6086 return info
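# Illustrative parse (hypothetical helper, never called):
def _example_parse_m3u8_attributes():
    assert parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"') == {
        'BANDWIDTH': '1280000',
        'CODECS': 'avc1.4d401f,mp4a.40.2',  # quotes stripped, embedded comma preserved
    }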
1143535d
YCH
6087
6088
6089def urshift(val, n):
6090 return val >> n if val >= 0 else (val + 0x100000000) >> n
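# Unsigned right shift on 32-bit values, similar to JavaScript's '>>>'
# (hypothetical helper, never called):
def _example_urshift():
    assert urshift(-1, 8) == 0x00ffffff
    assert urshift(16, 4) == 1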
d3f8e038
YCH
6091
6092
6093# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 6094# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
6095def decode_png(png_data):
6096 # Reference: https://www.w3.org/TR/PNG/
6097 header = png_data[8:]
6098
6099 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
6100 raise IOError('Not a valid PNG file.')
6101
6102 int_map = {1: '>B', 2: '>H', 4: '>I'}
6103 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
6104
6105 chunks = []
6106
6107 while header:
6108 length = unpack_integer(header[:4])
6109 header = header[4:]
6110
6111 chunk_type = header[:4]
6112 header = header[4:]
6113
6114 chunk_data = header[:length]
6115 header = header[length:]
6116
6117 header = header[4:] # Skip CRC
6118
6119 chunks.append({
6120 'type': chunk_type,
6121 'length': length,
6122 'data': chunk_data
6123 })
6124
6125 ihdr = chunks[0]['data']
6126
6127 width = unpack_integer(ihdr[:4])
6128 height = unpack_integer(ihdr[4:8])
6129
6130 idat = b''
6131
6132 for chunk in chunks:
6133 if chunk['type'] == b'IDAT':
6134 idat += chunk['data']
6135
6136 if not idat:
6137 raise IOError('Unable to read PNG data.')
6138
6139 decompressed_data = bytearray(zlib.decompress(idat))
6140
6141 stride = width * 3
6142 pixels = []
6143
6144 def _get_pixel(idx):
6145 x = idx % stride
6146 y = idx // stride
6147 return pixels[y][x]
6148
6149 for y in range(height):
6150 basePos = y * (1 + stride)
6151 filter_type = decompressed_data[basePos]
6152
6153 current_row = []
6154
6155 pixels.append(current_row)
6156
6157 for x in range(stride):
6158 color = decompressed_data[1 + basePos + x]
6159 basex = y * stride + x
6160 left = 0
6161 up = 0
6162
6163 if x > 2:
6164 left = _get_pixel(basex - 3)
6165 if y > 0:
6166 up = _get_pixel(basex - stride)
6167
6168 if filter_type == 1: # Sub
6169 color = (color + left) & 0xff
6170 elif filter_type == 2: # Up
6171 color = (color + up) & 0xff
6172 elif filter_type == 3: # Average
6173 color = (color + ((left + up) >> 1)) & 0xff
6174 elif filter_type == 4: # Paeth
6175 a = left
6176 b = up
6177 c = 0
6178
6179 if x > 2 and y > 0:
6180 c = _get_pixel(basex - stride - 3)
6181
6182 p = a + b - c
6183
6184 pa = abs(p - a)
6185 pb = abs(p - b)
6186 pc = abs(p - c)
6187
6188 if pa <= pb and pa <= pc:
6189 color = (color + a) & 0xff
6190 elif pb <= pc:
6191 color = (color + b) & 0xff
6192 else:
6193 color = (color + c) & 0xff
6194
6195 current_row.append(color)
6196
6197 return width, height, pixels
efa97bdc
YCH
6198
6199
6200def write_xattr(path, key, value):
6201 # This mess below finds the best xattr tool for the job
6202 try:
6203 # try the pyxattr module...
6204 import xattr
6205
53a7e3d2
YCH
6206 if hasattr(xattr, 'set'): # pyxattr
6207 # Unicode arguments are not supported in python-pyxattr until
6208 # version 0.5.0
067aa17e 6209 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6210 pyxattr_required_version = '0.5.0'
6211 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6212 # TODO: fallback to CLI tools
6213 raise XAttrUnavailableError(
6214 'python-pyxattr is detected but is too old. '
7a5c1cfe 6215 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6216 'Falling back to other xattr implementations' % (
6217 pyxattr_required_version, xattr.__version__))
6218
6219 setxattr = xattr.set
6220 else: # xattr
6221 setxattr = xattr.setxattr
efa97bdc
YCH
6222
6223 try:
53a7e3d2 6224 setxattr(path, key, value)
efa97bdc
YCH
6225 except EnvironmentError as e:
6226 raise XAttrMetadataError(e.errno, e.strerror)
6227
6228 except ImportError:
6229 if compat_os_name == 'nt':
6230 # Write xattrs to NTFS Alternate Data Streams:
6231 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6232 assert ':' not in key
6233 assert os.path.exists(path)
6234
6235 ads_fn = path + ':' + key
6236 try:
6237 with open(ads_fn, 'wb') as f:
6238 f.write(value)
6239 except EnvironmentError as e:
6240 raise XAttrMetadataError(e.errno, e.strerror)
6241 else:
6242 user_has_setfattr = check_executable('setfattr', ['--version'])
6243 user_has_xattr = check_executable('xattr', ['-h'])
6244
6245 if user_has_setfattr or user_has_xattr:
6246
6247 value = value.decode('utf-8')
6248 if user_has_setfattr:
6249 executable = 'setfattr'
6250 opts = ['-n', key, '-v', value]
6251 elif user_has_xattr:
6252 executable = 'xattr'
6253 opts = ['-w', key, value]
6254
3089bc74
S
6255 cmd = ([encodeFilename(executable, True)]
6256 + [encodeArgument(o) for o in opts]
6257 + [encodeFilename(path, True)])
efa97bdc
YCH
6258
6259 try:
d3c93ec2 6260 p = Popen(
efa97bdc
YCH
6261 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6262 except EnvironmentError as e:
6263 raise XAttrMetadataError(e.errno, e.strerror)
d3c93ec2 6264 stdout, stderr = p.communicate_or_kill()
efa97bdc
YCH
6265 stderr = stderr.decode('utf-8', 'replace')
6266 if p.returncode != 0:
6267 raise XAttrMetadataError(p.returncode, stderr)
6268
6269 else:
 6270 # On Unix, and we could not find pyxattr, setfattr, or xattr.
6271 if sys.platform.startswith('linux'):
6272 raise XAttrUnavailableError(
6273 "Couldn't find a tool to set the xattrs. "
6274 "Install either the python 'pyxattr' or 'xattr' "
6275 "modules, or the GNU 'attr' package "
6276 "(which contains the 'setfattr' tool).")
6277 else:
6278 raise XAttrUnavailableError(
6279 "Couldn't find a tool to set the xattrs. "
6280 "Install either the python 'xattr' module, "
6281 "or the 'xattr' binary.")
0c265486
YCH
6282
6283
6284def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6285 start_date = datetime.date(1950, 1, 1)
6286 end_date = datetime.date(1995, 12, 31)
6287 offset = random.randint(0, (end_date - start_date).days)
6288 random_date = start_date + datetime.timedelta(offset)
0c265486 6289 return {
aa374bc7
AS
6290 year_field: str(random_date.year),
6291 month_field: str(random_date.month),
6292 day_field: str(random_date.day),
0c265486 6293 }
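# Illustrative usage (hypothetical helper, never called): the values are random strings
# for a date between 1950-01-01 and 1995-12-31.
def _example_random_birthday():
    info = random_birthday('birth_year', 'birth_month', 'birth_day')
    assert set(info) == {'birth_year', 'birth_month', 'birth_day'}
    assert 1950 <= int(info['birth_year']) <= 1995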
732044af 6294
c76eb41b 6295
732044af 6296# Templates for internet shortcut files, which are plain text files.
6297DOT_URL_LINK_TEMPLATE = '''
6298[InternetShortcut]
6299URL=%(url)s
6300'''.lstrip()
6301
6302DOT_WEBLOC_LINK_TEMPLATE = '''
6303<?xml version="1.0" encoding="UTF-8"?>
6304<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6305<plist version="1.0">
6306<dict>
6307\t<key>URL</key>
6308\t<string>%(url)s</string>
6309</dict>
6310</plist>
6311'''.lstrip()
6312
6313DOT_DESKTOP_LINK_TEMPLATE = '''
6314[Desktop Entry]
6315Encoding=UTF-8
6316Name=%(filename)s
6317Type=Link
6318URL=%(url)s
6319Icon=text-html
6320'''.lstrip()
6321
08438d2c 6322LINK_TEMPLATES = {
6323 'url': DOT_URL_LINK_TEMPLATE,
6324 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
6325 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
6326}
6327
732044af 6328
6329def iri_to_uri(iri):
6330 """
6331 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6332
6333 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6334 """
6335
6336 iri_parts = compat_urllib_parse_urlparse(iri)
6337
6338 if '[' in iri_parts.netloc:
 6339 raise ValueError('IPv6 URIs are not yet supported.')
6340 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6341
6342 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6343
6344 net_location = ''
6345 if iri_parts.username:
6346 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6347 if iri_parts.password is not None:
6348 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6349 net_location += '@'
6350
6351 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6352 # The 'idna' encoding produces ASCII text.
6353 if iri_parts.port is not None and iri_parts.port != 80:
6354 net_location += ':' + str(iri_parts.port)
6355
6356 return compat_urllib_parse_urlunparse(
6357 (iri_parts.scheme,
6358 net_location,
6359
6360 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6361
6362 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6363 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6364
6365 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6366 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6367
6368 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6369
6370 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6371
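# Illustrative conversion (hypothetical helper, never called): the hostname goes through
# IDNA/punycode while the path and query are percent-encoded as UTF-8.
def _example_iri_to_uri():
    assert iri_to_uri('http://könig.example/søk?q=æble') == \
        'http://xn--knig-5qa.example/s%C3%B8k?q=%C3%A6ble'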
6372
6373def to_high_limit_path(path):
6374 if sys.platform in ['win32', 'cygwin']:
6375 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6376 return r'\\?\ '.rstrip() + os.path.abspath(path)
6377
6378 return path
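# Illustrative behaviour (hypothetical helper, never called): on Windows/Cygwin the
# result gains the extended-length prefix \\?\, elsewhere the path is returned unchanged.
def _example_to_high_limit_path():
    p = to_high_limit_path('clips/video.mp4')
    if sys.platform in ('win32', 'cygwin'):
        assert p.startswith('\\\\?\\')
    else:
        assert p == 'clips/video.mp4'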
76d321f6 6379
c76eb41b 6380
b868936c 6381def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6382 if field is None:
6383 val = obj if obj is not None else default
6384 else:
6385 val = obj.get(field, default)
76d321f6 6386 if func and val not in ignore:
6387 val = func(val)
6388 return template % val if val not in ignore else default
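# Illustrative usage (hypothetical helper, never called):
def _example_format_field():
    assert format_field({'height': 1080}, 'height', '%sp') == '1080p'
    assert format_field({'height': None}, 'height', '%sp') == ''  # ignored values yield the default
    assert format_field(42, None, 'id %d') == 'id 42'             # no field: format the object itself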
00dd0cd5 6389
6390
6391def clean_podcast_url(url):
6392 return re.sub(r'''(?x)
6393 (?:
6394 (?:
6395 chtbl\.com/track|
6396 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6397 play\.podtrac\.com
6398 )/[^/]+|
6399 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6400 flex\.acast\.com|
6401 pd(?:
6402 cn\.co| # https://podcorn.com/analytics-prefix/
6403 st\.fm # https://podsights.com/docs/
6404 )/e
6405 )/''', '', url)
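# Illustrative cleanup of a hypothetical tracking-prefixed URL (helper is never called):
def _example_clean_podcast_url():
    assert clean_podcast_url(
        'https://www.podtrac.com/pts/redirect.mp3/example.com/ep.mp3') == 'https://example.com/ep.mp3'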
ffcb8191
THD
6406
6407
6408_HEX_TABLE = '0123456789abcdef'
6409
6410
6411def random_uuidv4():
6412 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6413
6414
6415def make_dir(path, to_screen=None):
6416 try:
6417 dn = os.path.dirname(path)
6418 if dn and not os.path.exists(dn):
6419 os.makedirs(dn)
6420 return True
6421 except (OSError, IOError) as err:
 6422 if callable(to_screen):
6423 to_screen('unable to create directory ' + error_to_compat_str(err))
6424 return False
f74980cb 6425
6426
6427def get_executable_path():
c552ae88 6428 from zipimport import zipimporter
6429 if hasattr(sys, 'frozen'): # Running from PyInstaller
6430 path = os.path.dirname(sys.executable)
6431 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6432 path = os.path.join(os.path.dirname(__file__), '../..')
6433 else:
6434 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6435 return os.path.abspath(path)
6436
6437
2f567473 6438def load_plugins(name, suffix, namespace):
3ae5e797 6439 classes = {}
f74980cb 6440 try:
019a94f7
ÁS
6441 plugins_spec = importlib.util.spec_from_file_location(
6442 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
6443 plugins = importlib.util.module_from_spec(plugins_spec)
6444 sys.modules[plugins_spec.name] = plugins
6445 plugins_spec.loader.exec_module(plugins)
f74980cb 6446 for name in dir(plugins):
2f567473 6447 if name in namespace:
6448 continue
6449 if not name.endswith(suffix):
f74980cb 6450 continue
6451 klass = getattr(plugins, name)
3ae5e797 6452 classes[name] = namespace[name] = klass
019a94f7 6453 except FileNotFoundError:
f74980cb 6454 pass
f74980cb 6455 return classes
06167fbb 6456
6457
325ebc17 6458def traverse_obj(
352d63fd 6459 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6460 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6461 ''' Traverse nested list/dict/tuple
8f334380 6462 @param path_list A list of paths which are checked one by one.
6463 Each path is a list of keys where each key is a string,
2614f646 6464 a function, a tuple of strings or "...".
 6465 When a function is given, it takes the key as an argument and
6466 returns whether the key matches or not. When a tuple is given,
8f334380 6467 all the keys given in the tuple are traversed, and
6468 "..." traverses all the keys in the object
325ebc17 6469 @param default Default value to return
352d63fd 6470 @param expected_type Only accept final value of this type (Can also be any callable)
6471 @param get_all Return all the values obtained from a path or only the first one
324ad820 6472 @param casesense Whether to consider dictionary keys as case sensitive
6473 @param is_user_input Whether the keys are generated from user input. If True,
6474 strings are converted to int/slice if necessary
6475 @param traverse_string Whether to traverse inside strings. If True, any
6476 non-compatible object will also be converted into a string
8f334380 6477 # TODO: Write tests
324ad820 6478 '''
325ebc17 6479 if not casesense:
dbf5416a 6480 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6481 path_list = (map(_lower, variadic(path)) for path in path_list)
6482
6483 def _traverse_obj(obj, path, _current_depth=0):
6484 nonlocal depth
6485 path = tuple(variadic(path))
6486 for i, key in enumerate(path):
582fad70 6487 if obj is None:
6488 return None
8f334380 6489 if isinstance(key, (list, tuple)):
6490 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6491 key = ...
6492 if key is ...:
6493 obj = (obj.values() if isinstance(obj, dict)
6494 else obj if isinstance(obj, (list, tuple, LazyList))
6495 else str(obj) if traverse_string else [])
6496 _current_depth += 1
6497 depth = max(depth, _current_depth)
6498 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
2614f646 6499 elif callable(key):
6500 if isinstance(obj, (list, tuple, LazyList)):
6501 obj = enumerate(obj)
6502 elif isinstance(obj, dict):
6503 obj = obj.items()
6504 else:
6505 if not traverse_string:
6506 return None
6507 obj = str(obj)
6508 _current_depth += 1
6509 depth = max(depth, _current_depth)
6510 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
575e17a1 6511 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6512 obj = (obj.get(key) if casesense or (key in obj)
6513 else next((v for k, v in obj.items() if _lower(k) == key), None))
6514 else:
6515 if is_user_input:
6516 key = (int_or_none(key) if ':' not in key
6517 else slice(*map(int_or_none, key.split(':'))))
8f334380 6518 if key == slice(None):
575e17a1 6519 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6520 if not isinstance(key, (int, slice)):
9fea350f 6521 return None
8f334380 6522 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6523 if not traverse_string:
6524 return None
6525 obj = str(obj)
6526 try:
6527 obj = obj[key]
6528 except IndexError:
324ad820 6529 return None
325ebc17 6530 return obj
6531
352d63fd 6532 if isinstance(expected_type, type):
6533 type_test = lambda val: val if isinstance(val, expected_type) else None
6534 elif expected_type is not None:
6535 type_test = expected_type
6536 else:
6537 type_test = lambda val: val
6538
8f334380 6539 for path in path_list:
6540 depth = 0
6541 val = _traverse_obj(obj, path)
325ebc17 6542 if val is not None:
8f334380 6543 if depth:
6544 for _ in range(depth - 1):
6586bca9 6545 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6546 val = [v for v in map(type_test, val) if v is not None]
8f334380 6547 if val:
352d63fd 6548 return val if get_all else val[0]
6549 else:
6550 val = type_test(val)
6551 if val is not None:
8f334380 6552 return val
325ebc17 6553 return default
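# A few illustrative lookups (hypothetical helper, never called):
def _example_traverse_obj():
    d = {'a': {'b': [{'c': 1}, {'c': 2}]}}
    assert traverse_obj(d, ('a', 'b', 0, 'c')) == 1
    assert traverse_obj(d, ('a', 'b', ..., 'c')) == [1, 2]        # '...' fans out over the list
    assert traverse_obj(d, ('a', 'x'), ('a', 'b', 1, 'c')) == 2   # first path yielding a value wins
    assert traverse_obj(d, ('a', 'x'), default='missing') == 'missing'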
324ad820 6554
6555
ee8dd27a 6556# Deprecated
324ad820 6557def traverse_dict(dictn, keys, casesense=True):
ee8dd27a 6558 write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
6559 'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
6560 return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
6606817a 6561
6562
c634ad2a 6563def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6564 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
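# Illustrative behaviour (hypothetical helper, never called):
def _example_variadic():
    assert variadic('abc') == ('abc',)        # strings are treated as scalars, not iterables
    assert variadic(['a', 'b']) == ['a', 'b']
    assert variadic(None) == (None,)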
bd50a52b
THD
6565
6566
49fa4d9a
N
6567# create a JSON Web Signature (jws) with HS256 algorithm
6568# the resulting format is in JWS Compact Serialization
6569# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6570# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6571def jwt_encode_hs256(payload_data, key, headers={}):
6572 header_data = {
6573 'alg': 'HS256',
6574 'typ': 'JWT',
6575 }
6576 if headers:
6577 header_data.update(headers)
6578 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
6579 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
6580 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
6581 signature_b64 = base64.b64encode(h.digest())
6582 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6583 return token
819e0531 6584
6585
16b0d7e6 6586# can be extended in the future to verify the signature, parse the header and return the algorithm used if it's not HS256
6587def jwt_decode_hs256(jwt):
6588 header_b64, payload_b64, signature_b64 = jwt.split('.')
6589 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
6590 return payload_data
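# Illustrative round trip with a made-up secret (hypothetical helper, never called):
def _example_jwt_roundtrip():
    token = jwt_encode_hs256({'sub': 'user'}, 'not-a-real-secret')
    assert token.count(b'.') == 2  # JWS compact serialization: header.payload.signature
    assert jwt_decode_hs256(token.decode('utf-8')) == {'sub': 'user'}  # payload only; signature not verified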
6591
6592
819e0531 6593def supports_terminal_sequences(stream):
6594 if compat_os_name == 'nt':
d1d5c08f 6595 if get_windows_version() < (10, 0, 10586):
819e0531 6596 return False
6597 elif not os.getenv('TERM'):
6598 return False
6599 try:
6600 return stream.isatty()
6601 except BaseException:
6602 return False
6603
6604
ec11a9f4 6605_terminal_sequences_re = re.compile('\033\\[[^m]+m')
6606
6607
6608def remove_terminal_sequences(string):
6609 return _terminal_sequences_re.sub('', string)
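# Illustrative usage (hypothetical helper, never called):
def _example_remove_terminal_sequences():
    assert remove_terminal_sequences('\033[0;31mERROR:\033[0m done') == 'ERROR: done'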
6610
6611
6612def number_of_digits(number):
6613 return len('%d' % number)
34921b43 6614
6615
6616def join_nonempty(*values, delim='-', from_dict=None):
6617 if from_dict is not None:
c586f9e8 6618 values = map(from_dict.get, values)
34921b43 6619 return delim.join(map(str, filter(None, values)))
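# Illustrative usage (hypothetical helper, never called):
def _example_join_nonempty():
    assert join_nonempty('mp4', None, 1080) == 'mp4-1080'                          # falsy values are dropped
    assert join_nonempty('id', 'title', delim=' - ', from_dict={'id': 42}) == '42'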