#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)

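
# Illustrative sketch (not part of the original module): after the SOCKS schemes
# have been registered, a proxy URL such as 'socks5://127.0.0.1:1080' splits
# cleanly into scheme/host/port with the urlparse helpers used throughout this file.
def _example_register_socks_protocols():
    register_socks_protocols()
    parts = compat_urlparse.urlsplit('socks5://127.0.0.1:1080')
    return parts.scheme, parts.hostname, parts.port  # ('socks5', '127.0.0.1', 1080)
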

# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

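
# Illustrative sketch (not part of the original module): the DATE_FORMATS* tuples
# are candidate strptime() patterns that the date-parsing helpers elsewhere in
# this file try one after another until one matches.
def _example_parse_date(date_str, day_first=True):
    formats = DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
    for fmt in formats:
        try:
            # Return a POSIX timestamp for the first format that parses.
            return calendar.timegm(datetime.datetime.strptime(date_str, fmt).timetuple())
        except ValueError:
            continue
    return None
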
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

d77c3dfd 1793def preferredencoding():
59ae15a5 1794 """Get preferred encoding.
d77c3dfd 1795
59ae15a5
PH
1796 Returns the best encoding scheme for the system, based on
1797 locale.getpreferredencoding() and some further tweaks.
1798 """
1799 try:
1800 pref = locale.getpreferredencoding()
28e614de 1801 'TEST'.encode(pref)
70a1165b 1802 except Exception:
59ae15a5 1803 pref = 'UTF-8'
bae611f2 1804
59ae15a5 1805 return pref
d77c3dfd 1806
f4bfd65f 1807
181c8655 1808def write_json_file(obj, fn):
1394646a 1809 """ Encode obj as JSON and write it to fn, atomically if possible """
181c8655 1810
92120217 1811 fn = encodeFilename(fn)
61ee5aeb 1812 if sys.version_info < (3, 0) and sys.platform != 'win32':
ec5f6016
JMF
1813 encoding = get_filesystem_encoding()
1814 # os.path.basename returns a bytes object, but NamedTemporaryFile
1815 # will fail if the filename contains non ascii characters unless we
1816 # use a unicode object
1817 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1818 # the same for os.path.dirname
1819 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1820 else:
1821 path_basename = os.path.basename
1822 path_dirname = os.path.dirname
1823
73159f99
S
1824 args = {
1825 'suffix': '.tmp',
ec5f6016
JMF
1826 'prefix': path_basename(fn) + '.',
1827 'dir': path_dirname(fn),
73159f99
S
1828 'delete': False,
1829 }
1830
181c8655
PH
1831 # In Python 2.x, json.dump expects a bytestream.
1832 # In Python 3.x, it writes to a character stream
1833 if sys.version_info < (3, 0):
73159f99 1834 args['mode'] = 'wb'
181c8655 1835 else:
73159f99
S
1836 args.update({
1837 'mode': 'w',
1838 'encoding': 'utf-8',
1839 })
1840
c86b6142 1841 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
181c8655
PH
1842
1843 try:
1844 with tf:
6e84b215 1845 json.dump(obj, tf)
1394646a
IK
1846 if sys.platform == 'win32':
1847 # Need to remove existing file on Windows, else os.rename raises
1848 # WindowsError or FileExistsError.
1849 try:
1850 os.unlink(fn)
1851 except OSError:
1852 pass
9cd5f54e
R
1853 try:
1854 mask = os.umask(0)
1855 os.umask(mask)
1856 os.chmod(tf.name, 0o666 & ~mask)
1857 except OSError:
1858 pass
181c8655 1859 os.rename(tf.name, fn)
70a1165b 1860 except Exception:
181c8655
PH
1861 try:
1862 os.remove(tf.name)
1863 except OSError:
1864 pass
1865 raise
1866
1867
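
# Minimal usage sketch (illustrative only, not part of the original module):
# write_json_file() serialises into a NamedTemporaryFile created next to the
# destination and then os.rename()s it into place, so readers never observe a
# half-written file. The file name below is hypothetical.
def _example_write_json_file():
    write_json_file({'id': 'abc123', 'title': 'Example'}, 'example.info.json')
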
1868if sys.version_info >= (2, 7):
ee114368 1869 def find_xpath_attr(node, xpath, key, val=None):
59ae56fa 1870 """ Find the xpath xpath[@key=val] """
5d2354f1 1871 assert re.match(r'^[a-zA-Z_-]+$', key)
ee114368 1872 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
59ae56fa
PH
1873 return node.find(expr)
1874else:
ee114368 1875 def find_xpath_attr(node, xpath, key, val=None):
810c10ba 1876 for f in node.findall(compat_xpath(xpath)):
ee114368
S
1877 if key not in f.attrib:
1878 continue
1879 if val is None or f.attrib.get(key) == val:
59ae56fa
PH
1880 return f
1881 return None
1882
d7e66d39
JMF
1883# On python2.6 the xml.etree.ElementTree.Element methods don't support
1884# the namespace parameter
5f6a1245
JW
1885
1886
d7e66d39
JMF
1887def xpath_with_ns(path, ns_map):
1888 components = [c.split(':') for c in path.split('/')]
1889 replaced = []
1890 for c in components:
1891 if len(c) == 1:
1892 replaced.append(c[0])
1893 else:
1894 ns, tag = c
1895 replaced.append('{%s}%s' % (ns_map[ns], tag))
1896 return '/'.join(replaced)
1897
d77c3dfd 1898
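
# Illustrative sketch (not part of the original module): xpath_with_ns() expands
# 'prefix:tag' path components into the '{namespace-uri}tag' form that
# xml.etree.ElementTree expects. The SMIL namespace URI below is only an example.
def _example_xpath_with_ns():
    ns_map = {'smil': 'http://www.w3.org/2001/SMIL20/Language'}
    return xpath_with_ns('.//smil:video', ns_map)
    # './/{http://www.w3.org/2001/SMIL20/Language}video'
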
a41fb80c 1899def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
578c0745 1900 def _find_xpath(xpath):
810c10ba 1901 return node.find(compat_xpath(xpath))
578c0745
S
1902
1903 if isinstance(xpath, (str, compat_str)):
1904 n = _find_xpath(xpath)
1905 else:
1906 for xp in xpath:
1907 n = _find_xpath(xp)
1908 if n is not None:
1909 break
d74bebd5 1910
8e636da4 1911 if n is None:
bf42a990
S
1912 if default is not NO_DEFAULT:
1913 return default
1914 elif fatal:
bf0ff932
PH
1915 name = xpath if name is None else name
1916 raise ExtractorError('Could not find XML element %s' % name)
1917 else:
1918 return None
a41fb80c
S
1919 return n
1920
1921
1922def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
8e636da4
S
1923 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1924 if n is None or n == default:
1925 return n
1926 if n.text is None:
1927 if default is not NO_DEFAULT:
1928 return default
1929 elif fatal:
1930 name = xpath if name is None else name
1931 raise ExtractorError('Could not find XML element\'s text %s' % name)
1932 else:
1933 return None
1934 return n.text
a41fb80c
S
1935
1936
1937def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1938 n = find_xpath_attr(node, xpath, key)
1939 if n is None:
1940 if default is not NO_DEFAULT:
1941 return default
1942 elif fatal:
1943 name = '%s[@%s]' % (xpath, key) if name is None else name
1944 raise ExtractorError('Could not find XML attribute %s' % name)
1945 else:
1946 return None
1947 return n.attrib[key]
bf0ff932
PH
1948
1949
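
# Illustrative sketch (not part of the original module): the xpath_* helpers wrap
# ElementTree lookups with the NO_DEFAULT/fatal convention -- return the default
# if one is given, raise ExtractorError when fatal=True, otherwise return None.
def _example_xpath_helpers():
    doc = compat_etree_fromstring('<root><title lang="en">Example</title></root>')
    return (
        xpath_text(doc, './title', default=None),           # 'Example'
        xpath_attr(doc, './title', 'lang', default=None),   # 'en'
        xpath_text(doc, './missing', default='n/a'),        # 'n/a'
    )
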
9e6dd238 1950def get_element_by_id(id, html):
43e8fafd 1951 """Return the content of the tag with the specified ID in the passed HTML document"""
611c1dd9 1952 return get_element_by_attribute('id', id, html)
43e8fafd 1953
12ea2f30 1954
84c237fb 1955def get_element_by_class(class_name, html):
2af12ad9
TC
1956 """Return the content of the first tag with the specified class in the passed HTML document"""
1957 retval = get_elements_by_class(class_name, html)
1958 return retval[0] if retval else None
1959
1960
1961def get_element_by_attribute(attribute, value, html, escape_value=True):
1962 retval = get_elements_by_attribute(attribute, value, html, escape_value)
1963 return retval[0] if retval else None
1964
1965
1966def get_elements_by_class(class_name, html):
1967 """Return the content of all tags with the specified class in the passed HTML document as a list"""
1968 return get_elements_by_attribute(
84c237fb
YCH
1969 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1970 html, escape_value=False)
1971
1972
2af12ad9 1973def get_elements_by_attribute(attribute, value, html, escape_value=True):
43e8fafd 1974 """Return the content of the tag with the specified attribute in the passed HTML document"""
9e6dd238 1975
84c237fb
YCH
1976 value = re.escape(value) if escape_value else value
1977
2af12ad9
TC
1978 retlist = []
1979 for m in re.finditer(r'''(?xs)
38285056 1980 <([a-zA-Z0-9:._-]+)
609ff8ca 1981 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056 1982 \s+%s=['"]?%s['"]?
609ff8ca 1983 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056
PH
1984 \s*>
1985 (?P<content>.*?)
1986 </\1>
2af12ad9
TC
1987 ''' % (re.escape(attribute), value), html):
1988 res = m.group('content')
38285056 1989
2af12ad9
TC
1990 if res.startswith('"') or res.startswith("'"):
1991 res = res[1:-1]
38285056 1992
2af12ad9 1993 retlist.append(unescapeHTML(res))
a921f407 1994
2af12ad9 1995 return retlist
a921f407 1996
c5229f39 1997
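
# Illustrative sketch (not part of the original module): the get_element(s)_by_*
# helpers are lightweight regex scrapers over an HTML string rather than a real
# DOM; get_element_by_class() matches the class anywhere in the attribute, while
# get_element_by_attribute() requires an exact attribute value.
def _example_get_element_helpers():
    html = '<div class="title main">Hello</div><div class="title">World</div>'
    return (
        get_element_by_class('title', html),               # 'Hello'
        get_elements_by_class('title', html),              # ['Hello', 'World']
        get_element_by_attribute('class', 'title', html),  # 'World'
    )
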
8bb56eee
BF
1998class HTMLAttributeParser(compat_HTMLParser):
1999 """Trivial HTML parser to gather the attributes for a single element"""
b6e0c7d2 2000
8bb56eee 2001 def __init__(self):
c5229f39 2002 self.attrs = {}
8bb56eee
BF
2003 compat_HTMLParser.__init__(self)
2004
2005 def handle_starttag(self, tag, attrs):
2006 self.attrs = dict(attrs)
2007
c5229f39 2008
8bb56eee
BF
2009def extract_attributes(html_element):
2010 """Given a string for an HTML element such as
2011 <el
2012 a="foo" B="bar" c="&98;az" d=boz
2013 empty= noval entity="&amp;"
2014 sq='"' dq="'"
2015 >
2016 Decode and return a dictionary of attributes.
2017 {
2018 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2019 'empty': '', 'noval': None, 'entity': '&',
2020 'sq': '"', 'dq': '\''
2021 }.
2022 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2023 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2024 """
2025 parser = HTMLAttributeParser()
b4a3d461
S
2026 try:
2027 parser.feed(html_element)
2028 parser.close()
2029 # Older Python may throw HTMLParseError in case of malformed HTML
2030 except compat_HTMLParseError:
2031 pass
8bb56eee 2032 return parser.attrs
9e6dd238 2033
c5229f39 2034
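
# Illustrative sketch (not part of the original module): extract_attributes()
# turns a single opening tag into a dict, decoding entities and mapping
# valueless attributes to None.
def _example_extract_attributes():
    return extract_attributes('<video src="video.mp4" data-id="42" autoplay>')
    # {'src': 'video.mp4', 'data-id': '42', 'autoplay': None}
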
9e6dd238 2035def clean_html(html):
59ae15a5 2036 """Clean an HTML snippet into a readable string"""
dd622d7c
PH
2037
2038 if html is None: # Convenience for sanitizing descriptions etc.
2039 return html
2040
59ae15a5
PH
2041 # Newline vs <br />
2042 html = html.replace('\n', ' ')
edd9221c
TF
2043 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2044 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
59ae15a5
PH
2045 # Strip html tags
2046 html = re.sub('<.*?>', '', html)
2047 # Replace html entities
2048 html = unescapeHTML(html)
7decf895 2049 return html.strip()
9e6dd238
FV
2050
2051
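
# Illustrative sketch (not part of the original module): clean_html() flattens an
# HTML snippet into readable text -- <br> and </p><p> boundaries become newlines,
# the remaining tags are stripped and entities are unescaped.
def _example_clean_html():
    return clean_html('<p>First line<br/>second line</p><p>&amp; a second paragraph</p>')
    # 'First line\nsecond line\n& a second paragraph'
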
d77c3dfd 2052def sanitize_open(filename, open_mode):
59ae15a5
PH
2053 """Try to open the given filename, and slightly tweak it if this fails.
2054
2055 Attempts to open the given filename. If this fails, it tries to change
2056 the filename slightly, step by step, until it's either able to open it
2057 or it fails and raises a final exception, like the standard open()
2058 function.
2059
2060 It returns the tuple (stream, definitive_file_name).
2061 """
2062 try:
28e614de 2063 if filename == '-':
59ae15a5
PH
2064 if sys.platform == 'win32':
2065 import msvcrt
2066 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
898280a0 2067 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
59ae15a5
PH
2068 stream = open(encodeFilename(filename), open_mode)
2069 return (stream, filename)
2070 except (IOError, OSError) as err:
f45c185f
PH
2071 if err.errno in (errno.EACCES,):
2072 raise
59ae15a5 2073
f45c185f 2074 # In case of error, try to remove win32 forbidden chars
d55de57b 2075 alt_filename = sanitize_path(filename)
f45c185f
PH
2076 if alt_filename == filename:
2077 raise
2078 else:
2079 # An exception here should be caught in the caller
d55de57b 2080 stream = open(encodeFilename(alt_filename), open_mode)
f45c185f 2081 return (stream, alt_filename)
d77c3dfd
FV
2082
2083
2084def timeconvert(timestr):
59ae15a5
PH
2085 """Convert RFC 2822 defined time string into system timestamp"""
2086 timestamp = None
2087 timetuple = email.utils.parsedate_tz(timestr)
2088 if timetuple is not None:
2089 timestamp = email.utils.mktime_tz(timetuple)
2090 return timestamp
1c469a94 2091
5f6a1245 2092
796173d0 2093def sanitize_filename(s, restricted=False, is_id=False):
59ae15a5
PH
2094 """Sanitizes a string so it could be used as part of a filename.
2095 If restricted is set, use a stricter subset of allowed characters.
158af524
S
2096 Set is_id if this is not an arbitrary string, but an ID that should be kept
2097 if possible.
59ae15a5
PH
2098 """
2099 def replace_insane(char):
c587cbb7
AT
2100 if restricted and char in ACCENT_CHARS:
2101 return ACCENT_CHARS[char]
91dd88b9 2102 elif not restricted and char == '\n':
2103 return ' '
2104 elif char == '?' or ord(char) < 32 or ord(char) == 127:
59ae15a5
PH
2105 return ''
2106 elif char == '"':
2107 return '' if restricted else '\''
2108 elif char == ':':
2109 return '_-' if restricted else ' -'
2110 elif char in '\\/|*<>':
2111 return '_'
627dcfff 2112 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
59ae15a5
PH
2113 return '_'
2114 if restricted and ord(char) > 127:
2115 return '_'
2116 return char
2117
639f1cea 2118 if s == '':
2119 return ''
2aeb06d6
PH
2120 # Handle timestamps
2121 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
28e614de 2122 result = ''.join(map(replace_insane, s))
796173d0
PH
2123 if not is_id:
2124 while '__' in result:
2125 result = result.replace('__', '_')
2126 result = result.strip('_')
2127 # Common case of "Foreign band name - English song title"
2128 if restricted and result.startswith('-_'):
2129 result = result[2:]
5a42414b
PH
2130 if result.startswith('-'):
2131 result = '_' + result[len('-'):]
a7440261 2132 result = result.lstrip('.')
796173d0
PH
2133 if not result:
2134 result = '_'
59ae15a5 2135 return result
d77c3dfd 2136
5f6a1245 2137
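
# Illustrative sketch (not part of the original module): sanitize_filename()
# replaces characters that are unsafe in filenames; with restricted=True it also
# turns whitespace into '_' and transliterates accents via ACCENT_CHARS.
def _example_sanitize_filename():
    return (
        sanitize_filename('AC/DC: Back In Black?'),                       # 'AC_DC - Back In Black'
        sanitize_filename('Motörhead - Ace of Spades', restricted=True),  # 'Motorhead_-_Ace_of_Spades'
    )
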
c2934512 2138def sanitize_path(s, force=False):
a2aaf4db 2139 """Sanitizes and normalizes path on Windows"""
c2934512 2140 if sys.platform == 'win32':
c4218ac3 2141 force = False
c2934512 2142 drive_or_unc, _ = os.path.splitdrive(s)
2143 if sys.version_info < (2, 7) and not drive_or_unc:
2144 drive_or_unc, _ = os.path.splitunc(s)
2145 elif force:
2146 drive_or_unc = ''
2147 else:
a2aaf4db 2148 return s
c2934512 2149
be531ef1
S
2150 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2151 if drive_or_unc:
a2aaf4db
S
2152 norm_path.pop(0)
2153 sanitized_path = [
ec85ded8 2154 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
a2aaf4db 2155 for path_part in norm_path]
be531ef1
S
2156 if drive_or_unc:
2157 sanitized_path.insert(0, drive_or_unc + os.path.sep)
c4218ac3 2158 elif force and s[0] == os.path.sep:
2159 sanitized_path.insert(0, os.path.sep)
a2aaf4db
S
2160 return os.path.join(*sanitized_path)
2161
2162
17bcc626 2163def sanitize_url(url):
befa4708
S
2164 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2165 # the number of unwanted failures due to missing protocol
2166 if url.startswith('//'):
2167 return 'http:%s' % url
2168 # Fix some common typos seen so far
2169 COMMON_TYPOS = (
067aa17e 2170 # https://github.com/ytdl-org/youtube-dl/issues/15649
befa4708
S
2171 (r'^httpss://', r'https://'),
2172 # https://bx1.be/lives/direct-tv/
2173 (r'^rmtp([es]?)://', r'rtmp\1://'),
2174 )
2175 for mistake, fixup in COMMON_TYPOS:
2176 if re.match(mistake, url):
2177 return re.sub(mistake, fixup, url)
bc6b9bcd 2178 return url
17bcc626
S
2179
2180
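
# Illustrative sketch (not part of the original module): sanitize_url() upgrades
# protocol-relative URLs to http and repairs a couple of known scheme typos.
def _example_sanitize_url():
    return (
        sanitize_url('//example.com/video'),         # 'http://example.com/video'
        sanitize_url('httpss://example.com/video'),  # 'https://example.com/video'
    )
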
5435dcf9
HH
2181def extract_basic_auth(url):
2182 parts = compat_urlparse.urlsplit(url)
2183 if parts.username is None:
2184 return url, None
2185 url = compat_urlparse.urlunsplit(parts._replace(netloc=(
2186 parts.hostname if parts.port is None
2187 else '%s:%d' % (parts.hostname, parts.port))))
2188 auth_payload = base64.b64encode(
2189 ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
2190 return url, 'Basic ' + auth_payload.decode('utf-8')
2191
2192
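
# Illustrative sketch (not part of the original module): extract_basic_auth()
# strips user:password credentials from a URL and returns them as a ready-made
# Basic Authorization header value, which sanitized_Request() below attaches.
def _example_extract_basic_auth():
    return extract_basic_auth('http://user:pass@example.com/feed')
    # ('http://example.com/feed', 'Basic dXNlcjpwYXNz')
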
67dda517 2193def sanitized_Request(url, *args, **kwargs):
bc6b9bcd 2194 url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
5435dcf9
HH
2195 if auth_header is not None:
2196 headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
2197 headers['Authorization'] = auth_header
2198 return compat_urllib_request.Request(url, *args, **kwargs)
67dda517
S
2199
2200
51098426
S
2201def expand_path(s):
2202 """Expand shell variables and ~"""
2203 return os.path.expandvars(compat_expanduser(s))
2204
2205
d77c3dfd 2206def orderedSet(iterable):
59ae15a5
PH
2207 """ Remove all duplicates from the input iterable """
2208 res = []
2209 for el in iterable:
2210 if el not in res:
2211 res.append(el)
2212 return res
d77c3dfd 2213
912b38b4 2214
55b2f099 2215def _htmlentity_transform(entity_with_semicolon):
4e408e47 2216 """Transforms an HTML entity to a character."""
55b2f099
YCH
2217 entity = entity_with_semicolon[:-1]
2218
4e408e47
PH
2219 # Known non-numeric HTML entity
2220 if entity in compat_html_entities.name2codepoint:
2221 return compat_chr(compat_html_entities.name2codepoint[entity])
2222
55b2f099
YCH
2223 # TODO: HTML5 allows entities without a semicolon. For example,
2224 # '&Eacuteric' should be decoded as 'Éric'.
2225 if entity_with_semicolon in compat_html_entities_html5:
2226 return compat_html_entities_html5[entity_with_semicolon]
2227
91757b0f 2228 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
4e408e47
PH
2229 if mobj is not None:
2230 numstr = mobj.group(1)
28e614de 2231 if numstr.startswith('x'):
4e408e47 2232 base = 16
28e614de 2233 numstr = '0%s' % numstr
4e408e47
PH
2234 else:
2235 base = 10
067aa17e 2236 # See https://github.com/ytdl-org/youtube-dl/issues/7518
7aefc49c
S
2237 try:
2238 return compat_chr(int(numstr, base))
2239 except ValueError:
2240 pass
4e408e47
PH
2241
2242 # Unknown entity in name, return its literal representation
7a3f0c00 2243 return '&%s;' % entity
4e408e47
PH
2244
2245
d77c3dfd 2246def unescapeHTML(s):
912b38b4
PH
2247 if s is None:
2248 return None
2249 assert type(s) == compat_str
d77c3dfd 2250
4e408e47 2251 return re.sub(
95f3f7c2 2252 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
d77c3dfd 2253
8bf48f23 2254
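
# Illustrative sketch (not part of the original module): unescapeHTML() resolves
# named, decimal and hexadecimal entities and leaves unknown ones as literal text.
def _example_unescapeHTML():
    return unescapeHTML('Tom &amp; Jerry &#39;07 &#x2013; trailer')
    # "Tom & Jerry '07 – trailer"
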
cdb19aa4 2255def escapeHTML(text):
2256 return (
2257 text
2258 .replace('&', '&amp;')
2259 .replace('<', '&lt;')
2260 .replace('>', '&gt;')
2261 .replace('"', '&quot;')
2262 .replace("'", '&#39;')
2263 )
2264
2265
f5b1bca9 2266def process_communicate_or_kill(p, *args, **kwargs):
2267 try:
2268 return p.communicate(*args, **kwargs)
2269 except BaseException: # Including KeyboardInterrupt
2270 p.kill()
2271 p.wait()
2272 raise
2273
2274
d3c93ec2 2275class Popen(subprocess.Popen):
2276 if sys.platform == 'win32':
2277 _startupinfo = subprocess.STARTUPINFO()
2278 _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
2279 else:
2280 _startupinfo = None
2281
2282 def __init__(self, *args, **kwargs):
2283 super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
2284
2285 def communicate_or_kill(self, *args, **kwargs):
2286 return process_communicate_or_kill(self, *args, **kwargs)
2287
2288
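
# Illustrative sketch (not part of the original module; assumes an ffmpeg binary
# on PATH): the Popen wrapper hides console windows on Windows and adds
# communicate_or_kill(), which reaps the child even if waiting is interrupted.
def _example_popen():
    p = Popen(['ffmpeg', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate_or_kill()
    return p.returncode, stdout
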
aa49acd1
S
2289def get_subprocess_encoding():
2290 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2291 # For subprocess calls, encode with locale encoding
2292 # Refer to http://stackoverflow.com/a/9951851/35070
2293 encoding = preferredencoding()
2294 else:
2295 encoding = sys.getfilesystemencoding()
2296 if encoding is None:
2297 encoding = 'utf-8'
2298 return encoding
2299
2300
8bf48f23 2301def encodeFilename(s, for_subprocess=False):
59ae15a5
PH
2302 """
2303 @param s The name of the file
2304 """
d77c3dfd 2305
8bf48f23 2306 assert type(s) == compat_str
d77c3dfd 2307
59ae15a5
PH
2308 # Python 3 has a Unicode API
2309 if sys.version_info >= (3, 0):
2310 return s
0f00efed 2311
aa49acd1
S
2312 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2313 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2314 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2315 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2316 return s
2317
8ee239e9
YCH
2318 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2319 if sys.platform.startswith('java'):
2320 return s
2321
aa49acd1
S
2322 return s.encode(get_subprocess_encoding(), 'ignore')
2323
2324
2325def decodeFilename(b, for_subprocess=False):
2326
2327 if sys.version_info >= (3, 0):
2328 return b
2329
2330 if not isinstance(b, bytes):
2331 return b
2332
2333 return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2334
f07b74fc
PH
2335
2336def encodeArgument(s):
2337 if not isinstance(s, compat_str):
2338 # Legacy code that uses byte strings
2339 # Uncomment the following line after fixing all post processors
7af808a5 2340 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2341 s = s.decode('ascii')
2342 return encodeFilename(s, True)
2343
2344
aa49acd1
S
2345def decodeArgument(b):
2346 return decodeFilename(b, True)
2347
2348
8271226a
PH
2349def decodeOption(optval):
2350 if optval is None:
2351 return optval
2352 if isinstance(optval, bytes):
2353 optval = optval.decode(preferredencoding())
2354
2355 assert isinstance(optval, compat_str)
2356 return optval
1c256f70 2357
5f6a1245 2358
aa7785f8 2359_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
2360
2361
2362def timetuple_from_msec(msec):
2363 secs, msec = divmod(msec, 1000)
2364 mins, secs = divmod(secs, 60)
2365 hrs, mins = divmod(mins, 60)
2366 return _timetuple(hrs, mins, secs, msec)
2367
2368
cdb19aa4 2369def formatSeconds(secs, delim=':', msec=False):
aa7785f8 2370 time = timetuple_from_msec(secs * 1000)
2371 if time.hours:
2372 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
2373 elif time.minutes:
2374 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
4539dd30 2375 else:
aa7785f8 2376 ret = '%d' % time.seconds
2377 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
4539dd30 2378
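# Examples for the two time helpers above (values follow from the code as written):
#   >>> timetuple_from_msec(123456)
#   Time(hours=0, minutes=2, seconds=3, milliseconds=456)
#   >>> formatSeconds(3905)
#   '1:05:05'
#   >>> formatSeconds(65, msec=True)
#   '1:05.000'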
a0ddb8a2 2379
77562778 2380def _ssl_load_windows_store_certs(ssl_context, storename):
2381 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2382 try:
2383 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
2384 if encoding == 'x509_asn' and (
2385 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
2386 except PermissionError:
2387 return
2388 for cert in certs:
a2366922 2389 try:
77562778 2390 ssl_context.load_verify_locations(cadata=cert)
2391 except ssl.SSLError:
a2366922
PH
2392 pass
2393
77562778 2394
2395def make_HTTPS_handler(params, **kwargs):
2396 opts_check_certificate = not params.get('nocheckcertificate')
2397 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2398 context.check_hostname = opts_check_certificate
2399 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
2400 if opts_check_certificate:
4e3d1898 2401 try:
2402 context.load_default_certs()
2403 # Work around the issue in load_default_certs when there are bad certificates. See:
2404 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2405 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2406 except ssl.SSLError:
2407 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2408 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
2409 # Create a new context to discard any certificates that were already loaded
2410 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2411 context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
2412 for storename in ('CA', 'ROOT'):
2413 _ssl_load_windows_store_certs(context, storename)
2414 context.set_default_verify_paths()
77562778 2415 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2416
732ea2f0 2417
5873d4cc 2418def bug_reports_message(before=';'):
08f2a92c 2419 if ytdl_is_updateable():
7a5c1cfe 2420 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2421 else:
7a5c1cfe 2422 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2423 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2424 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2425 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2426
2427 before = before.rstrip()
2428 if not before or before.endswith(('.', '!', '?')):
2429 msg = msg[0].title() + msg[1:]
2430
2431 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2432
2433
bf5b9d85
PM
2434class YoutubeDLError(Exception):
2435 """Base exception for YoutubeDL errors."""
2436 pass
2437
2438
3158150c 2439network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2440if hasattr(ssl, 'CertificateError'):
2441 network_exceptions.append(ssl.CertificateError)
2442network_exceptions = tuple(network_exceptions)
2443
2444
bf5b9d85 2445class ExtractorError(YoutubeDLError):
1c256f70 2446 """Error during info extraction."""
5f6a1245 2447
1151c407 2448 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2449 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2450 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2451 """
3158150c 2452 if sys.exc_info()[0] in network_exceptions:
9a82b238 2453 expected = True
d5979c5d 2454
526d74ec 2455 self.msg = str(msg)
1c256f70 2456 self.traceback = tb
1151c407 2457 self.expected = expected
2eabb802 2458 self.cause = cause
d11271dd 2459 self.video_id = video_id
1151c407 2460 self.ie = ie
2461 self.exc_info = sys.exc_info() # preserve original exception
2462
2463 super(ExtractorError, self).__init__(''.join((
2464 format_field(ie, template='[%s] '),
2465 format_field(video_id, template='%s: '),
526d74ec 2466 self.msg,
1151c407 2467 format_field(cause, template=' (caused by %r)'),
2468 '' if expected else bug_reports_message())))
1c256f70 2469
01951dda
PH
2470 def format_traceback(self):
2471 if self.traceback is None:
2472 return None
28e614de 2473 return ''.join(traceback.format_tb(self.traceback))
01951dda 2474
1c256f70 2475
416c7fcb
PH
2476class UnsupportedError(ExtractorError):
2477 def __init__(self, url):
2478 super(UnsupportedError, self).__init__(
2479 'Unsupported URL: %s' % url, expected=True)
2480 self.url = url
2481
2482
55b3e45b
JMF
2483class RegexNotFoundError(ExtractorError):
2484 """Error when a regex didn't match"""
2485 pass
2486
2487
773f291d
S
2488class GeoRestrictedError(ExtractorError):
2489 """Geographic restriction Error exception.
2490
2491 This exception may be thrown when a video is not available from your
2492 geographic location due to geographic restrictions imposed by a website.
2493 """
b6e0c7d2 2494
773f291d
S
2495 def __init__(self, msg, countries=None):
2496 super(GeoRestrictedError, self).__init__(msg, expected=True)
2497 self.msg = msg
2498 self.countries = countries
2499
2500
bf5b9d85 2501class DownloadError(YoutubeDLError):
59ae15a5 2502 """Download Error exception.
d77c3dfd 2503
59ae15a5
PH
2504 This exception may be thrown by FileDownloader objects if they are not
2505 configured to continue on errors. They will contain the appropriate
2506 error message.
2507 """
5f6a1245 2508
8cc83b8d
FV
2509 def __init__(self, msg, exc_info=None):
2510 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2511 super(DownloadError, self).__init__(msg)
2512 self.exc_info = exc_info
d77c3dfd
FV
2513
2514
498f5606 2515class EntryNotInPlaylist(YoutubeDLError):
2516 """Entry not in playlist exception.
2517
2518 This exception will be thrown by YoutubeDL when a requested entry
2519 is not found in the playlist info_dict
2520 """
2521 pass
2522
2523
bf5b9d85 2524class SameFileError(YoutubeDLError):
59ae15a5 2525 """Same File exception.
d77c3dfd 2526
59ae15a5
PH
2527 This exception will be thrown by FileDownloader objects if they detect
2528 multiple files would have to be downloaded to the same file on disk.
2529 """
2530 pass
d77c3dfd
FV
2531
2532
bf5b9d85 2533class PostProcessingError(YoutubeDLError):
59ae15a5 2534 """Post Processing exception.
d77c3dfd 2535
59ae15a5
PH
2536 This exception may be raised by PostProcessor's .run() method to
2537 indicate an error in the postprocessing task.
2538 """
5f6a1245 2539
7851b379 2540 def __init__(self, msg):
bf5b9d85 2541 super(PostProcessingError, self).__init__(msg)
7851b379 2542 self.msg = msg
d77c3dfd 2543
5f6a1245 2544
8b0d7497 2545class ExistingVideoReached(YoutubeDLError):
2546 """ --break-on-existing triggered. """
2547 pass
2548
2549
2550class RejectedVideoReached(YoutubeDLError):
2551 """ --break-on-reject triggered. """
2552 pass
2553
2554
51d9739f 2555class ThrottledDownload(YoutubeDLError):
2556 """ Download speed below --throttled-rate. """
2557 pass
2558
2559
bf5b9d85 2560class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2561 """ --max-downloads limit has been reached. """
2562 pass
d77c3dfd
FV
2563
2564
bf5b9d85 2565class UnavailableVideoError(YoutubeDLError):
59ae15a5 2566 """Unavailable Format exception.
d77c3dfd 2567
59ae15a5
PH
2568 This exception will be thrown when a video is requested
2569 in a format that is not available for that video.
2570 """
2571 pass
d77c3dfd
FV
2572
2573
bf5b9d85 2574class ContentTooShortError(YoutubeDLError):
59ae15a5 2575 """Content Too Short exception.
d77c3dfd 2576
59ae15a5
PH
2577 This exception may be raised by FileDownloader objects when a file they
2578 download is too small for what the server announced first, indicating
2579 the connection was probably interrupted.
2580 """
d77c3dfd 2581
59ae15a5 2582 def __init__(self, downloaded, expected):
bf5b9d85
PM
2583 super(ContentTooShortError, self).__init__(
2584 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2585 )
2c7ed247 2586 # Both in bytes
59ae15a5
PH
2587 self.downloaded = downloaded
2588 self.expected = expected
d77c3dfd 2589
5f6a1245 2590
bf5b9d85 2591class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2592 def __init__(self, code=None, msg='Unknown error'):
2593 super(XAttrMetadataError, self).__init__(msg)
2594 self.code = code
bd264412 2595 self.msg = msg
efa97bdc
YCH
2596
2597 # Parsing code and msg
3089bc74 2598 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2599 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2600 self.reason = 'NO_SPACE'
2601 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2602 self.reason = 'VALUE_TOO_LONG'
2603 else:
2604 self.reason = 'NOT_SUPPORTED'
2605
2606
bf5b9d85 2607class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2608 pass
2609
2610
c5a59d93 2611def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2612 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2613 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2614 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2615 if sys.version_info < (3, 0):
65220c3b
S
2616 kwargs['strict'] = True
2617 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2618 source_address = ydl_handler._params.get('source_address')
8959018a 2619
be4a824d 2620 if source_address is not None:
8959018a
AU
2621 # This is to workaround _create_connection() from socket where it will try all
2622 # address data from getaddrinfo() including IPv6. This filters the result from
2623 # getaddrinfo() based on the source_address value.
2624 # This is based on the cpython socket.create_connection() function.
2625 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2626 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2627 host, port = address
2628 err = None
2629 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2630 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2631 ip_addrs = [addr for addr in addrs if addr[0] == af]
2632 if addrs and not ip_addrs:
2633 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2634 raise socket.error(
2635 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2636 % (ip_version, source_address[0]))
8959018a
AU
2637 for res in ip_addrs:
2638 af, socktype, proto, canonname, sa = res
2639 sock = None
2640 try:
2641 sock = socket.socket(af, socktype, proto)
2642 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2643 sock.settimeout(timeout)
2644 sock.bind(source_address)
2645 sock.connect(sa)
2646 err = None # Explicitly break reference cycle
2647 return sock
2648 except socket.error as _:
2649 err = _
2650 if sock is not None:
2651 sock.close()
2652 if err is not None:
2653 raise err
2654 else:
9e21e6d9
S
2655 raise socket.error('getaddrinfo returns an empty list')
2656 if hasattr(hc, '_create_connection'):
2657 hc._create_connection = _create_connection
be4a824d
PH
2658 sa = (source_address, 0)
2659 if hasattr(hc, 'source_address'): # Python 2.7+
2660 hc.source_address = sa
2661 else: # Python 2.6
2662 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2663 sock = _create_connection(
be4a824d
PH
2664 (self.host, self.port), self.timeout, sa)
2665 if is_https:
d7932313
PH
2666 self.sock = ssl.wrap_socket(
2667 sock, self.key_file, self.cert_file,
2668 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2669 else:
2670 self.sock = sock
2671 hc.connect = functools.partial(_hc_connect, hc)
2672
2673 return hc
2674
2675
87f0e62d 2676def handle_youtubedl_headers(headers):
992fc9d6
YCH
2677 filtered_headers = headers
2678
2679 if 'Youtubedl-no-compression' in filtered_headers:
2680 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2681 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2682
992fc9d6 2683 return filtered_headers
87f0e62d
YCH
2684
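# Illustrative behaviour of the header rewriting above: the internal marker
# header is dropped and Accept-Encoding is removed so the response is not
# compressed; other header dicts pass through unchanged:
#   >>> handle_youtubedl_headers({'Youtubedl-no-compression': '1', 'Accept-Encoding': 'gzip'})
#   {}
#   >>> handle_youtubedl_headers({'User-Agent': 'UA'})
#   {'User-Agent': 'UA'}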
2685
acebc9cd 2686class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2687 """Handler for HTTP requests and responses.
2688
2689 This class, when installed with an OpenerDirector, automatically adds
2690 the standard headers to every HTTP request and handles gzipped and
2691 deflated responses from web servers. If compression is to be avoided in
2692 a particular request, the original request in the program code only has
0424ec30 2693 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2694 removed before making the real request.
2695
2696 Part of this code was copied from:
2697
2698 http://techknack.net/python-urllib2-handlers/
2699
2700 Andrew Rowls, the author of that code, agreed to release it to the
2701 public domain.
2702 """
2703
be4a824d
PH
2704 def __init__(self, params, *args, **kwargs):
2705 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2706 self._params = params
2707
2708 def http_open(self, req):
71aff188
YCH
2709 conn_class = compat_http_client.HTTPConnection
2710
2711 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2712 if socks_proxy:
2713 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2714 del req.headers['Ytdl-socks-proxy']
2715
be4a824d 2716 return self.do_open(functools.partial(
71aff188 2717 _create_http_connection, self, conn_class, False),
be4a824d
PH
2718 req)
2719
59ae15a5
PH
2720 @staticmethod
2721 def deflate(data):
fc2119f2 2722 if not data:
2723 return data
59ae15a5
PH
2724 try:
2725 return zlib.decompress(data, -zlib.MAX_WBITS)
2726 except zlib.error:
2727 return zlib.decompress(data)
2728
acebc9cd 2729 def http_request(self, req):
51f267d9
S
2730 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2731 # always respected by websites, some tend to give out URLs with non percent-encoded
2732 # non-ASCII characters (see telemb.py, ard.py [#3412])
2733 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2734 # To work around aforementioned issue we will replace request's original URL with
2735 # percent-encoded one
2736 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2737 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2738 url = req.get_full_url()
2739 url_escaped = escape_url(url)
2740
2741 # Substitute URL if any change after escaping
2742 if url != url_escaped:
15d260eb 2743 req = update_Request(req, url=url_escaped)
51f267d9 2744
33ac271b 2745 for h, v in std_headers.items():
3d5f7a39
JK
2746 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2747 # The dict keys are capitalized because of this bug by urllib
2748 if h.capitalize() not in req.headers:
33ac271b 2749 req.add_header(h, v)
87f0e62d
YCH
2750
2751 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2752
2753 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2754 # Python 2.6 is brain-dead when it comes to fragments
2755 req._Request__original = req._Request__original.partition('#')[0]
2756 req._Request__r_type = req._Request__r_type.partition('#')[0]
2757
59ae15a5
PH
2758 return req
2759
acebc9cd 2760 def http_response(self, req, resp):
59ae15a5
PH
2761 old_resp = resp
2762 # gzip
2763 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2764 content = resp.read()
2765 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2766 try:
2767 uncompressed = io.BytesIO(gz.read())
2768 except IOError as original_ioerror:
2769 # There may be junk at the end of the file
2770 # See http://stackoverflow.com/q/4928560/35070 for details
2771 for i in range(1, 1024):
2772 try:
2773 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2774 uncompressed = io.BytesIO(gz.read())
2775 except IOError:
2776 continue
2777 break
2778 else:
2779 raise original_ioerror
b407d853 2780 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2781 resp.msg = old_resp.msg
c047270c 2782 del resp.headers['Content-encoding']
59ae15a5
PH
2783 # deflate
2784 if resp.headers.get('Content-encoding', '') == 'deflate':
2785 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2786 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2787 resp.msg = old_resp.msg
c047270c 2788 del resp.headers['Content-encoding']
ad729172 2789 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2790 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2791 if 300 <= resp.code < 400:
2792 location = resp.headers.get('Location')
2793 if location:
2794 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2795 if sys.version_info >= (3, 0):
2796 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2797 else:
2798 location = location.decode('utf-8')
5a4d9ddb
S
2799 location_escaped = escape_url(location)
2800 if location != location_escaped:
2801 del resp.headers['Location']
9a4aec8b
YCH
2802 if sys.version_info < (3, 0):
2803 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2804 resp.headers['Location'] = location_escaped
59ae15a5 2805 return resp
0f8d03f8 2806
acebc9cd
PH
2807 https_request = http_request
2808 https_response = http_response
bf50b038 2809
5de90176 2810
71aff188
YCH
2811def make_socks_conn_class(base_class, socks_proxy):
2812 assert issubclass(base_class, (
2813 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2814
2815 url_components = compat_urlparse.urlparse(socks_proxy)
2816 if url_components.scheme.lower() == 'socks5':
2817 socks_type = ProxyType.SOCKS5
2818 elif url_components.scheme.lower() in ('socks', 'socks4'):
2819 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2820 elif url_components.scheme.lower() == 'socks4a':
2821 socks_type = ProxyType.SOCKS4A
71aff188 2822
cdd94c2e
YCH
2823 def unquote_if_non_empty(s):
2824 if not s:
2825 return s
2826 return compat_urllib_parse_unquote_plus(s)
2827
71aff188
YCH
2828 proxy_args = (
2829 socks_type,
2830 url_components.hostname, url_components.port or 1080,
2831 True, # Remote DNS
cdd94c2e
YCH
2832 unquote_if_non_empty(url_components.username),
2833 unquote_if_non_empty(url_components.password),
71aff188
YCH
2834 )
2835
2836 class SocksConnection(base_class):
2837 def connect(self):
2838 self.sock = sockssocket()
2839 self.sock.setproxy(*proxy_args)
2840 if type(self.timeout) in (int, float):
2841 self.sock.settimeout(self.timeout)
2842 self.sock.connect((self.host, self.port))
2843
2844 if isinstance(self, compat_http_client.HTTPSConnection):
2845 if hasattr(self, '_context'): # Python > 2.6
2846 self.sock = self._context.wrap_socket(
2847 self.sock, server_hostname=self.host)
2848 else:
2849 self.sock = ssl.wrap_socket(self.sock)
2850
2851 return SocksConnection
2852
2853
be4a824d
PH
2854class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2855 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2856 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2857 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2858 self._params = params
2859
2860 def https_open(self, req):
4f264c02 2861 kwargs = {}
71aff188
YCH
2862 conn_class = self._https_conn_class
2863
4f264c02
JMF
2864 if hasattr(self, '_context'): # python > 2.6
2865 kwargs['context'] = self._context
2866 if hasattr(self, '_check_hostname'): # python 3.x
2867 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2868
2869 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2870 if socks_proxy:
2871 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2872 del req.headers['Ytdl-socks-proxy']
2873
be4a824d 2874 return self.do_open(functools.partial(
71aff188 2875 _create_http_connection, self, conn_class, True),
4f264c02 2876 req, **kwargs)
be4a824d
PH
2877
2878
1bab3437 2879class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2880 """
2881 See [1] for cookie file format.
2882
2883 1. https://curl.haxx.se/docs/http-cookies.html
2884 """
e7e62441 2885 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2886 _ENTRY_LEN = 7
2887 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2888# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2889
2890'''
2891 _CookieFileEntry = collections.namedtuple(
2892 'CookieFileEntry',
2893 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2894
1bab3437 2895 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2896 """
2897 Save cookies to a file.
2898
2899 Most of the code is taken from CPython 3.8 and slightly adapted
2900 to support cookie files with UTF-8 in both python 2 and 3.
2901 """
2902 if filename is None:
2903 if self.filename is not None:
2904 filename = self.filename
2905 else:
2906 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2907
1bab3437
S
2908 # Store session cookies with `expires` set to 0 instead of an empty
2909 # string
2910 for cookie in self:
2911 if cookie.expires is None:
2912 cookie.expires = 0
c380cc28
S
2913
2914 with io.open(filename, 'w', encoding='utf-8') as f:
2915 f.write(self._HEADER)
2916 now = time.time()
2917 for cookie in self:
2918 if not ignore_discard and cookie.discard:
2919 continue
2920 if not ignore_expires and cookie.is_expired(now):
2921 continue
2922 if cookie.secure:
2923 secure = 'TRUE'
2924 else:
2925 secure = 'FALSE'
2926 if cookie.domain.startswith('.'):
2927 initial_dot = 'TRUE'
2928 else:
2929 initial_dot = 'FALSE'
2930 if cookie.expires is not None:
2931 expires = compat_str(cookie.expires)
2932 else:
2933 expires = ''
2934 if cookie.value is None:
2935 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2936 # with no name, whereas http.cookiejar regards it as a
2937 # cookie with no value.
2938 name = ''
2939 value = cookie.name
2940 else:
2941 name = cookie.name
2942 value = cookie.value
2943 f.write(
2944 '\t'.join([cookie.domain, initial_dot, cookie.path,
2945 secure, expires, name, value]) + '\n')
1bab3437
S
2946
2947 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2948 """Load cookies from a file."""
2949 if filename is None:
2950 if self.filename is not None:
2951 filename = self.filename
2952 else:
2953 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2954
c380cc28
S
2955 def prepare_line(line):
2956 if line.startswith(self._HTTPONLY_PREFIX):
2957 line = line[len(self._HTTPONLY_PREFIX):]
2958 # comments and empty lines are fine
2959 if line.startswith('#') or not line.strip():
2960 return line
2961 cookie_list = line.split('\t')
2962 if len(cookie_list) != self._ENTRY_LEN:
2963 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2964 cookie = self._CookieFileEntry(*cookie_list)
2965 if cookie.expires_at and not cookie.expires_at.isdigit():
2966 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2967 return line
2968
e7e62441 2969 cf = io.StringIO()
c380cc28 2970 with io.open(filename, encoding='utf-8') as f:
e7e62441 2971 for line in f:
c380cc28
S
2972 try:
2973 cf.write(prepare_line(line))
2974 except compat_cookiejar.LoadError as e:
2975 write_string(
2976 'WARNING: skipping cookie file entry due to %s: %r\n'
2977 % (e, line), sys.stderr)
2978 continue
e7e62441 2979 cf.seek(0)
2980 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2981 # Session cookies are denoted by either `expires` field set to
2982 # an empty string or 0. MozillaCookieJar only recognizes the former
2983 # (see [1]). So we need to force the latter to be recognized as session
2984 # cookies on our own.
2985 # Session cookies may be important for cookies-based authentication,
2986 # e.g. usually, when user does not check 'Remember me' check box while
2987 # logging in on a site, some important cookies are stored as session
2988 # cookies so that not recognizing them will result in failed login.
2989 # 1. https://bugs.python.org/issue17164
2990 for cookie in self:
2991 # Treat `expires=0` cookies as session cookies
2992 if cookie.expires == 0:
2993 cookie.expires = None
2994 cookie.discard = True
2995
2996
a6420bf5
S
2997class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2998 def __init__(self, cookiejar=None):
2999 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
3000
3001 def http_response(self, request, response):
3002 # Python 2 will choke on next HTTP request in row if there are non-ASCII
3003 # characters in Set-Cookie HTTP header of last response (see
067aa17e 3004 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
3005 # In order to at least prevent crashing we will percent encode Set-Cookie
3006 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
3007 # if sys.version_info < (3, 0) and response.headers:
3008 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
3009 # set_cookie = response.headers.get(set_cookie_header)
3010 # if set_cookie:
3011 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
3012 # if set_cookie != set_cookie_escaped:
3013 # del response.headers[set_cookie_header]
3014 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
3015 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
3016
f5fa042c 3017 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
3018 https_response = http_response
3019
3020
fca6dba8 3021class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 3022 """YoutubeDL redirect handler
3023
3024 The code is based on HTTPRedirectHandler implementation from CPython [1].
3025
3026 This redirect handler solves two issues:
3027 - ensures redirect URL is always unicode under python 2
3028 - introduces support for experimental HTTP response status code
3029 308 Permanent Redirect [2] used by some sites [3]
3030
3031 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
3032 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3033 3. https://github.com/ytdl-org/youtube-dl/issues/28768
3034 """
3035
3036 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
3037
3038 def redirect_request(self, req, fp, code, msg, headers, newurl):
3039 """Return a Request or None in response to a redirect.
3040
3041 This is called by the http_error_30x methods when a
3042 redirection response is received. If a redirection should
3043 take place, return a new Request to allow http_error_30x to
3044 perform the redirect. Otherwise, raise HTTPError if no-one
3045 else should try to handle this url. Return None if you can't
3046 but another Handler might.
3047 """
3048 m = req.get_method()
3049 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3050 or code in (301, 302, 303) and m == "POST")):
3051 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3052 # Strictly (according to RFC 2616), 301 or 302 in response to
3053 # a POST MUST NOT cause a redirection without confirmation
3054 # from the user (of urllib.request, in this case). In practice,
3055 # essentially all clients do redirect in this case, so we do
3056 # the same.
3057
3058 # On python 2 urlh.geturl() may sometimes return redirect URL
3059 # as a byte string instead of unicode. This workaround forces
3060 # it to always return unicode.
3061 if sys.version_info[0] < 3:
3062 newurl = compat_str(newurl)
3063
3064 # Be lenient with URIs containing a space. This is mainly
3065 # redundant with the more complete encoding done in http_error_302(),
3066 # but it is kept for compatibility with other callers.
3067 newurl = newurl.replace(' ', '%20')
3068
3069 CONTENT_HEADERS = ("content-length", "content-type")
3070 # NB: don't use dict comprehension for python 2.6 compatibility
3071 newheaders = dict((k, v) for k, v in req.headers.items()
3072 if k.lower() not in CONTENT_HEADERS)
3073 return compat_urllib_request.Request(
3074 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3075 unverifiable=True)
fca6dba8
S
3076
3077
46f59e89
S
3078def extract_timezone(date_str):
3079 m = re.search(
f137e4c2 3080 r'''(?x)
3081 ^.{8,}? # >=8 char non-TZ prefix, if present
3082 (?P<tz>Z| # just the UTC Z, or
3083 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3084 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3085 [ ]? # optional space
3086 (?P<sign>\+|-) # +/-
3087 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3088 $)
3089 ''', date_str)
46f59e89
S
3090 if not m:
3091 timezone = datetime.timedelta()
3092 else:
3093 date_str = date_str[:-len(m.group('tz'))]
3094 if not m.group('sign'):
3095 timezone = datetime.timedelta()
3096 else:
3097 sign = 1 if m.group('sign') == '+' else -1
3098 timezone = datetime.timedelta(
3099 hours=sign * int(m.group('hours')),
3100 minutes=sign * int(m.group('minutes')))
3101 return timezone, date_str
3102
3103
08b38d54 3104def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3105 """ Return a UNIX timestamp from the given date """
3106
3107 if date_str is None:
3108 return None
3109
52c3a6e4
S
3110 date_str = re.sub(r'\.[0-9]+', '', date_str)
3111
08b38d54 3112 if timezone is None:
46f59e89
S
3113 timezone, date_str = extract_timezone(date_str)
3114
52c3a6e4
S
3115 try:
3116 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3117 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3118 return calendar.timegm(dt.timetuple())
3119 except ValueError:
3120 pass
912b38b4
PH
3121
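# Example for parse_iso8601() above; fractional seconds are stripped and the
# timezone offset is applied, so both forms below should yield the same UNIX
# timestamp:
#   >>> parse_iso8601('2014-03-23T23:04:26+0100')
#   1395612266
#   >>> parse_iso8601('2014-03-23T22:04:26Z')
#   1395612266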
3122
46f59e89
S
3123def date_formats(day_first=True):
3124 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3125
3126
42bdd9d0 3127def unified_strdate(date_str, day_first=True):
bf50b038 3128 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3129
3130 if date_str is None:
3131 return None
bf50b038 3132 upload_date = None
5f6a1245 3133 # Replace commas
026fcc04 3134 date_str = date_str.replace(',', ' ')
42bdd9d0 3135 # Remove AM/PM + timezone
9bb8e0a3 3136 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3137 _, date_str = extract_timezone(date_str)
42bdd9d0 3138
46f59e89 3139 for expression in date_formats(day_first):
bf50b038
JMF
3140 try:
3141 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3142 except ValueError:
bf50b038 3143 pass
42393ce2
PH
3144 if upload_date is None:
3145 timetuple = email.utils.parsedate_tz(date_str)
3146 if timetuple:
c6b9cf05
S
3147 try:
3148 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3149 except ValueError:
3150 pass
6a750402
JMF
3151 if upload_date is not None:
3152 return compat_str(upload_date)
bf50b038 3153
5f6a1245 3154
46f59e89
S
3155def unified_timestamp(date_str, day_first=True):
3156 if date_str is None:
3157 return None
3158
2ae2ffda 3159 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3160
7dc2a74e 3161 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3162 timezone, date_str = extract_timezone(date_str)
3163
3164 # Remove AM/PM + timezone
3165 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3166
deef3195
S
3167 # Remove unrecognized timezones from ISO 8601 alike timestamps
3168 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3169 if m:
3170 date_str = date_str[:-len(m.group('tz'))]
3171
f226880c
PH
3172 # Python only supports microseconds, so remove nanoseconds
3173 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3174 if m:
3175 date_str = m.group(1)
3176
46f59e89
S
3177 for expression in date_formats(day_first):
3178 try:
7dc2a74e 3179 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3180 return calendar.timegm(dt.timetuple())
3181 except ValueError:
3182 pass
3183 timetuple = email.utils.parsedate_tz(date_str)
3184 if timetuple:
7dc2a74e 3185 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3186
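# Illustrative expectations for the two date helpers above (they rely on the
# DATE_FORMATS tables defined elsewhere in this module); unified_strdate()
# yields YYYYMMDD strings, unified_timestamp() yields UNIX timestamps:
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('1968/12/10')
#   '19681210'
#   >>> unified_timestamp('December 15, 2017 at 7:49 am')
#   1513324140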
3187
28e614de 3188def determine_ext(url, default_ext='unknown_video'):
85750f89 3189 if url is None or '.' not in url:
f4776371 3190 return default_ext
9cb9a5df 3191 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3192 if re.match(r'^[A-Za-z0-9]+$', guess):
3193 return guess
a7aaa398
S
3194 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3195 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3196 return guess.rstrip('/')
73e79f2a 3197 else:
cbdbb766 3198 return default_ext
73e79f2a 3199
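# Examples for determine_ext() above; query strings are ignored and inputs
# without a recognizable extension fall back to the default:
#   >>> determine_ext('http://example.com/foo/bar.mp4?download=1')
#   'mp4'
#   >>> determine_ext('http://example.com/no-extension')
#   'unknown_video'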
5f6a1245 3200
824fa511
S
3201def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3202 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3203
5f6a1245 3204
9e62f283 3205def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3206 """
3207 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3208 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3209
3210 format: string date format used to return datetime object from
3211 precision: round the time portion of a datetime object.
3212 auto|microsecond|second|minute|hour|day.
3213 auto: round to the unit provided in date_str (if applicable).
3214 """
3215 auto_precision = False
3216 if precision == 'auto':
3217 auto_precision = True
3218 precision = 'microsecond'
3219 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3220 if date_str in ('now', 'today'):
37254abc 3221 return today
f8795e10
PH
3222 if date_str == 'yesterday':
3223 return today - datetime.timedelta(days=1)
9e62f283 3224 match = re.match(
3225 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3226 date_str)
37254abc 3227 if match is not None:
9e62f283 3228 start_time = datetime_from_str(match.group('start'), precision, format)
3229 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3230 unit = match.group('unit')
9e62f283 3231 if unit == 'month' or unit == 'year':
3232 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3233 unit = 'day'
9e62f283 3234 else:
3235 if unit == 'week':
3236 unit = 'day'
3237 time *= 7
3238 delta = datetime.timedelta(**{unit + 's': time})
3239 new_date = start_time + delta
3240 if auto_precision:
3241 return datetime_round(new_date, unit)
3242 return new_date
3243
3244 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3245
3246
3247def date_from_str(date_str, format='%Y%m%d'):
3248 """
3249 Return a datetime object from a string in the format YYYYMMDD or
3250 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3251
3252 format: string date format used to return datetime object from
3253 """
3254 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3255
3256
3257def datetime_add_months(dt, months):
3258 """Increment/Decrement a datetime object by months."""
3259 month = dt.month + months - 1
3260 year = dt.year + month // 12
3261 month = month % 12 + 1
3262 day = min(dt.day, calendar.monthrange(year, month)[1])
3263 return dt.replace(year, month, day)
3264
3265
3266def datetime_round(dt, precision='day'):
3267 """
3268 Round a datetime object's time to a specific precision
3269 """
3270 if precision == 'microsecond':
3271 return dt
3272
3273 unit_seconds = {
3274 'day': 86400,
3275 'hour': 3600,
3276 'minute': 60,
3277 'second': 1,
3278 }
3279 roundto = lambda x, n: ((x + n / 2) // n) * n
3280 timestamp = calendar.timegm(dt.timetuple())
3281 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3282
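# Illustrative behaviour of the date arithmetic helpers above; date_from_str()
# understands relative specifications and datetime_add_months() clamps the day
# to the end of the target month (the first comparison assumes both sides are
# evaluated on the same calendar day):
#   >>> date_from_str('now-1week') == datetime.datetime.now().date() - datetime.timedelta(days=7)
#   True
#   >>> datetime_add_months(datetime.datetime(2021, 1, 31), 1)
#   datetime.datetime(2021, 2, 28, 0, 0)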
3283
e63fc1be 3284def hyphenate_date(date_str):
3285 """
3286 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3287 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3288 if match is not None:
3289 return '-'.join(match.groups())
3290 else:
3291 return date_str
3292
5f6a1245 3293
bd558525
JMF
3294class DateRange(object):
3295 """Represents a time interval between two dates"""
5f6a1245 3296
bd558525
JMF
3297 def __init__(self, start=None, end=None):
3298 """start and end must be strings in the format accepted by date"""
3299 if start is not None:
3300 self.start = date_from_str(start)
3301 else:
3302 self.start = datetime.datetime.min.date()
3303 if end is not None:
3304 self.end = date_from_str(end)
3305 else:
3306 self.end = datetime.datetime.max.date()
37254abc 3307 if self.start > self.end:
bd558525 3308 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
5f6a1245 3309
bd558525
JMF
3310 @classmethod
3311 def day(cls, day):
3312 """Returns a range that only contains the given day"""
5f6a1245
JW
3313 return cls(day, day)
3314
bd558525
JMF
3315 def __contains__(self, date):
3316 """Check if the date is in the range"""
37254abc
JMF
3317 if not isinstance(date, datetime.date):
3318 date = date_from_str(date)
3319 return self.start <= date <= self.end
5f6a1245 3320
bd558525 3321 def __str__(self):
5f6a1245 3322 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3323
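# Example for DateRange above; membership accepts date objects or any string
# understood by date_from_str():
#   >>> '20200115' in DateRange('20200101', '20200131')
#   True
#   >>> '20200201' in DateRange('20200101', '20200131')
#   False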
3324
3325def platform_name():
3326 """ Returns the platform name as a compat_str """
3327 res = platform.platform()
3328 if isinstance(res, bytes):
3329 res = res.decode(preferredencoding())
3330
3331 assert isinstance(res, compat_str)
3332 return res
c257baff
PH
3333
3334
49fa4d9a
N
3335def get_windows_version():
3336 ''' Get Windows version. None if it's not running on Windows '''
3337 if compat_os_name == 'nt':
3338 return version_tuple(platform.win32_ver()[1])
3339 else:
3340 return None
3341
3342
b58ddb32
PH
3343def _windows_write_string(s, out):
3344 """ Returns True if the string was written using special methods,
3345 False if it has yet to be written out."""
3346 # Adapted from http://stackoverflow.com/a/3259271/35070
3347
3348 import ctypes
3349 import ctypes.wintypes
3350
3351 WIN_OUTPUT_IDS = {
3352 1: -11,
3353 2: -12,
3354 }
3355
a383a98a
PH
3356 try:
3357 fileno = out.fileno()
3358 except AttributeError:
3359 # If the output stream doesn't have a fileno, it's virtual
3360 return False
aa42e873
PH
3361 except io.UnsupportedOperation:
3362 # Some strange Windows pseudo files?
3363 return False
b58ddb32
PH
3364 if fileno not in WIN_OUTPUT_IDS:
3365 return False
3366
d7cd9a9e 3367 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3368 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3369 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3370 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3371
d7cd9a9e 3372 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3373 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3374 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3375 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3376 written = ctypes.wintypes.DWORD(0)
3377
d7cd9a9e 3378 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3379 FILE_TYPE_CHAR = 0x0002
3380 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3381 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3382 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3383 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3384 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3385 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3386
3387 def not_a_console(handle):
3388 if handle == INVALID_HANDLE_VALUE or handle is None:
3389 return True
3089bc74
S
3390 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3391 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3392
3393 if not_a_console(h):
3394 return False
3395
d1b9c912
PH
3396 def next_nonbmp_pos(s):
3397 try:
3398 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3399 except StopIteration:
3400 return len(s)
3401
3402 while s:
3403 count = min(next_nonbmp_pos(s), 1024)
3404
b58ddb32 3405 ret = WriteConsoleW(
d1b9c912 3406 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3407 if ret == 0:
3408 raise OSError('Failed to write string')
d1b9c912
PH
3409 if not count: # We just wrote a non-BMP character
3410 assert written.value == 2
3411 s = s[1:]
3412 else:
3413 assert written.value > 0
3414 s = s[written.value:]
b58ddb32
PH
3415 return True
3416
3417
734f90bb 3418def write_string(s, out=None, encoding=None):
7459e3a2
PH
3419 if out is None:
3420 out = sys.stderr
8bf48f23 3421 assert type(s) == compat_str
7459e3a2 3422
b58ddb32
PH
3423 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3424 if _windows_write_string(s, out):
3425 return
3426
3089bc74
S
3427 if ('b' in getattr(out, 'mode', '')
3428 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3429 byt = s.encode(encoding or preferredencoding(), 'ignore')
3430 out.write(byt)
3431 elif hasattr(out, 'buffer'):
3432 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3433 byt = s.encode(enc, 'ignore')
3434 out.buffer.write(byt)
3435 else:
8bf48f23 3436 out.write(s)
7459e3a2
PH
3437 out.flush()
3438
3439
48ea9cea
PH
3440def bytes_to_intlist(bs):
3441 if not bs:
3442 return []
3443 if isinstance(bs[0], int): # Python 3
3444 return list(bs)
3445 else:
3446 return [ord(c) for c in bs]
3447
c257baff 3448
cba892fa 3449def intlist_to_bytes(xs):
3450 if not xs:
3451 return b''
edaa23f8 3452 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3453
3454
c1c9a79c
PH
3455# Cross-platform file locking
3456if sys.platform == 'win32':
3457 import ctypes.wintypes
3458 import msvcrt
3459
3460 class OVERLAPPED(ctypes.Structure):
3461 _fields_ = [
3462 ('Internal', ctypes.wintypes.LPVOID),
3463 ('InternalHigh', ctypes.wintypes.LPVOID),
3464 ('Offset', ctypes.wintypes.DWORD),
3465 ('OffsetHigh', ctypes.wintypes.DWORD),
3466 ('hEvent', ctypes.wintypes.HANDLE),
3467 ]
3468
3469 kernel32 = ctypes.windll.kernel32
3470 LockFileEx = kernel32.LockFileEx
3471 LockFileEx.argtypes = [
3472 ctypes.wintypes.HANDLE, # hFile
3473 ctypes.wintypes.DWORD, # dwFlags
3474 ctypes.wintypes.DWORD, # dwReserved
3475 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3476 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3477 ctypes.POINTER(OVERLAPPED) # Overlapped
3478 ]
3479 LockFileEx.restype = ctypes.wintypes.BOOL
3480 UnlockFileEx = kernel32.UnlockFileEx
3481 UnlockFileEx.argtypes = [
3482 ctypes.wintypes.HANDLE, # hFile
3483 ctypes.wintypes.DWORD, # dwReserved
3484 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3485 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3486 ctypes.POINTER(OVERLAPPED) # Overlapped
3487 ]
3488 UnlockFileEx.restype = ctypes.wintypes.BOOL
3489 whole_low = 0xffffffff
3490 whole_high = 0x7fffffff
3491
3492 def _lock_file(f, exclusive):
3493 overlapped = OVERLAPPED()
3494 overlapped.Offset = 0
3495 overlapped.OffsetHigh = 0
3496 overlapped.hEvent = 0
3497 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3498 handle = msvcrt.get_osfhandle(f.fileno())
3499 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3500 whole_low, whole_high, f._lock_file_overlapped_p):
3501 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3502
3503 def _unlock_file(f):
3504 assert f._lock_file_overlapped_p
3505 handle = msvcrt.get_osfhandle(f.fileno())
3506 if not UnlockFileEx(handle, 0,
3507 whole_low, whole_high, f._lock_file_overlapped_p):
3508 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3509
3510else:
399a76e6
YCH
3511 # Some platforms, such as Jython, are missing fcntl
3512 try:
3513 import fcntl
c1c9a79c 3514
399a76e6
YCH
3515 def _lock_file(f, exclusive):
3516 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3517
399a76e6
YCH
3518 def _unlock_file(f):
3519 fcntl.flock(f, fcntl.LOCK_UN)
3520 except ImportError:
3521 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3522
3523 def _lock_file(f, exclusive):
3524 raise IOError(UNSUPPORTED_MSG)
3525
3526 def _unlock_file(f):
3527 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3528
3529
3530class locked_file(object):
3531 def __init__(self, filename, mode, encoding=None):
3532 assert mode in ['r', 'a', 'w']
3533 self.f = io.open(filename, mode, encoding=encoding)
3534 self.mode = mode
3535
3536 def __enter__(self):
3537 exclusive = self.mode != 'r'
3538 try:
3539 _lock_file(self.f, exclusive)
3540 except IOError:
3541 self.f.close()
3542 raise
3543 return self
3544
3545 def __exit__(self, etype, value, traceback):
3546 try:
3547 _unlock_file(self.f)
3548 finally:
3549 self.f.close()
3550
3551 def __iter__(self):
3552 return iter(self.f)
3553
3554 def write(self, *args):
3555 return self.f.write(*args)
3556
3557 def read(self, *args):
3558 return self.f.read(*args)
4eb7f1d1
JMF
3559
3560
4644ac55
S
3561def get_filesystem_encoding():
3562 encoding = sys.getfilesystemencoding()
3563 return encoding if encoding is not None else 'utf-8'
3564
3565
4eb7f1d1 3566def shell_quote(args):
a6a173c2 3567 quoted_args = []
4644ac55 3568 encoding = get_filesystem_encoding()
a6a173c2
JMF
3569 for a in args:
3570 if isinstance(a, bytes):
3571 # We may get a filename encoded with 'encodeFilename'
3572 a = a.decode(encoding)
aefce8e6 3573 quoted_args.append(compat_shlex_quote(a))
28e614de 3574 return ' '.join(quoted_args)
9d4660ca
PH
3575
3576
3577def smuggle_url(url, data):
3578 """ Pass additional data in a URL for internal use. """
3579
81953d1a
RA
3580 url, idata = unsmuggle_url(url, {})
3581 data.update(idata)
15707c7e 3582 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3583 {'__youtubedl_smuggle': json.dumps(data)})
3584 return url + '#' + sdata
9d4660ca
PH
3585
3586
79f82953 3587def unsmuggle_url(smug_url, default=None):
83e865a3 3588 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3589 return smug_url, default
28e614de
PH
3590 url, _, sdata = smug_url.rpartition('#')
3591 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3592 data = json.loads(jsond)
3593 return url, data
02dbf93f
PH
3594
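# Example round trip for the two helpers above; the extra data travels in the
# URL fragment and is recovered unchanged:
#   >>> url = smuggle_url('http://example.com/video', {'referrer': 'http://example.com/'})
#   >>> unsmuggle_url(url)
#   ('http://example.com/video', {'referrer': 'http://example.com/'})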
3595
02dbf93f
PH
3596def format_bytes(bytes):
3597 if bytes is None:
28e614de 3598 return 'N/A'
02dbf93f
PH
3599 if type(bytes) is str:
3600 bytes = float(bytes)
3601 if bytes == 0.0:
3602 exponent = 0
3603 else:
3604 exponent = int(math.log(bytes, 1024.0))
28e614de 3605 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3606 converted = float(bytes) / float(1024 ** exponent)
28e614de 3607 return '%.2f%s' % (converted, suffix)
f53c966a 3608
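# Examples for format_bytes() above (binary prefixes, two decimal places):
#   >>> format_bytes(1500000)
#   '1.43MiB'
#   >>> format_bytes(None)
#   'N/A'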
1c088fa8 3609
fb47597b
S
3610def lookup_unit_table(unit_table, s):
3611 units_re = '|'.join(re.escape(u) for u in unit_table)
3612 m = re.match(
782b1b5b 3613 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3614 if not m:
3615 return None
3616 num_str = m.group('num').replace(',', '.')
3617 mult = unit_table[m.group('unit')]
3618 return int(float(num_str) * mult)
3619
3620
be64b5b0
PH
3621def parse_filesize(s):
3622 if s is None:
3623 return None
3624
dfb1b146 3625 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3626 # but we support those too
3627 _UNIT_TABLE = {
3628 'B': 1,
3629 'b': 1,
70852b47 3630 'bytes': 1,
be64b5b0
PH
3631 'KiB': 1024,
3632 'KB': 1000,
3633 'kB': 1024,
3634 'Kb': 1000,
13585d76 3635 'kb': 1000,
70852b47
YCH
3636 'kilobytes': 1000,
3637 'kibibytes': 1024,
be64b5b0
PH
3638 'MiB': 1024 ** 2,
3639 'MB': 1000 ** 2,
3640 'mB': 1024 ** 2,
3641 'Mb': 1000 ** 2,
13585d76 3642 'mb': 1000 ** 2,
70852b47
YCH
3643 'megabytes': 1000 ** 2,
3644 'mebibytes': 1024 ** 2,
be64b5b0
PH
3645 'GiB': 1024 ** 3,
3646 'GB': 1000 ** 3,
3647 'gB': 1024 ** 3,
3648 'Gb': 1000 ** 3,
13585d76 3649 'gb': 1000 ** 3,
70852b47
YCH
3650 'gigabytes': 1000 ** 3,
3651 'gibibytes': 1024 ** 3,
be64b5b0
PH
3652 'TiB': 1024 ** 4,
3653 'TB': 1000 ** 4,
3654 'tB': 1024 ** 4,
3655 'Tb': 1000 ** 4,
13585d76 3656 'tb': 1000 ** 4,
70852b47
YCH
3657 'terabytes': 1000 ** 4,
3658 'tebibytes': 1024 ** 4,
be64b5b0
PH
3659 'PiB': 1024 ** 5,
3660 'PB': 1000 ** 5,
3661 'pB': 1024 ** 5,
3662 'Pb': 1000 ** 5,
13585d76 3663 'pb': 1000 ** 5,
70852b47
YCH
3664 'petabytes': 1000 ** 5,
3665 'pebibytes': 1024 ** 5,
be64b5b0
PH
3666 'EiB': 1024 ** 6,
3667 'EB': 1000 ** 6,
3668 'eB': 1024 ** 6,
3669 'Eb': 1000 ** 6,
13585d76 3670 'eb': 1000 ** 6,
70852b47
YCH
3671 'exabytes': 1000 ** 6,
3672 'exbibytes': 1024 ** 6,
be64b5b0
PH
3673 'ZiB': 1024 ** 7,
3674 'ZB': 1000 ** 7,
3675 'zB': 1024 ** 7,
3676 'Zb': 1000 ** 7,
13585d76 3677 'zb': 1000 ** 7,
70852b47
YCH
3678 'zettabytes': 1000 ** 7,
3679 'zebibytes': 1024 ** 7,
be64b5b0
PH
3680 'YiB': 1024 ** 8,
3681 'YB': 1000 ** 8,
3682 'yB': 1024 ** 8,
3683 'Yb': 1000 ** 8,
13585d76 3684 'yb': 1000 ** 8,
70852b47
YCH
3685 'yottabytes': 1000 ** 8,
3686 'yobibytes': 1024 ** 8,
be64b5b0
PH
3687 }
3688
fb47597b
S
3689 return lookup_unit_table(_UNIT_TABLE, s)
3690
3691
3692def parse_count(s):
3693 if s is None:
be64b5b0
PH
3694 return None
3695
fb47597b
S
3696 s = s.strip()
3697
3698 if re.match(r'^[\d,.]+$', s):
3699 return str_to_int(s)
3700
3701 _UNIT_TABLE = {
3702 'k': 1000,
3703 'K': 1000,
3704 'm': 1000 ** 2,
3705 'M': 1000 ** 2,
3706 'kk': 1000 ** 2,
3707 'KK': 1000 ** 2,
3708 }
be64b5b0 3709
fb47597b 3710 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3711
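# Examples for the two lookup_unit_table() based parsers above; decimal commas
# are accepted and SI vs. binary prefixes are distinguished:
#   >>> parse_filesize('1.2Tb')
#   1200000000000
#   >>> parse_filesize('23,3 KiB')
#   23859
#   >>> parse_count('1.3M')
#   1300000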
2f7ae819 3712
b871d7e9
S
3713def parse_resolution(s):
3714 if s is None:
3715 return {}
3716
17ec8bcf 3717 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
b871d7e9
S
3718 if mobj:
3719 return {
3720 'width': int(mobj.group('w')),
3721 'height': int(mobj.group('h')),
3722 }
3723
17ec8bcf 3724 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
b871d7e9
S
3725 if mobj:
3726 return {'height': int(mobj.group(1))}
3727
3728 mobj = re.search(r'\b([48])[kK]\b', s)
3729 if mobj:
3730 return {'height': int(mobj.group(1)) * 540}
3731
3732 return {}
3733
3734
0dc41787
S
3735def parse_bitrate(s):
3736 if not isinstance(s, compat_str):
3737 return
3738 mobj = re.search(r'\b(\d+)\s*kbps', s)
3739 if mobj:
3740 return int(mobj.group(1))
3741
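# Examples for parse_resolution() and parse_bitrate() above:
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}
#   >>> parse_resolution('4k')
#   {'height': 2160}
#   >>> parse_bitrate('128 kbps')
#   128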
3742
a942d6cb 3743def month_by_name(name, lang='en'):
caefb1de
PH
3744 """ Return the number of a month given its (locale-independent) English name """
3745
f6717dec 3746 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3747
caefb1de 3748 try:
f6717dec 3749 return month_names.index(name) + 1
7105440c
YCH
3750 except ValueError:
3751 return None
3752
3753
3754def month_by_abbreviation(abbrev):
3755 """ Return the number of a month given its (locale-independent) English
3756 abbreviation """
3757
3758 try:
3759 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3760 except ValueError:
3761 return None
18258362
JMF
3762
3763
5aafe895 3764def fix_xml_ampersands(xml_str):
18258362 3765 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3766 return re.sub(
3767 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3768 '&amp;',
5aafe895 3769 xml_str)
e3946f98
PH
3770
3771
3772def setproctitle(title):
8bf48f23 3773 assert isinstance(title, compat_str)
c1c05c67
YCH
3774
3775 # ctypes in Jython is not complete
3776 # http://bugs.jython.org/issue2148
3777 if sys.platform.startswith('java'):
3778 return
3779
e3946f98 3780 try:
611c1dd9 3781 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3782 except OSError:
3783 return
2f49bcd6
RC
3784 except TypeError:
3785 # LoadLibrary in Windows Python 2.7.13 only expects
3786 # a bytestring, but since unicode_literals turns
3787 # every string into a unicode string, it fails.
3788 return
6eefe533
PH
3789 title_bytes = title.encode('utf-8')
3790 buf = ctypes.create_string_buffer(len(title_bytes))
3791 buf.value = title_bytes
e3946f98 3792 try:
6eefe533 3793 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3794 except AttributeError:
3795 return # Strange libc, just skip this
d7dda168
PH
3796
3797
3798def remove_start(s, start):
46bc9b7d 3799 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3800
3801
2b9faf55 3802def remove_end(s, end):
46bc9b7d 3803 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3804
3805
31b2051e
S
3806def remove_quotes(s):
3807 if s is None or len(s) < 2:
3808 return s
3809 for quote in ('"', "'", ):
3810 if s[0] == quote and s[-1] == quote:
3811 return s[1:-1]
3812 return s
3813
3814
b6e0c7d2
U
3815def get_domain(url):
3816 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3817 return domain.group('domain') if domain else None
3818
3819
29eb5174 3820def url_basename(url):
9b8aaeed 3821 path = compat_urlparse.urlparse(url).path
28e614de 3822 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3823
3824
02dc0a36
S
3825def base_url(url):
3826 return re.match(r'https?://[^?#&]+/', url).group()
3827
3828
e34c3361 3829def urljoin(base, path):
4b5de77b
S
3830 if isinstance(path, bytes):
3831 path = path.decode('utf-8')
e34c3361
S
3832 if not isinstance(path, compat_str) or not path:
3833 return None
fad4ceb5 3834 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3835 return path
4b5de77b
S
3836 if isinstance(base, bytes):
3837 base = base.decode('utf-8')
3838 if not isinstance(base, compat_str) or not re.match(
3839 r'^(?:https?:)?//', base):
e34c3361
S
3840 return None
3841 return compat_urlparse.urljoin(base, path)
3842
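# Examples for the URL helpers above:
#   >>> url_basename('http://example.com/path/to/file.mp4?dl=1')
#   'file.mp4'
#   >>> base_url('http://example.com/path/to/file.mp4?dl=1')
#   'http://example.com/path/to/'
#   >>> urljoin('http://example.com/path/', 'media/file.mp4')
#   'http://example.com/path/media/file.mp4'
#   >>> get_domain('https://www.youtube.com/watch?v=xxxxxxxx')
#   'youtube.com'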
3843
aa94a6d3
PH
3844class HEADRequest(compat_urllib_request.Request):
3845 def get_method(self):
611c1dd9 3846 return 'HEAD'
7217e148
PH
3847
3848
95cf60e8
S
3849class PUTRequest(compat_urllib_request.Request):
3850 def get_method(self):
3851 return 'PUT'
3852
3853
9732d77e 3854def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3855 if get_attr:
3856 if v is not None:
3857 v = getattr(v, get_attr, None)
9572013d
PH
3858 if v == '':
3859 v = None
1812afb7
S
3860 if v is None:
3861 return default
3862 try:
3863 return int(v) * invscale // scale
5e1271c5 3864 except (ValueError, TypeError):
af98f8ff 3865 return default
9732d77e 3866
9572013d 3867
40a90862
JMF
3868def str_or_none(v, default=None):
3869 return default if v is None else compat_str(v)
3870
9732d77e
PH
3871
3872def str_to_int(int_str):
48d4681e 3873 """ A more relaxed version of int_or_none """
42db58ec 3874 if isinstance(int_str, compat_integer_types):
348c6bf1 3875 return int_str
42db58ec
S
3876 elif isinstance(int_str, compat_str):
3877 int_str = re.sub(r'[,\.\+]', '', int_str)
3878 return int_or_none(int_str)
608d11f5
PH
3879
3880
9732d77e 3881def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3882 if v is None:
3883 return default
3884 try:
3885 return float(v) * invscale / scale
5e1271c5 3886 except (ValueError, TypeError):
caf80631 3887 return default
43f775e4
PH
3888
3889
c7e327c4
S
3890def bool_or_none(v, default=None):
3891 return v if isinstance(v, bool) else default
3892
3893
53cd37ba
S
3894def strip_or_none(v, default=None):
3895 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3896
3897
af03000a
S
3898def url_or_none(url):
3899 if not url or not isinstance(url, compat_str):
3900 return None
3901 url = url.strip()
29f7c58a 3902 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3903
3904
e29663c6 3905def strftime_or_none(timestamp, date_format, default=None):
3906 datetime_object = None
3907 try:
3908 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3909 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3910 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3911 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3912 return datetime_object.strftime(date_format)
3913 except (ValueError, TypeError, AttributeError):
3914 return default
3915
3916
608d11f5 3917def parse_duration(s):
8f9312c3 3918 if not isinstance(s, compat_basestring):
608d11f5
PH
3919 return None
3920
ca7b3246
S
3921 s = s.strip()
3922
acaff495 3923 days, hours, mins, secs, ms = [None] * 5
15846398 3924 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3925 if m:
3926 days, hours, mins, secs, ms = m.groups()
3927 else:
3928 m = re.match(
056653bb
S
3929 r'''(?ix)(?:P?
3930 (?:
3931 [0-9]+\s*y(?:ears?)?\s*
3932 )?
3933 (?:
3934 [0-9]+\s*m(?:onths?)?\s*
3935 )?
3936 (?:
3937 [0-9]+\s*w(?:eeks?)?\s*
3938 )?
8f4b58d7 3939 (?:
acaff495 3940 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3941 )?
056653bb 3942 T)?
acaff495 3943 (?:
3944 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3945 )?
3946 (?:
3947 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3948 )?
3949 (?:
3950 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3951 )?Z?$''', s)
acaff495 3952 if m:
3953 days, hours, mins, secs, ms = m.groups()
3954 else:
15846398 3955 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3956 if m:
3957 hours, mins = m.groups()
3958 else:
3959 return None
3960
3961 duration = 0
3962 if secs:
3963 duration += float(secs)
3964 if mins:
3965 duration += float(mins) * 60
3966 if hours:
3967 duration += float(hours) * 60 * 60
3968 if days:
3969 duration += float(days) * 24 * 60 * 60
3970 if ms:
3971 duration += float(ms)
3972 return duration
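# Editorial usage sketch (not part of the source file): both colon-separated and
# ISO 8601 style durations are accepted; the result is in seconds.
# >>> parse_duration('1:23:45')
# 5025.0
# >>> parse_duration('PT1H30M')
# 5400.0
# >>> parse_duration('junk') is None
# True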
91d7d0b3
JMF
3973
3974
e65e4c88 3975def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3976 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3977 return (
3978 '{0}.{1}{2}'.format(name, ext, real_ext)
3979 if not expected_real_ext or real_ext[1:] == expected_real_ext
3980 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3981
3982
b3ed15b7
S
3983def replace_extension(filename, ext, expected_real_ext=None):
3984 name, real_ext = os.path.splitext(filename)
3985 return '{0}.{1}'.format(
3986 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3987 ext)
3988
3989
d70ad093
PH
3990def check_executable(exe, args=[]):
3991 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3992 args can be a list of arguments for a short output (like -version) """
3993 try:
d3c93ec2 3994 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
d70ad093
PH
3995 except OSError:
3996 return False
3997 return exe
b7ab0590
PH
3998
3999
95807118 4000def get_exe_version(exe, args=['--version'],
cae97f65 4001 version_re=None, unrecognized='present'):
95807118
PH
4002 """ Returns the version of the specified executable,
4003 or False if the executable is not present """
4004 try:
b64d04c1 4005 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 4006 # SIGTTOU if yt-dlp is run in the background.
067aa17e 4007 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
d3c93ec2 4008 out, _ = Popen(
4009 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
4010 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
95807118
PH
4011 except OSError:
4012 return False
cae97f65
PH
4013 if isinstance(out, bytes): # Python 2.x
4014 out = out.decode('ascii', 'ignore')
4015 return detect_exe_version(out, version_re, unrecognized)
4016
4017
4018def detect_exe_version(output, version_re=None, unrecognized='present'):
4019 assert isinstance(output, compat_str)
4020 if version_re is None:
4021 version_re = r'version\s+([-0-9._a-zA-Z]+)'
4022 m = re.search(version_re, output)
95807118
PH
4023 if m:
4024 return m.group(1)
4025 else:
4026 return unrecognized
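# Editorial usage sketch (not part of the source file): detect_exe_version()
# extracts a version string from typical `--version` output, falling back to
# the `unrecognized` marker when nothing matches.
# >>> detect_exe_version('ffmpeg version 4.4.1 Copyright (c) 2000-2021 the FFmpeg developers')
# '4.4.1'
# >>> detect_exe_version('some unrelated output')
# 'present'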
4027
4028
cb89cfc1 4029class LazyList(collections.abc.Sequence):
483336e7 4030 ''' Lazy immutable list from an iterable
4031 Note that slices of a LazyList are lists and not LazyList'''
4032
8e5fecc8 4033 class IndexError(IndexError):
4034 pass
4035
483336e7 4036 def __init__(self, iterable):
4037 self.__iterable = iter(iterable)
4038 self.__cache = []
28419ca2 4039 self.__reversed = False
483336e7 4040
4041 def __iter__(self):
28419ca2 4042 if self.__reversed:
4043 # We need to consume the entire iterable to iterate in reverse
981052c9 4044 yield from self.exhaust()
28419ca2 4045 return
4046 yield from self.__cache
483336e7 4047 for item in self.__iterable:
4048 self.__cache.append(item)
4049 yield item
4050
981052c9 4051 def __exhaust(self):
483336e7 4052 self.__cache.extend(self.__iterable)
9f1a1c36 4053 # Discard the emptied iterable to make it pickle-able
4054 self.__iterable = []
28419ca2 4055 return self.__cache
4056
981052c9 4057 def exhaust(self):
4058 ''' Evaluate the entire iterable '''
4059 return self.__exhaust()[::-1 if self.__reversed else 1]
4060
28419ca2 4061 @staticmethod
981052c9 4062 def __reverse_index(x):
e0f2b4b4 4063 return None if x is None else -(x + 1)
483336e7 4064
4065 def __getitem__(self, idx):
4066 if isinstance(idx, slice):
28419ca2 4067 if self.__reversed:
e0f2b4b4 4068 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4069 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4070 elif isinstance(idx, int):
28419ca2 4071 if self.__reversed:
981052c9 4072 idx = self.__reverse_index(idx)
e0f2b4b4 4073 start, stop, step = idx, idx, 0
483336e7 4074 else:
4075 raise TypeError('indices must be integers or slices')
e0f2b4b4 4076 if ((start or 0) < 0 or (stop or 0) < 0
4077 or (start is None and step < 0)
4078 or (stop is None and step > 0)):
483336e7 4079 # We need to consume the entire iterable to be able to slice from the end
4080 # Obviously, never use this with infinite iterables
8e5fecc8 4081 self.__exhaust()
4082 try:
4083 return self.__cache[idx]
4084 except IndexError as e:
4085 raise self.IndexError(e) from e
e0f2b4b4 4086 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4087 if n > 0:
4088 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4089 try:
4090 return self.__cache[idx]
4091 except IndexError as e:
4092 raise self.IndexError(e) from e
483336e7 4093
4094 def __bool__(self):
4095 try:
28419ca2 4096 self[-1] if self.__reversed else self[0]
8e5fecc8 4097 except self.IndexError:
483336e7 4098 return False
4099 return True
4100
4101 def __len__(self):
8e5fecc8 4102 self.__exhaust()
483336e7 4103 return len(self.__cache)
4104
981052c9 4105 def reverse(self):
28419ca2 4106 self.__reversed = not self.__reversed
4107 return self
4108
4109 def __repr__(self):
4110 # repr and str should mimic a list. So we exhaust the iterable
4111 return repr(self.exhaust())
4112
4113 def __str__(self):
4114 return repr(self.exhaust())
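# Editorial usage sketch (not part of the source file): items are pulled from the
# iterable only as far as needed and cached; slices are plain lists. Negative
# indices, len() and repr() exhaust the iterable, so avoid them on infinite input.
# >>> lazy = LazyList(itertools.count())
# >>> lazy[3]
# 3
# >>> lazy[:5]
# [0, 1, 2, 3, 4]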
4115
483336e7 4116
7be9ccff 4117class PagedList:
dd26ced1
PH
4118 def __len__(self):
4119 # This is only useful for tests
4120 return len(self.getslice())
4121
7be9ccff 4122 def __init__(self, pagefunc, pagesize, use_cache=True):
4123 self._pagefunc = pagefunc
4124 self._pagesize = pagesize
4125 self._use_cache = use_cache
4126 self._cache = {}
4127
4128 def getpage(self, pagenum):
4129 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4130 if self._use_cache:
4131 self._cache[pagenum] = page_results
4132 return page_results
4133
4134 def getslice(self, start=0, end=None):
4135 return list(self._getslice(start, end))
4136
4137 def _getslice(self, start, end):
55575225 4138 raise NotImplementedError('This method must be implemented by subclasses')
4139
4140 def __getitem__(self, idx):
7be9ccff 4141 # NOTE: cache must be enabled if this is used
55575225 4142 if not isinstance(idx, int) or idx < 0:
4143 raise TypeError('indices must be non-negative integers')
4144 entries = self.getslice(idx, idx + 1)
4145 return entries[0] if entries else None
4146
9c44d242
PH
4147
4148class OnDemandPagedList(PagedList):
7be9ccff 4149 def _getslice(self, start, end):
b7ab0590
PH
4150 for pagenum in itertools.count(start // self._pagesize):
4151 firstid = pagenum * self._pagesize
4152 nextfirstid = pagenum * self._pagesize + self._pagesize
4153 if start >= nextfirstid:
4154 continue
4155
b7ab0590
PH
4156 startv = (
4157 start % self._pagesize
4158 if firstid <= start < nextfirstid
4159 else 0)
b7ab0590
PH
4160 endv = (
4161 ((end - 1) % self._pagesize) + 1
4162 if (end is not None and firstid <= end <= nextfirstid)
4163 else None)
4164
7be9ccff 4165 page_results = self.getpage(pagenum)
b7ab0590
PH
4166 if startv != 0 or endv is not None:
4167 page_results = page_results[startv:endv]
7be9ccff 4168 yield from page_results
b7ab0590
PH
4169
4170 # A little optimization - if the current page is not "full", i.e. does
4171 # not contain page_size videos, then we can assume that this page
4172 # is the last one - there are no more ids on further pages -
4173 # i.e. no need to query again.
4174 if len(page_results) + startv < self._pagesize:
4175 break
4176
4177 # If we got the whole page, but the next page is not interesting,
4178 # break out early as well
4179 if end == nextfirstid:
4180 break
81c2f20b
PH
4181
4182
9c44d242
PH
4183class InAdvancePagedList(PagedList):
4184 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4185 self._pagecount = pagecount
7be9ccff 4186 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4187
7be9ccff 4188 def _getslice(self, start, end):
9c44d242
PH
4189 start_page = start // self._pagesize
4190 end_page = (
4191 self._pagecount if end is None else (end // self._pagesize + 1))
4192 skip_elems = start - start_page * self._pagesize
4193 only_more = None if end is None else end - start
4194 for pagenum in range(start_page, end_page):
7be9ccff 4195 page_results = self.getpage(pagenum)
9c44d242 4196 if skip_elems:
7be9ccff 4197 page_results = page_results[skip_elems:]
9c44d242
PH
4198 skip_elems = None
4199 if only_more is not None:
7be9ccff 4200 if len(page_results) < only_more:
4201 only_more -= len(page_results)
9c44d242 4202 else:
7be9ccff 4203 yield from page_results[:only_more]
9c44d242 4204 break
7be9ccff 4205 yield from page_results
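# Editorial usage sketch (not part of the source file): a PagedList wraps a
# page-fetching callback; `fetch` below is a hypothetical stand-in for a function
# that queries one page of results from an API.
# >>> fetch = lambda pagenum: ['entry%d' % (pagenum * 10 + i) for i in range(10)]
# >>> playlist = OnDemandPagedList(fetch, 10)
# >>> playlist.getslice(5, 8)
# ['entry5', 'entry6', 'entry7']
# >>> playlist[12]
# 'entry12'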
9c44d242
PH
4206
4207
81c2f20b 4208def uppercase_escape(s):
676eb3f2 4209 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4210 return re.sub(
a612753d 4211 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4212 lambda m: unicode_escape(m.group(0))[0],
4213 s)
0fe2ff78
YCH
4214
4215
4216def lowercase_escape(s):
4217 unicode_escape = codecs.getdecoder('unicode_escape')
4218 return re.sub(
4219 r'\\u[0-9a-fA-F]{4}',
4220 lambda m: unicode_escape(m.group(0))[0],
4221 s)
b53466e1 4222
d05cfe06
S
4223
4224def escape_rfc3986(s):
4225 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4226 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4227 s = s.encode('utf-8')
ecc0c5ee 4228 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4229
4230
4231def escape_url(url):
4232 """Escape URL as suggested by RFC 3986"""
4233 url_parsed = compat_urllib_parse_urlparse(url)
4234 return url_parsed._replace(
efbed08d 4235 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4236 path=escape_rfc3986(url_parsed.path),
4237 params=escape_rfc3986(url_parsed.params),
4238 query=escape_rfc3986(url_parsed.query),
4239 fragment=escape_rfc3986(url_parsed.fragment)
4240 ).geturl()
4241
62e609ab 4242
4dfbf869 4243def parse_qs(url):
4244 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4245
4246
62e609ab
PH
4247def read_batch_urls(batch_fd):
4248 def fixup(url):
4249 if not isinstance(url, compat_str):
4250 url = url.decode('utf-8', 'replace')
8c04f0be 4251 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4252 for bom in BOM_UTF8:
4253 if url.startswith(bom):
4254 url = url[len(bom):]
4255 url = url.lstrip()
4256 if not url or url.startswith(('#', ';', ']')):
62e609ab 4257 return False
8c04f0be 4258 # "#" cannot be stripped out since it is part of the URI
4259 # However, it can be safely stripped out if following a whitespace
4260 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4261
4262 with contextlib.closing(batch_fd) as fd:
4263 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4264
4265
4266def urlencode_postdata(*args, **kargs):
15707c7e 4267 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4268
4269
38f9ef31 4270def update_url_query(url, query):
cacd9966
YCH
4271 if not query:
4272 return url
38f9ef31 4273 parsed_url = compat_urlparse.urlparse(url)
4274 qs = compat_parse_qs(parsed_url.query)
4275 qs.update(query)
4276 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4277 query=compat_urllib_parse_urlencode(qs, True)))
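# Editorial usage sketch (not part of the source file): existing query parameters
# are kept and merged with the new ones.
# >>> update_url_query('http://example.com/path?a=1', {'b': '2'})
# 'http://example.com/path?a=1&b=2'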
16392824 4278
8e60dc75 4279
ed0291d1
S
4280def update_Request(req, url=None, data=None, headers={}, query={}):
4281 req_headers = req.headers.copy()
4282 req_headers.update(headers)
4283 req_data = data or req.data
4284 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4285 req_get_method = req.get_method()
4286 if req_get_method == 'HEAD':
4287 req_type = HEADRequest
4288 elif req_get_method == 'PUT':
4289 req_type = PUTRequest
4290 else:
4291 req_type = compat_urllib_request.Request
ed0291d1
S
4292 new_req = req_type(
4293 req_url, data=req_data, headers=req_headers,
4294 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4295 if hasattr(req, 'timeout'):
4296 new_req.timeout = req.timeout
4297 return new_req
4298
4299
10c87c15 4300def _multipart_encode_impl(data, boundary):
0c265486
YCH
4301 content_type = 'multipart/form-data; boundary=%s' % boundary
4302
4303 out = b''
4304 for k, v in data.items():
4305 out += b'--' + boundary.encode('ascii') + b'\r\n'
4306 if isinstance(k, compat_str):
4307 k = k.encode('utf-8')
4308 if isinstance(v, compat_str):
4309 v = v.encode('utf-8')
4310 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4311 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4312 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4313 if boundary.encode('ascii') in content:
4314 raise ValueError('Boundary overlaps with data')
4315 out += content
4316
4317 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4318
4319 return out, content_type
4320
4321
4322def multipart_encode(data, boundary=None):
4323 '''
4324 Encode a dict to RFC 7578-compliant form-data
4325
4326 data:
4327 A dict where keys and values can be either Unicode or bytes-like
4328 objects.
4329 boundary:
4330 If specified, it must be a Unicode object and is used as the boundary. Otherwise
4331 a random boundary is generated.
4332
4333 Reference: https://tools.ietf.org/html/rfc7578
4334 '''
4335 has_specified_boundary = boundary is not None
4336
4337 while True:
4338 if boundary is None:
4339 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4340
4341 try:
10c87c15 4342 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4343 break
4344 except ValueError:
4345 if has_specified_boundary:
4346 raise
4347 boundary = None
4348
4349 return out, content_type
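# Editorial usage sketch (not part of the source file): the helper returns the
# encoded body together with the matching Content-Type header value; the body is
# the RFC 7578 bytes payload suitable for use as HTTP request data.
# >>> body, content_type = multipart_encode({'username': 'foo'}, boundary='x' * 16)
# >>> content_type
# 'multipart/form-data; boundary=xxxxxxxxxxxxxxxx'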
4350
4351
86296ad2 4352def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4353 if isinstance(key_or_keys, (list, tuple)):
4354 for key in key_or_keys:
86296ad2
S
4355 if key not in d or d[key] is None or skip_false_values and not d[key]:
4356 continue
4357 return d[key]
cbecc9b9
S
4358 return default
4359 return d.get(key_or_keys, default)
4360
4361
329ca3be 4362def try_get(src, getter, expected_type=None):
6606817a 4363 for get in variadic(getter):
a32a9a7e
S
4364 try:
4365 v = get(src)
4366 except (AttributeError, KeyError, TypeError, IndexError):
4367 pass
4368 else:
4369 if expected_type is None or isinstance(v, expected_type):
4370 return v
329ca3be
S
4371
4372
6cc62232
S
4373def merge_dicts(*dicts):
4374 merged = {}
4375 for a_dict in dicts:
4376 for k, v in a_dict.items():
4377 if v is None:
4378 continue
3089bc74
S
4379 if (k not in merged
4380 or (isinstance(v, compat_str) and v
4381 and isinstance(merged[k], compat_str)
4382 and not merged[k])):
6cc62232
S
4383 merged[k] = v
4384 return merged
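# Editorial usage sketch (not part of the source file): these helpers tolerate
# missing keys and empty values when assembling metadata.
# >>> try_get({'a': [{'b': 42}]}, lambda x: x['a'][0]['b'], int)
# 42
# >>> dict_get({'a': None, 'b': ''}, ('a', 'b', 'c'), default='fallback')
# 'fallback'
# >>> merge_dicts({'title': ''}, {'title': 'Example'})
# {'title': 'Example'}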
4385
4386
8e60dc75
S
4387def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4388 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4389
16392824 4390
a1a530b0
PH
4391US_RATINGS = {
4392 'G': 0,
4393 'PG': 10,
4394 'PG-13': 13,
4395 'R': 16,
4396 'NC': 18,
4397}
fac55558
PH
4398
4399
a8795327 4400TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4401 'TV-Y': 0,
4402 'TV-Y7': 7,
4403 'TV-G': 0,
4404 'TV-PG': 0,
4405 'TV-14': 14,
4406 'TV-MA': 17,
a8795327
S
4407}
4408
4409
146c80e2 4410def parse_age_limit(s):
a8795327
S
4411 if type(s) == int:
4412 return s if 0 <= s <= 21 else None
4413 if not isinstance(s, compat_basestring):
d838b1bd 4414 return None
146c80e2 4415 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4416 if m:
4417 return int(m.group('age'))
5c5fae6d 4418 s = s.upper()
a8795327
S
4419 if s in US_RATINGS:
4420 return US_RATINGS[s]
5a16c9d9 4421 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4422 if m:
5a16c9d9 4423 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4424 return None
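# Editorial usage sketch (not part of the source file): numeric, MPAA-style and
# TV Parental Guidelines ratings are all normalized to an integer age.
# >>> parse_age_limit('18+')
# 18
# >>> parse_age_limit('PG-13')
# 13
# >>> parse_age_limit('TV-MA')
# 17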
146c80e2
S
4425
4426
fac55558 4427def strip_jsonp(code):
609a61e3 4428 return re.sub(
5552c9eb 4429 r'''(?sx)^
e9c671d5 4430 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4431 (?:\s*&&\s*(?P=func_name))?
4432 \s*\(\s*(?P<callback_data>.*)\);?
4433 \s*?(?://[^\n]*)*$''',
4434 r'\g<callback_data>', code)
478c2c61
PH
4435
4436
5c610515 4437def js_to_json(code, vars={}):
4438 # vars is a dict of var, val pairs to substitute
c843e685 4439 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4440 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4441 INTEGER_TABLE = (
4442 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4443 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4444 )
4445
e05f6939 4446 def fix_kv(m):
e7b6d122
PH
4447 v = m.group(0)
4448 if v in ('true', 'false', 'null'):
4449 return v
421ddcb8
C
4450 elif v in ('undefined', 'void 0'):
4451 return 'null'
8bdd16b4 4452 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4453 return ""
4454
4455 if v[0] in ("'", '"'):
4456 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4457 '"': '\\"',
bd1e4844 4458 "\\'": "'",
4459 '\\\n': '',
4460 '\\x': '\\u00',
4461 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4462 else:
4463 for regex, base in INTEGER_TABLE:
4464 im = re.match(regex, v)
4465 if im:
4466 i = int(im.group(1), base)
4467 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4468
5c610515 4469 if v in vars:
4470 return vars[v]
4471
e7b6d122 4472 return '"%s"' % v
e05f6939 4473
bd1e4844 4474 return re.sub(r'''(?sx)
4475 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4476 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4477 {comment}|,(?={skip}[\]}}])|
421ddcb8 4478 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4479 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4480 [0-9]+(?={skip}:)|
4481 !+
4195096e 4482 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
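# Editorial usage sketch (not part of the source file): unquoted keys, single
# quotes, hex literals, trailing commas and `undefined` are rewritten so the
# result can be fed to json.loads().
# >>> js_to_json("{foo: 'bar', baz: 0x10, arr: [1, 2,], q: undefined}")
# '{"foo": "bar", "baz": 16, "arr": [1, 2], "q": null}'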
e05f6939
PH
4483
4484
478c2c61
PH
4485def qualities(quality_ids):
4486 """ Get a numeric quality value out of a list of possible values """
4487 def q(qid):
4488 try:
4489 return quality_ids.index(qid)
4490 except ValueError:
4491 return -1
4492 return q
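# Editorial usage sketch (not part of the source file): the returned callable
# ranks a quality id by its position in the preference list, or -1 if unknown.
# >>> q = qualities(['240p', '360p', '720p'])
# >>> q('720p'), q('240p'), q('4320p')
# (2, 0, -1)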
4493
acd69589 4494
de6000d9 4495DEFAULT_OUTTMPL = {
4496 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4497 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4498}
4499OUTTMPL_TYPES = {
72755351 4500 'chapter': None,
de6000d9 4501 'subtitle': None,
4502 'thumbnail': None,
4503 'description': 'description',
4504 'annotation': 'annotations.xml',
4505 'infojson': 'info.json',
08438d2c 4506 'link': None,
5112f26a 4507 'pl_thumbnail': None,
de6000d9 4508 'pl_description': 'description',
4509 'pl_infojson': 'info.json',
4510}
0a871f68 4511
143db31d 4512# As of [1] format syntax is:
4513# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4514# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4515STR_FORMAT_RE_TMPL = r'''(?x)
4516 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4517 %
524e2e4f 4518 (?P<has_key>\((?P<key>{0})\))?
752cda38 4519 (?P<format>
524e2e4f 4520 (?P<conversion>[#0\-+ ]+)?
4521 (?P<min_width>\d+)?
4522 (?P<precision>\.\d+)?
4523 (?P<len_mod>[hlL])? # unused in python
901130bb 4524 {1} # conversion type
752cda38 4525 )
143db31d 4526'''
4527
7d1eb38a 4528
901130bb 4529STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4530
7d1eb38a 4531
a020a0dc
PH
4532def limit_length(s, length):
4533 """ Add ellipses to overly long strings """
4534 if s is None:
4535 return None
4536 ELLIPSES = '...'
4537 if len(s) > length:
4538 return s[:length - len(ELLIPSES)] + ELLIPSES
4539 return s
48844745
PH
4540
4541
4542def version_tuple(v):
5f9b8394 4543 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4544
4545
4546def is_outdated_version(version, limit, assume_new=True):
4547 if not version:
4548 return not assume_new
4549 try:
4550 return version_tuple(version) < version_tuple(limit)
4551 except ValueError:
4552 return not assume_new
732ea2f0
PH
4553
4554
4555def ytdl_is_updateable():
7a5c1cfe 4556 """ Returns if yt-dlp can be updated with -U """
735d865e 4557
5d535b4a 4558 from .update import is_non_updateable
732ea2f0 4559
5d535b4a 4560 return not is_non_updateable()
7d4111ed
PH
4561
4562
4563def args_to_str(args):
4564 # Get a short string representation for a subprocess command
702ccf2d 4565 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4566
4567
9b9c5355 4568def error_to_compat_str(err):
fdae2358
S
4569 err_str = str(err)
4570 # On Python 2, the error byte string must be decoded with the proper
4571 # encoding rather than ascii
4572 if sys.version_info[0] < 3:
4573 err_str = err_str.decode(preferredencoding())
4574 return err_str
4575
4576
c460bdd5 4577def mimetype2ext(mt):
eb9ee194
S
4578 if mt is None:
4579 return None
4580
9359f3d4
F
4581 mt, _, params = mt.partition(';')
4582 mt = mt.strip()
4583
4584 FULL_MAP = {
765ac263 4585 'audio/mp4': 'm4a',
6c33d24b
YCH
4586 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4587 # it's the most popular one
4588 'audio/mpeg': 'mp3',
ba39289d 4589 'audio/x-wav': 'wav',
9359f3d4
F
4590 'audio/wav': 'wav',
4591 'audio/wave': 'wav',
4592 }
4593
4594 ext = FULL_MAP.get(mt)
765ac263
JMF
4595 if ext is not None:
4596 return ext
4597
9359f3d4 4598 SUBTYPE_MAP = {
f6861ec9 4599 '3gpp': '3gp',
cafcf657 4600 'smptett+xml': 'tt',
cafcf657 4601 'ttaf+xml': 'dfxp',
a0d8d704 4602 'ttml+xml': 'ttml',
f6861ec9 4603 'x-flv': 'flv',
a0d8d704 4604 'x-mp4-fragmented': 'mp4',
d4f05d47 4605 'x-ms-sami': 'sami',
a0d8d704 4606 'x-ms-wmv': 'wmv',
b4173f15
RA
4607 'mpegurl': 'm3u8',
4608 'x-mpegurl': 'm3u8',
4609 'vnd.apple.mpegurl': 'm3u8',
4610 'dash+xml': 'mpd',
b4173f15 4611 'f4m+xml': 'f4m',
f164b971 4612 'hds+xml': 'f4m',
e910fe2f 4613 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4614 'quicktime': 'mov',
98ce1a3f 4615 'mp2t': 'ts',
39e7107d 4616 'x-wav': 'wav',
9359f3d4
F
4617 'filmstrip+json': 'fs',
4618 'svg+xml': 'svg',
4619 }
4620
4621 _, _, subtype = mt.rpartition('/')
4622 ext = SUBTYPE_MAP.get(subtype.lower())
4623 if ext is not None:
4624 return ext
4625
4626 SUFFIX_MAP = {
4627 'json': 'json',
4628 'xml': 'xml',
4629 'zip': 'zip',
4630 'gzip': 'gz',
4631 }
4632
4633 _, _, suffix = subtype.partition('+')
4634 ext = SUFFIX_MAP.get(suffix)
4635 if ext is not None:
4636 return ext
4637
4638 return subtype.replace('+', '.')
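# Editorial usage sketch (not part of the source file): any "; parameter" suffix
# is ignored and unknown subtypes fall through unchanged.
# >>> mimetype2ext('audio/mp4')
# 'm4a'
# >>> mimetype2ext('application/x-mpegURL')
# 'm3u8'
# >>> mimetype2ext('video/mp4; codecs="avc1.42E01E"')
# 'mp4'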
c460bdd5
PH
4639
4640
4f3c5e06 4641def parse_codecs(codecs_str):
4642 # http://tools.ietf.org/html/rfc6381
4643 if not codecs_str:
4644 return {}
a0566bbf 4645 split_codecs = list(filter(None, map(
dbf5416a 4646 str.strip, codecs_str.strip().strip(',').split(','))))
176f1866 4647 vcodec, acodec, hdr = None, None, None
a0566bbf 4648 for full_codec in split_codecs:
4f3c5e06 4649 codec = full_codec.split('.')[0]
6993f78d 4650 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora', 'dvh1', 'dvhe'):
4f3c5e06 4651 if not vcodec:
4652 vcodec = full_codec
176f1866 4653 if codec in ('dvh1', 'dvhe'):
4654 hdr = 'DV'
4655 elif codec == 'vp9' and vcodec.startswith('vp9.2'):
4656 hdr = 'HDR10'
4657 elif codec == 'av01':
4658 parts = full_codec.split('.')
4659 if len(parts) > 3 and parts[3] == '10':
4660 hdr = 'HDR10'
4661 vcodec = '.'.join(parts[:4])
60f5c9fb 4662 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4663 if not acodec:
4664 acodec = full_codec
4665 else:
60f5c9fb 4666 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4667 if not vcodec and not acodec:
a0566bbf 4668 if len(split_codecs) == 2:
4f3c5e06 4669 return {
a0566bbf 4670 'vcodec': split_codecs[0],
4671 'acodec': split_codecs[1],
4f3c5e06 4672 }
4673 else:
4674 return {
4675 'vcodec': vcodec or 'none',
4676 'acodec': acodec or 'none',
176f1866 4677 'dynamic_range': hdr,
4f3c5e06 4678 }
4679 return {}
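# Editorial usage sketch (not part of the source file): a typical DASH/HLS CODECS
# attribute is split into video and audio codecs, with HDR format detection.
# >>> parse_codecs('avc1.64001f, mp4a.40.2')
# {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
# >>> parse_codecs('vp9.2')
# {'vcodec': 'vp9.2', 'acodec': 'none', 'dynamic_range': 'HDR10'}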
4680
4681
2ccd1b10 4682def urlhandle_detect_ext(url_handle):
79298173 4683 getheader = url_handle.headers.get
2ccd1b10 4684
b55ee18f
PH
4685 cd = getheader('Content-Disposition')
4686 if cd:
4687 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4688 if m:
4689 e = determine_ext(m.group('filename'), default_ext=None)
4690 if e:
4691 return e
4692
c460bdd5 4693 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4694
4695
1e399778
YCH
4696def encode_data_uri(data, mime_type):
4697 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4698
4699
05900629 4700def age_restricted(content_limit, age_limit):
6ec6cb4e 4701 """ Returns True iff the content should be blocked """
05900629
PH
4702
4703 if age_limit is None: # No limit set
4704 return False
4705 if content_limit is None:
4706 return False # Content available for everyone
4707 return age_limit < content_limit
61ca9a80
PH
4708
4709
4710def is_html(first_bytes):
4711 """ Detect whether a file contains HTML by examining its first bytes. """
4712
4713 BOMS = [
4714 (b'\xef\xbb\xbf', 'utf-8'),
4715 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4716 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4717 (b'\xff\xfe', 'utf-16-le'),
4718 (b'\xfe\xff', 'utf-16-be'),
4719 ]
4720 for bom, enc in BOMS:
4721 if first_bytes.startswith(bom):
4722 s = first_bytes[len(bom):].decode(enc, 'replace')
4723 break
4724 else:
4725 s = first_bytes.decode('utf-8', 'replace')
4726
4727 return re.match(r'^\s*<', s)
a055469f
PH
4728
4729
4730def determine_protocol(info_dict):
4731 protocol = info_dict.get('protocol')
4732 if protocol is not None:
4733 return protocol
4734
7de837a5 4735 url = sanitize_url(info_dict['url'])
a055469f
PH
4736 if url.startswith('rtmp'):
4737 return 'rtmp'
4738 elif url.startswith('mms'):
4739 return 'mms'
4740 elif url.startswith('rtsp'):
4741 return 'rtsp'
4742
4743 ext = determine_ext(url)
4744 if ext == 'm3u8':
4745 return 'm3u8'
4746 elif ext == 'f4m':
4747 return 'f4m'
4748
4749 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4750
4751
76d321f6 4752def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4753 """ Render a list of rows, each as a list of values """
ec11a9f4 4754 def width(string):
4755 return len(remove_terminal_sequences(string))
76d321f6 4756
4757 def get_max_lens(table):
ec11a9f4 4758 return [max(width(str(v)) for v in col) for col in zip(*table)]
76d321f6 4759
4760 def filter_using_list(row, filterArray):
4761 return [col for (take, col) in zip(filterArray, row) if take]
4762
4763 if hideEmpty:
4764 max_lens = get_max_lens(data)
4765 header_row = filter_using_list(header_row, max_lens)
4766 data = [filter_using_list(row, max_lens) for row in data]
4767
cfb56d1a 4768 table = [header_row] + data
76d321f6 4769 max_lens = get_max_lens(table)
ec11a9f4 4770 extraGap += 1
76d321f6 4771 if delim:
ec11a9f4 4772 table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
4773 max_lens[-1] = 0
4774 for row in table:
4775 for pos, text in enumerate(map(str, row)):
4776 row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
4777 ret = '\n'.join(''.join(row) for row in table)
4778 return ret
347de493
PH
4779
4780
8f18aca8 4781def _match_one(filter_part, dct, incomplete):
77b87f05 4782 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4783 STRING_OPERATORS = {
4784 '*=': operator.contains,
4785 '^=': lambda attr, value: attr.startswith(value),
4786 '$=': lambda attr, value: attr.endswith(value),
4787 '~=': lambda attr, value: re.search(value, attr),
4788 }
347de493 4789 COMPARISON_OPERATORS = {
a047eeb6 4790 **STRING_OPERATORS,
4791 '<=': operator.le, # "<=" must be defined above "<"
347de493 4792 '<': operator.lt,
347de493 4793 '>=': operator.ge,
a047eeb6 4794 '>': operator.gt,
347de493 4795 '=': operator.eq,
347de493 4796 }
a047eeb6 4797
347de493
PH
4798 operator_rex = re.compile(r'''(?x)\s*
4799 (?P<key>[a-z_]+)
77b87f05 4800 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493 4801 (?:
a047eeb6 4802 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4803 (?P<strval>.+?)
347de493
PH
4804 )
4805 \s*$
4806 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4807 m = operator_rex.search(filter_part)
4808 if m:
18f96d12 4809 m = m.groupdict()
4810 unnegated_op = COMPARISON_OPERATORS[m['op']]
4811 if m['negation']:
77b87f05
MT
4812 op = lambda attr, value: not unnegated_op(attr, value)
4813 else:
4814 op = unnegated_op
18f96d12 4815 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4816 if m['quote']:
4817 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4818 actual_value = dct.get(m['key'])
4819 numeric_comparison = None
4820 if isinstance(actual_value, compat_numeric_types):
e5a088dc
S
4821 # If the original field is a string and the matching comparison value is
4822 # a number, we should respect the origin of the original field
4823 # and process comparison value as a string (see
18f96d12 4824 # https://github.com/ytdl-org/youtube-dl/issues/11082)
347de493 4825 try:
18f96d12 4826 numeric_comparison = int(comparison_value)
347de493 4827 except ValueError:
18f96d12 4828 numeric_comparison = parse_filesize(comparison_value)
4829 if numeric_comparison is None:
4830 numeric_comparison = parse_filesize(f'{comparison_value}B')
4831 if numeric_comparison is None:
4832 numeric_comparison = parse_duration(comparison_value)
4833 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4834 raise ValueError('Operator %s only supports string values!' % m['op'])
347de493 4835 if actual_value is None:
18f96d12 4836 return incomplete or m['none_inclusive']
4837 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
347de493
PH
4838
4839 UNARY_OPERATORS = {
1cc47c66
S
4840 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4841 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4842 }
4843 operator_rex = re.compile(r'''(?x)\s*
4844 (?P<op>%s)\s*(?P<key>[a-z_]+)
4845 \s*$
4846 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4847 m = operator_rex.search(filter_part)
4848 if m:
4849 op = UNARY_OPERATORS[m.group('op')]
4850 actual_value = dct.get(m.group('key'))
8f18aca8 4851 if incomplete and actual_value is None:
4852 return True
347de493
PH
4853 return op(actual_value)
4854
4855 raise ValueError('Invalid filter part %r' % filter_part)
4856
4857
8f18aca8 4858def match_str(filter_str, dct, incomplete=False):
4859 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4860 When incomplete, all conditions passes on missing fields
4861 """
347de493 4862 return all(
8f18aca8 4863 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4864 for filter_part in re.split(r'(?<!\\)&', filter_str))
347de493
PH
4865
4866
4867def match_filter_func(filter_str):
8f18aca8 4868 def _match_func(info_dict, *args, **kwargs):
4869 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4870 return None
4871 else:
4872 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4873 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4874 return _match_func
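# Editorial usage sketch (not part of the source file): the filter syntax combines
# comparisons and presence checks with '&', as used for --match-filter.
# >>> match_str('duration > 60 & like_count', {'duration': 120, 'like_count': 5})
# True
# >>> match_str('duration > 60 & like_count', {'duration': 30, 'like_count': 5})
# False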
91410c9b
PH
4875
4876
bf6427d2
YCH
4877def parse_dfxp_time_expr(time_expr):
4878 if not time_expr:
d631d5f9 4879 return
bf6427d2
YCH
4880
4881 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4882 if mobj:
4883 return float(mobj.group('time_offset'))
4884
db2fe38b 4885 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4886 if mobj:
db2fe38b 4887 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4888
4889
c1c924ab 4890def srt_subtitles_timecode(seconds):
aa7785f8 4891 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4892
4893
4894def ass_subtitles_timecode(seconds):
4895 time = timetuple_from_msec(seconds * 1000)
4896 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
bf6427d2
YCH
4897
4898
4899def dfxp2srt(dfxp_data):
3869028f
YCH
4900 '''
4901 @param dfxp_data A bytes-like object containing DFXP data
4902 @returns A unicode object containing converted SRT data
4903 '''
5b995f71 4904 LEGACY_NAMESPACES = (
3869028f
YCH
4905 (b'http://www.w3.org/ns/ttml', [
4906 b'http://www.w3.org/2004/11/ttaf1',
4907 b'http://www.w3.org/2006/04/ttaf1',
4908 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4909 ]),
3869028f
YCH
4910 (b'http://www.w3.org/ns/ttml#styling', [
4911 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4912 ]),
4913 )
4914
4915 SUPPORTED_STYLING = [
4916 'color',
4917 'fontFamily',
4918 'fontSize',
4919 'fontStyle',
4920 'fontWeight',
4921 'textDecoration'
4922 ]
4923
4e335771 4924 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4925 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4926 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4927 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4928 })
bf6427d2 4929
5b995f71
RA
4930 styles = {}
4931 default_style = {}
4932
87de7069 4933 class TTMLPElementParser(object):
5b995f71
RA
4934 _out = ''
4935 _unclosed_elements = []
4936 _applied_styles = []
bf6427d2 4937
2b14cb56 4938 def start(self, tag, attrib):
5b995f71
RA
4939 if tag in (_x('ttml:br'), 'br'):
4940 self._out += '\n'
4941 else:
4942 unclosed_elements = []
4943 style = {}
4944 element_style_id = attrib.get('style')
4945 if default_style:
4946 style.update(default_style)
4947 if element_style_id:
4948 style.update(styles.get(element_style_id, {}))
4949 for prop in SUPPORTED_STYLING:
4950 prop_val = attrib.get(_x('tts:' + prop))
4951 if prop_val:
4952 style[prop] = prop_val
4953 if style:
4954 font = ''
4955 for k, v in sorted(style.items()):
4956 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4957 continue
4958 if k == 'color':
4959 font += ' color="%s"' % v
4960 elif k == 'fontSize':
4961 font += ' size="%s"' % v
4962 elif k == 'fontFamily':
4963 font += ' face="%s"' % v
4964 elif k == 'fontWeight' and v == 'bold':
4965 self._out += '<b>'
4966 unclosed_elements.append('b')
4967 elif k == 'fontStyle' and v == 'italic':
4968 self._out += '<i>'
4969 unclosed_elements.append('i')
4970 elif k == 'textDecoration' and v == 'underline':
4971 self._out += '<u>'
4972 unclosed_elements.append('u')
4973 if font:
4974 self._out += '<font' + font + '>'
4975 unclosed_elements.append('font')
4976 applied_style = {}
4977 if self._applied_styles:
4978 applied_style.update(self._applied_styles[-1])
4979 applied_style.update(style)
4980 self._applied_styles.append(applied_style)
4981 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4982
2b14cb56 4983 def end(self, tag):
5b995f71
RA
4984 if tag not in (_x('ttml:br'), 'br'):
4985 unclosed_elements = self._unclosed_elements.pop()
4986 for element in reversed(unclosed_elements):
4987 self._out += '</%s>' % element
4988 if unclosed_elements and self._applied_styles:
4989 self._applied_styles.pop()
bf6427d2 4990
2b14cb56 4991 def data(self, data):
5b995f71 4992 self._out += data
2b14cb56 4993
4994 def close(self):
5b995f71 4995 return self._out.strip()
2b14cb56 4996
4997 def parse_node(node):
4998 target = TTMLPElementParser()
4999 parser = xml.etree.ElementTree.XMLParser(target=target)
5000 parser.feed(xml.etree.ElementTree.tostring(node))
5001 return parser.close()
bf6427d2 5002
5b995f71
RA
5003 for k, v in LEGACY_NAMESPACES:
5004 for ns in v:
5005 dfxp_data = dfxp_data.replace(ns, k)
5006
3869028f 5007 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 5008 out = []
5b995f71 5009 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
5010
5011 if not paras:
5012 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 5013
5b995f71
RA
5014 repeat = False
5015 while True:
5016 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
5017 style_id = style.get('id') or style.get(_x('xml:id'))
5018 if not style_id:
5019 continue
5b995f71
RA
5020 parent_style_id = style.get('style')
5021 if parent_style_id:
5022 if parent_style_id not in styles:
5023 repeat = True
5024 continue
5025 styles[style_id] = styles[parent_style_id].copy()
5026 for prop in SUPPORTED_STYLING:
5027 prop_val = style.get(_x('tts:' + prop))
5028 if prop_val:
5029 styles.setdefault(style_id, {})[prop] = prop_val
5030 if repeat:
5031 repeat = False
5032 else:
5033 break
5034
5035 for p in ('body', 'div'):
5036 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5037 if ele is None:
5038 continue
5039 style = styles.get(ele.get('style'))
5040 if not style:
5041 continue
5042 default_style.update(style)
5043
bf6427d2 5044 for para, index in zip(paras, itertools.count(1)):
d631d5f9 5045 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 5046 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
5047 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5048 if begin_time is None:
5049 continue
7dff0363 5050 if not end_time:
d631d5f9
YCH
5051 if not dur:
5052 continue
5053 end_time = begin_time + dur
bf6427d2
YCH
5054 out.append('%d\n%s --> %s\n%s\n\n' % (
5055 index,
c1c924ab
YCH
5056 srt_subtitles_timecode(begin_time),
5057 srt_subtitles_timecode(end_time),
bf6427d2
YCH
5058 parse_node(para)))
5059
5060 return ''.join(out)
5061
5062
66e289ba
S
5063def cli_option(params, command_option, param):
5064 param = params.get(param)
98e698f1
RA
5065 if param:
5066 param = compat_str(param)
66e289ba
S
5067 return [command_option, param] if param is not None else []
5068
5069
5070def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
5071 param = params.get(param)
5b232f46
S
5072 if param is None:
5073 return []
66e289ba
S
5074 assert isinstance(param, bool)
5075 if separator:
5076 return [command_option + separator + (true_value if param else false_value)]
5077 return [command_option, true_value if param else false_value]
5078
5079
5080def cli_valueless_option(params, command_option, param, expected_value=True):
5081 param = params.get(param)
5082 return [command_option] if param == expected_value else []
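# Editorial usage sketch (not part of the source file): these helpers turn yt-dlp
# params into argv fragments for external programs; the param names below are
# only examples.
# >>> cli_option({'proxy': 'socks5://127.0.0.1:1080'}, '--proxy', 'proxy')
# ['--proxy', 'socks5://127.0.0.1:1080']
# >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='=')
# ['--no-check-certificate=true']
# >>> cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
# ['--quiet']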
5083
5084
e92caff5 5085def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 5086 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 5087 if use_compat:
5b1ecbb3 5088 return argdict
5089 else:
5090 argdict = None
eab9b2bc 5091 if argdict is None:
5b1ecbb3 5092 return default
eab9b2bc 5093 assert isinstance(argdict, dict)
5094
e92caff5 5095 assert isinstance(keys, (list, tuple))
5096 for key_list in keys:
e92caff5 5097 arg_list = list(filter(
5098 lambda x: x is not None,
6606817a 5099 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5100 if arg_list:
5101 return [arg for args in arg_list for arg in args]
5102 return default
66e289ba 5103
6251555f 5104
330690a2 5105def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5106 main_key, exe = main_key.lower(), exe.lower()
5107 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5108 keys = [f'{root_key}{k}' for k in (keys or [''])]
5109 if root_key in keys:
5110 if main_key != exe:
5111 keys.append((main_key, exe))
5112 keys.append('default')
5113 else:
5114 use_compat = False
5115 return cli_configuration_args(argdict, keys, default, use_compat)
5116
66e289ba 5117
39672624
YCH
5118class ISO639Utils(object):
5119 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5120 _lang_map = {
5121 'aa': 'aar',
5122 'ab': 'abk',
5123 'ae': 'ave',
5124 'af': 'afr',
5125 'ak': 'aka',
5126 'am': 'amh',
5127 'an': 'arg',
5128 'ar': 'ara',
5129 'as': 'asm',
5130 'av': 'ava',
5131 'ay': 'aym',
5132 'az': 'aze',
5133 'ba': 'bak',
5134 'be': 'bel',
5135 'bg': 'bul',
5136 'bh': 'bih',
5137 'bi': 'bis',
5138 'bm': 'bam',
5139 'bn': 'ben',
5140 'bo': 'bod',
5141 'br': 'bre',
5142 'bs': 'bos',
5143 'ca': 'cat',
5144 'ce': 'che',
5145 'ch': 'cha',
5146 'co': 'cos',
5147 'cr': 'cre',
5148 'cs': 'ces',
5149 'cu': 'chu',
5150 'cv': 'chv',
5151 'cy': 'cym',
5152 'da': 'dan',
5153 'de': 'deu',
5154 'dv': 'div',
5155 'dz': 'dzo',
5156 'ee': 'ewe',
5157 'el': 'ell',
5158 'en': 'eng',
5159 'eo': 'epo',
5160 'es': 'spa',
5161 'et': 'est',
5162 'eu': 'eus',
5163 'fa': 'fas',
5164 'ff': 'ful',
5165 'fi': 'fin',
5166 'fj': 'fij',
5167 'fo': 'fao',
5168 'fr': 'fra',
5169 'fy': 'fry',
5170 'ga': 'gle',
5171 'gd': 'gla',
5172 'gl': 'glg',
5173 'gn': 'grn',
5174 'gu': 'guj',
5175 'gv': 'glv',
5176 'ha': 'hau',
5177 'he': 'heb',
b7acc835 5178 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5179 'hi': 'hin',
5180 'ho': 'hmo',
5181 'hr': 'hrv',
5182 'ht': 'hat',
5183 'hu': 'hun',
5184 'hy': 'hye',
5185 'hz': 'her',
5186 'ia': 'ina',
5187 'id': 'ind',
b7acc835 5188 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5189 'ie': 'ile',
5190 'ig': 'ibo',
5191 'ii': 'iii',
5192 'ik': 'ipk',
5193 'io': 'ido',
5194 'is': 'isl',
5195 'it': 'ita',
5196 'iu': 'iku',
5197 'ja': 'jpn',
5198 'jv': 'jav',
5199 'ka': 'kat',
5200 'kg': 'kon',
5201 'ki': 'kik',
5202 'kj': 'kua',
5203 'kk': 'kaz',
5204 'kl': 'kal',
5205 'km': 'khm',
5206 'kn': 'kan',
5207 'ko': 'kor',
5208 'kr': 'kau',
5209 'ks': 'kas',
5210 'ku': 'kur',
5211 'kv': 'kom',
5212 'kw': 'cor',
5213 'ky': 'kir',
5214 'la': 'lat',
5215 'lb': 'ltz',
5216 'lg': 'lug',
5217 'li': 'lim',
5218 'ln': 'lin',
5219 'lo': 'lao',
5220 'lt': 'lit',
5221 'lu': 'lub',
5222 'lv': 'lav',
5223 'mg': 'mlg',
5224 'mh': 'mah',
5225 'mi': 'mri',
5226 'mk': 'mkd',
5227 'ml': 'mal',
5228 'mn': 'mon',
5229 'mr': 'mar',
5230 'ms': 'msa',
5231 'mt': 'mlt',
5232 'my': 'mya',
5233 'na': 'nau',
5234 'nb': 'nob',
5235 'nd': 'nde',
5236 'ne': 'nep',
5237 'ng': 'ndo',
5238 'nl': 'nld',
5239 'nn': 'nno',
5240 'no': 'nor',
5241 'nr': 'nbl',
5242 'nv': 'nav',
5243 'ny': 'nya',
5244 'oc': 'oci',
5245 'oj': 'oji',
5246 'om': 'orm',
5247 'or': 'ori',
5248 'os': 'oss',
5249 'pa': 'pan',
5250 'pi': 'pli',
5251 'pl': 'pol',
5252 'ps': 'pus',
5253 'pt': 'por',
5254 'qu': 'que',
5255 'rm': 'roh',
5256 'rn': 'run',
5257 'ro': 'ron',
5258 'ru': 'rus',
5259 'rw': 'kin',
5260 'sa': 'san',
5261 'sc': 'srd',
5262 'sd': 'snd',
5263 'se': 'sme',
5264 'sg': 'sag',
5265 'si': 'sin',
5266 'sk': 'slk',
5267 'sl': 'slv',
5268 'sm': 'smo',
5269 'sn': 'sna',
5270 'so': 'som',
5271 'sq': 'sqi',
5272 'sr': 'srp',
5273 'ss': 'ssw',
5274 'st': 'sot',
5275 'su': 'sun',
5276 'sv': 'swe',
5277 'sw': 'swa',
5278 'ta': 'tam',
5279 'te': 'tel',
5280 'tg': 'tgk',
5281 'th': 'tha',
5282 'ti': 'tir',
5283 'tk': 'tuk',
5284 'tl': 'tgl',
5285 'tn': 'tsn',
5286 'to': 'ton',
5287 'tr': 'tur',
5288 'ts': 'tso',
5289 'tt': 'tat',
5290 'tw': 'twi',
5291 'ty': 'tah',
5292 'ug': 'uig',
5293 'uk': 'ukr',
5294 'ur': 'urd',
5295 'uz': 'uzb',
5296 've': 'ven',
5297 'vi': 'vie',
5298 'vo': 'vol',
5299 'wa': 'wln',
5300 'wo': 'wol',
5301 'xh': 'xho',
5302 'yi': 'yid',
e9a50fba 5303 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5304 'yo': 'yor',
5305 'za': 'zha',
5306 'zh': 'zho',
5307 'zu': 'zul',
5308 }
5309
5310 @classmethod
5311 def short2long(cls, code):
5312 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5313 return cls._lang_map.get(code[:2])
5314
5315 @classmethod
5316 def long2short(cls, code):
5317 """Convert language code from ISO 639-2/T to ISO 639-1"""
5318 for short_name, long_name in cls._lang_map.items():
5319 if long_name == code:
5320 return short_name
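# Editorial usage sketch (not part of the source file):
# >>> ISO639Utils.short2long('en')
# 'eng'
# >>> ISO639Utils.long2short('deu')
# 'de'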
5321
5322
4eb10f66
YCH
5323class ISO3166Utils(object):
5324 # From http://data.okfn.org/data/core/country-list
5325 _country_map = {
5326 'AF': 'Afghanistan',
5327 'AX': 'Åland Islands',
5328 'AL': 'Albania',
5329 'DZ': 'Algeria',
5330 'AS': 'American Samoa',
5331 'AD': 'Andorra',
5332 'AO': 'Angola',
5333 'AI': 'Anguilla',
5334 'AQ': 'Antarctica',
5335 'AG': 'Antigua and Barbuda',
5336 'AR': 'Argentina',
5337 'AM': 'Armenia',
5338 'AW': 'Aruba',
5339 'AU': 'Australia',
5340 'AT': 'Austria',
5341 'AZ': 'Azerbaijan',
5342 'BS': 'Bahamas',
5343 'BH': 'Bahrain',
5344 'BD': 'Bangladesh',
5345 'BB': 'Barbados',
5346 'BY': 'Belarus',
5347 'BE': 'Belgium',
5348 'BZ': 'Belize',
5349 'BJ': 'Benin',
5350 'BM': 'Bermuda',
5351 'BT': 'Bhutan',
5352 'BO': 'Bolivia, Plurinational State of',
5353 'BQ': 'Bonaire, Sint Eustatius and Saba',
5354 'BA': 'Bosnia and Herzegovina',
5355 'BW': 'Botswana',
5356 'BV': 'Bouvet Island',
5357 'BR': 'Brazil',
5358 'IO': 'British Indian Ocean Territory',
5359 'BN': 'Brunei Darussalam',
5360 'BG': 'Bulgaria',
5361 'BF': 'Burkina Faso',
5362 'BI': 'Burundi',
5363 'KH': 'Cambodia',
5364 'CM': 'Cameroon',
5365 'CA': 'Canada',
5366 'CV': 'Cape Verde',
5367 'KY': 'Cayman Islands',
5368 'CF': 'Central African Republic',
5369 'TD': 'Chad',
5370 'CL': 'Chile',
5371 'CN': 'China',
5372 'CX': 'Christmas Island',
5373 'CC': 'Cocos (Keeling) Islands',
5374 'CO': 'Colombia',
5375 'KM': 'Comoros',
5376 'CG': 'Congo',
5377 'CD': 'Congo, the Democratic Republic of the',
5378 'CK': 'Cook Islands',
5379 'CR': 'Costa Rica',
5380 'CI': 'Côte d\'Ivoire',
5381 'HR': 'Croatia',
5382 'CU': 'Cuba',
5383 'CW': 'Curaçao',
5384 'CY': 'Cyprus',
5385 'CZ': 'Czech Republic',
5386 'DK': 'Denmark',
5387 'DJ': 'Djibouti',
5388 'DM': 'Dominica',
5389 'DO': 'Dominican Republic',
5390 'EC': 'Ecuador',
5391 'EG': 'Egypt',
5392 'SV': 'El Salvador',
5393 'GQ': 'Equatorial Guinea',
5394 'ER': 'Eritrea',
5395 'EE': 'Estonia',
5396 'ET': 'Ethiopia',
5397 'FK': 'Falkland Islands (Malvinas)',
5398 'FO': 'Faroe Islands',
5399 'FJ': 'Fiji',
5400 'FI': 'Finland',
5401 'FR': 'France',
5402 'GF': 'French Guiana',
5403 'PF': 'French Polynesia',
5404 'TF': 'French Southern Territories',
5405 'GA': 'Gabon',
5406 'GM': 'Gambia',
5407 'GE': 'Georgia',
5408 'DE': 'Germany',
5409 'GH': 'Ghana',
5410 'GI': 'Gibraltar',
5411 'GR': 'Greece',
5412 'GL': 'Greenland',
5413 'GD': 'Grenada',
5414 'GP': 'Guadeloupe',
5415 'GU': 'Guam',
5416 'GT': 'Guatemala',
5417 'GG': 'Guernsey',
5418 'GN': 'Guinea',
5419 'GW': 'Guinea-Bissau',
5420 'GY': 'Guyana',
5421 'HT': 'Haiti',
5422 'HM': 'Heard Island and McDonald Islands',
5423 'VA': 'Holy See (Vatican City State)',
5424 'HN': 'Honduras',
5425 'HK': 'Hong Kong',
5426 'HU': 'Hungary',
5427 'IS': 'Iceland',
5428 'IN': 'India',
5429 'ID': 'Indonesia',
5430 'IR': 'Iran, Islamic Republic of',
5431 'IQ': 'Iraq',
5432 'IE': 'Ireland',
5433 'IM': 'Isle of Man',
5434 'IL': 'Israel',
5435 'IT': 'Italy',
5436 'JM': 'Jamaica',
5437 'JP': 'Japan',
5438 'JE': 'Jersey',
5439 'JO': 'Jordan',
5440 'KZ': 'Kazakhstan',
5441 'KE': 'Kenya',
5442 'KI': 'Kiribati',
5443 'KP': 'Korea, Democratic People\'s Republic of',
5444 'KR': 'Korea, Republic of',
5445 'KW': 'Kuwait',
5446 'KG': 'Kyrgyzstan',
5447 'LA': 'Lao People\'s Democratic Republic',
5448 'LV': 'Latvia',
5449 'LB': 'Lebanon',
5450 'LS': 'Lesotho',
5451 'LR': 'Liberia',
5452 'LY': 'Libya',
5453 'LI': 'Liechtenstein',
5454 'LT': 'Lithuania',
5455 'LU': 'Luxembourg',
5456 'MO': 'Macao',
5457 'MK': 'Macedonia, the Former Yugoslav Republic of',
5458 'MG': 'Madagascar',
5459 'MW': 'Malawi',
5460 'MY': 'Malaysia',
5461 'MV': 'Maldives',
5462 'ML': 'Mali',
5463 'MT': 'Malta',
5464 'MH': 'Marshall Islands',
5465 'MQ': 'Martinique',
5466 'MR': 'Mauritania',
5467 'MU': 'Mauritius',
5468 'YT': 'Mayotte',
5469 'MX': 'Mexico',
5470 'FM': 'Micronesia, Federated States of',
5471 'MD': 'Moldova, Republic of',
5472 'MC': 'Monaco',
5473 'MN': 'Mongolia',
5474 'ME': 'Montenegro',
5475 'MS': 'Montserrat',
5476 'MA': 'Morocco',
5477 'MZ': 'Mozambique',
5478 'MM': 'Myanmar',
5479 'NA': 'Namibia',
5480 'NR': 'Nauru',
5481 'NP': 'Nepal',
5482 'NL': 'Netherlands',
5483 'NC': 'New Caledonia',
5484 'NZ': 'New Zealand',
5485 'NI': 'Nicaragua',
5486 'NE': 'Niger',
5487 'NG': 'Nigeria',
5488 'NU': 'Niue',
5489 'NF': 'Norfolk Island',
5490 'MP': 'Northern Mariana Islands',
5491 'NO': 'Norway',
5492 'OM': 'Oman',
5493 'PK': 'Pakistan',
5494 'PW': 'Palau',
5495 'PS': 'Palestine, State of',
5496 'PA': 'Panama',
5497 'PG': 'Papua New Guinea',
5498 'PY': 'Paraguay',
5499 'PE': 'Peru',
5500 'PH': 'Philippines',
5501 'PN': 'Pitcairn',
5502 'PL': 'Poland',
5503 'PT': 'Portugal',
5504 'PR': 'Puerto Rico',
5505 'QA': 'Qatar',
5506 'RE': 'Réunion',
5507 'RO': 'Romania',
5508 'RU': 'Russian Federation',
5509 'RW': 'Rwanda',
5510 'BL': 'Saint Barthélemy',
5511 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5512 'KN': 'Saint Kitts and Nevis',
5513 'LC': 'Saint Lucia',
5514 'MF': 'Saint Martin (French part)',
5515 'PM': 'Saint Pierre and Miquelon',
5516 'VC': 'Saint Vincent and the Grenadines',
5517 'WS': 'Samoa',
5518 'SM': 'San Marino',
5519 'ST': 'Sao Tome and Principe',
5520 'SA': 'Saudi Arabia',
5521 'SN': 'Senegal',
5522 'RS': 'Serbia',
5523 'SC': 'Seychelles',
5524 'SL': 'Sierra Leone',
5525 'SG': 'Singapore',
5526 'SX': 'Sint Maarten (Dutch part)',
5527 'SK': 'Slovakia',
5528 'SI': 'Slovenia',
5529 'SB': 'Solomon Islands',
5530 'SO': 'Somalia',
5531 'ZA': 'South Africa',
5532 'GS': 'South Georgia and the South Sandwich Islands',
5533 'SS': 'South Sudan',
5534 'ES': 'Spain',
5535 'LK': 'Sri Lanka',
5536 'SD': 'Sudan',
5537 'SR': 'Suriname',
5538 'SJ': 'Svalbard and Jan Mayen',
5539 'SZ': 'Swaziland',
5540 'SE': 'Sweden',
5541 'CH': 'Switzerland',
5542 'SY': 'Syrian Arab Republic',
5543 'TW': 'Taiwan, Province of China',
5544 'TJ': 'Tajikistan',
5545 'TZ': 'Tanzania, United Republic of',
5546 'TH': 'Thailand',
5547 'TL': 'Timor-Leste',
5548 'TG': 'Togo',
5549 'TK': 'Tokelau',
5550 'TO': 'Tonga',
5551 'TT': 'Trinidad and Tobago',
5552 'TN': 'Tunisia',
5553 'TR': 'Turkey',
5554 'TM': 'Turkmenistan',
5555 'TC': 'Turks and Caicos Islands',
5556 'TV': 'Tuvalu',
5557 'UG': 'Uganda',
5558 'UA': 'Ukraine',
5559 'AE': 'United Arab Emirates',
5560 'GB': 'United Kingdom',
5561 'US': 'United States',
5562 'UM': 'United States Minor Outlying Islands',
5563 'UY': 'Uruguay',
5564 'UZ': 'Uzbekistan',
5565 'VU': 'Vanuatu',
5566 'VE': 'Venezuela, Bolivarian Republic of',
5567 'VN': 'Viet Nam',
5568 'VG': 'Virgin Islands, British',
5569 'VI': 'Virgin Islands, U.S.',
5570 'WF': 'Wallis and Futuna',
5571 'EH': 'Western Sahara',
5572 'YE': 'Yemen',
5573 'ZM': 'Zambia',
5574 'ZW': 'Zimbabwe',
5575 }
5576
5577 @classmethod
5578 def short2full(cls, code):
5579 """Convert an ISO 3166-2 country code to the corresponding full name"""
5580 return cls._country_map.get(code.upper())
5581
5582
773f291d
S
5583class GeoUtils(object):
5584 # Major IPv4 address blocks per country
5585 _country_ip_map = {
53896ca5 5586 'AD': '46.172.224.0/19',
773f291d
S
5587 'AE': '94.200.0.0/13',
5588 'AF': '149.54.0.0/17',
5589 'AG': '209.59.64.0/18',
5590 'AI': '204.14.248.0/21',
5591 'AL': '46.99.0.0/16',
5592 'AM': '46.70.0.0/15',
5593 'AO': '105.168.0.0/13',
53896ca5
S
5594 'AP': '182.50.184.0/21',
5595 'AQ': '23.154.160.0/24',
773f291d
S
5596 'AR': '181.0.0.0/12',
5597 'AS': '202.70.112.0/20',
53896ca5 5598 'AT': '77.116.0.0/14',
773f291d
S
5599 'AU': '1.128.0.0/11',
5600 'AW': '181.41.0.0/18',
53896ca5
S
5601 'AX': '185.217.4.0/22',
5602 'AZ': '5.197.0.0/16',
773f291d
S
5603 'BA': '31.176.128.0/17',
5604 'BB': '65.48.128.0/17',
5605 'BD': '114.130.0.0/16',
5606 'BE': '57.0.0.0/8',
53896ca5 5607 'BF': '102.178.0.0/15',
773f291d
S
5608 'BG': '95.42.0.0/15',
5609 'BH': '37.131.0.0/17',
5610 'BI': '154.117.192.0/18',
5611 'BJ': '137.255.0.0/16',
53896ca5 5612 'BL': '185.212.72.0/23',
773f291d
S
5613 'BM': '196.12.64.0/18',
5614 'BN': '156.31.0.0/16',
5615 'BO': '161.56.0.0/16',
5616 'BQ': '161.0.80.0/20',
53896ca5 5617 'BR': '191.128.0.0/12',
773f291d
S
5618 'BS': '24.51.64.0/18',
5619 'BT': '119.2.96.0/19',
5620 'BW': '168.167.0.0/16',
5621 'BY': '178.120.0.0/13',
5622 'BZ': '179.42.192.0/18',
5623 'CA': '99.224.0.0/11',
5624 'CD': '41.243.0.0/16',
53896ca5
S
5625 'CF': '197.242.176.0/21',
5626 'CG': '160.113.0.0/16',
773f291d 5627 'CH': '85.0.0.0/13',
53896ca5 5628 'CI': '102.136.0.0/14',
773f291d
S
5629 'CK': '202.65.32.0/19',
5630 'CL': '152.172.0.0/14',
53896ca5 5631 'CM': '102.244.0.0/14',
773f291d
S
5632 'CN': '36.128.0.0/10',
5633 'CO': '181.240.0.0/12',
5634 'CR': '201.192.0.0/12',
5635 'CU': '152.206.0.0/15',
5636 'CV': '165.90.96.0/19',
5637 'CW': '190.88.128.0/17',
53896ca5 5638 'CY': '31.153.0.0/16',
773f291d
S
5639 'CZ': '88.100.0.0/14',
5640 'DE': '53.0.0.0/8',
5641 'DJ': '197.241.0.0/17',
5642 'DK': '87.48.0.0/12',
5643 'DM': '192.243.48.0/20',
5644 'DO': '152.166.0.0/15',
5645 'DZ': '41.96.0.0/12',
5646 'EC': '186.68.0.0/15',
5647 'EE': '90.190.0.0/15',
5648 'EG': '156.160.0.0/11',
5649 'ER': '196.200.96.0/20',
5650 'ES': '88.0.0.0/11',
5651 'ET': '196.188.0.0/14',
5652 'EU': '2.16.0.0/13',
5653 'FI': '91.152.0.0/13',
5654 'FJ': '144.120.0.0/16',
53896ca5 5655 'FK': '80.73.208.0/21',
773f291d
S
5656 'FM': '119.252.112.0/20',
5657 'FO': '88.85.32.0/19',
5658 'FR': '90.0.0.0/9',
5659 'GA': '41.158.0.0/15',
5660 'GB': '25.0.0.0/8',
5661 'GD': '74.122.88.0/21',
5662 'GE': '31.146.0.0/16',
5663 'GF': '161.22.64.0/18',
5664 'GG': '62.68.160.0/19',
53896ca5
S
5665 'GH': '154.160.0.0/12',
5666 'GI': '95.164.0.0/16',
773f291d
S
5667 'GL': '88.83.0.0/19',
5668 'GM': '160.182.0.0/15',
5669 'GN': '197.149.192.0/18',
5670 'GP': '104.250.0.0/19',
5671 'GQ': '105.235.224.0/20',
5672 'GR': '94.64.0.0/13',
5673 'GT': '168.234.0.0/16',
5674 'GU': '168.123.0.0/16',
5675 'GW': '197.214.80.0/20',
5676 'GY': '181.41.64.0/18',
5677 'HK': '113.252.0.0/14',
5678 'HN': '181.210.0.0/16',
5679 'HR': '93.136.0.0/13',
5680 'HT': '148.102.128.0/17',
5681 'HU': '84.0.0.0/14',
5682 'ID': '39.192.0.0/10',
5683 'IE': '87.32.0.0/12',
5684 'IL': '79.176.0.0/13',
5685 'IM': '5.62.80.0/20',
5686 'IN': '117.192.0.0/10',
5687 'IO': '203.83.48.0/21',
5688 'IQ': '37.236.0.0/14',
5689 'IR': '2.176.0.0/12',
5690 'IS': '82.221.0.0/16',
5691 'IT': '79.0.0.0/10',
5692 'JE': '87.244.64.0/18',
5693 'JM': '72.27.0.0/17',
5694 'JO': '176.29.0.0/16',
53896ca5 5695 'JP': '133.0.0.0/8',
773f291d
S
5696 'KE': '105.48.0.0/12',
5697 'KG': '158.181.128.0/17',
5698 'KH': '36.37.128.0/17',
5699 'KI': '103.25.140.0/22',
5700 'KM': '197.255.224.0/20',
53896ca5 5701 'KN': '198.167.192.0/19',
773f291d
S
5702 'KP': '175.45.176.0/22',
5703 'KR': '175.192.0.0/10',
5704 'KW': '37.36.0.0/14',
5705 'KY': '64.96.0.0/15',
5706 'KZ': '2.72.0.0/13',
5707 'LA': '115.84.64.0/18',
5708 'LB': '178.135.0.0/16',
53896ca5 5709 'LC': '24.92.144.0/20',
773f291d
S
5710 'LI': '82.117.0.0/19',
5711 'LK': '112.134.0.0/15',
53896ca5 5712 'LR': '102.183.0.0/16',
773f291d
S
5713 'LS': '129.232.0.0/17',
5714 'LT': '78.56.0.0/13',
5715 'LU': '188.42.0.0/16',
5716 'LV': '46.109.0.0/16',
5717 'LY': '41.252.0.0/14',
5718 'MA': '105.128.0.0/11',
5719 'MC': '88.209.64.0/18',
5720 'MD': '37.246.0.0/16',
5721 'ME': '178.175.0.0/17',
5722 'MF': '74.112.232.0/21',
5723 'MG': '154.126.0.0/17',
5724 'MH': '117.103.88.0/21',
5725 'MK': '77.28.0.0/15',
5726 'ML': '154.118.128.0/18',
5727 'MM': '37.111.0.0/17',
5728 'MN': '49.0.128.0/17',
5729 'MO': '60.246.0.0/16',
5730 'MP': '202.88.64.0/20',
5731 'MQ': '109.203.224.0/19',
5732 'MR': '41.188.64.0/18',
5733 'MS': '208.90.112.0/22',
5734 'MT': '46.11.0.0/16',
5735 'MU': '105.16.0.0/12',
5736 'MV': '27.114.128.0/18',
53896ca5 5737 'MW': '102.70.0.0/15',
773f291d
S
5738 'MX': '187.192.0.0/11',
5739 'MY': '175.136.0.0/13',
5740 'MZ': '197.218.0.0/15',
5741 'NA': '41.182.0.0/16',
5742 'NC': '101.101.0.0/18',
5743 'NE': '197.214.0.0/18',
5744 'NF': '203.17.240.0/22',
5745 'NG': '105.112.0.0/12',
5746 'NI': '186.76.0.0/15',
5747 'NL': '145.96.0.0/11',
5748 'NO': '84.208.0.0/13',
5749 'NP': '36.252.0.0/15',
5750 'NR': '203.98.224.0/19',
5751 'NU': '49.156.48.0/22',
5752 'NZ': '49.224.0.0/14',
5753 'OM': '5.36.0.0/15',
5754 'PA': '186.72.0.0/15',
5755 'PE': '186.160.0.0/14',
5756 'PF': '123.50.64.0/18',
5757 'PG': '124.240.192.0/19',
5758 'PH': '49.144.0.0/13',
5759 'PK': '39.32.0.0/11',
5760 'PL': '83.0.0.0/11',
5761 'PM': '70.36.0.0/20',
5762 'PR': '66.50.0.0/16',
5763 'PS': '188.161.0.0/16',
5764 'PT': '85.240.0.0/13',
5765 'PW': '202.124.224.0/20',
5766 'PY': '181.120.0.0/14',
5767 'QA': '37.210.0.0/15',
53896ca5 5768 'RE': '102.35.0.0/16',
773f291d 5769 'RO': '79.112.0.0/13',
53896ca5 5770 'RS': '93.86.0.0/15',
773f291d 5771 'RU': '5.136.0.0/13',
53896ca5 5772 'RW': '41.186.0.0/16',
773f291d
S
5773 'SA': '188.48.0.0/13',
5774 'SB': '202.1.160.0/19',
5775 'SC': '154.192.0.0/11',
53896ca5 5776 'SD': '102.120.0.0/13',
773f291d 5777 'SE': '78.64.0.0/12',
53896ca5 5778 'SG': '8.128.0.0/10',
773f291d
S
5779 'SI': '188.196.0.0/14',
5780 'SK': '78.98.0.0/15',
53896ca5 5781 'SL': '102.143.0.0/17',
773f291d
S
5782 'SM': '89.186.32.0/19',
5783 'SN': '41.82.0.0/15',
53896ca5 5784 'SO': '154.115.192.0/18',
773f291d
S
5785 'SR': '186.179.128.0/17',
5786 'SS': '105.235.208.0/21',
5787 'ST': '197.159.160.0/19',
5788 'SV': '168.243.0.0/16',
5789 'SX': '190.102.0.0/20',
5790 'SY': '5.0.0.0/16',
5791 'SZ': '41.84.224.0/19',
5792 'TC': '65.255.48.0/20',
5793 'TD': '154.68.128.0/19',
5794 'TG': '196.168.0.0/14',
5795 'TH': '171.96.0.0/13',
5796 'TJ': '85.9.128.0/18',
5797 'TK': '27.96.24.0/21',
5798 'TL': '180.189.160.0/20',
5799 'TM': '95.85.96.0/19',
5800 'TN': '197.0.0.0/11',
5801 'TO': '175.176.144.0/21',
5802 'TR': '78.160.0.0/11',
5803 'TT': '186.44.0.0/15',
5804 'TV': '202.2.96.0/19',
5805 'TW': '120.96.0.0/11',
5806 'TZ': '156.156.0.0/14',
53896ca5
S
5807 'UA': '37.52.0.0/14',
5808 'UG': '102.80.0.0/13',
5809 'US': '6.0.0.0/8',
773f291d 5810 'UY': '167.56.0.0/13',
53896ca5 5811 'UZ': '84.54.64.0/18',
773f291d 5812 'VA': '212.77.0.0/19',
53896ca5 5813 'VC': '207.191.240.0/21',
773f291d 5814 'VE': '186.88.0.0/13',
53896ca5 5815 'VG': '66.81.192.0/20',
773f291d
S
5816 'VI': '146.226.0.0/16',
5817 'VN': '14.160.0.0/11',
5818 'VU': '202.80.32.0/20',
5819 'WF': '117.20.32.0/21',
5820 'WS': '202.4.32.0/19',
5821 'YE': '134.35.0.0/16',
5822 'YT': '41.242.116.0/22',
5823 'ZA': '41.0.0.0/11',
53896ca5
S
5824 'ZM': '102.144.0.0/13',
5825 'ZW': '102.177.192.0/18',
773f291d
S
5826 }
5827
5828 @classmethod
5f95927a
S
5829 def random_ipv4(cls, code_or_block):
5830 if len(code_or_block) == 2:
5831 block = cls._country_ip_map.get(code_or_block.upper())
5832 if not block:
5833 return None
5834 else:
5835 block = code_or_block
773f291d
S
5836 addr, preflen = block.split('/')
5837 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5838 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5839 return compat_str(socket.inet_ntoa(
4248dad9 5840 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
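# Editor's sketch (not part of the original source): minimal usage of random_ipv4,
# assuming the enclosing class is GeoUtils as used by the geo-bypass machinery.
# The helper name below is hypothetical and the function is never called here.
def _geo_bypass_ip_example():
    fake_fr_ip = GeoUtils.random_ipv4('FR')                   # two-letter country code
    fake_subnet_ip = GeoUtils.random_ipv4('203.0.113.0/24')   # explicit CIDR block
    return fake_fr_ip, fake_subnet_ip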
773f291d
S
5841
5842
91410c9b 5843class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5844 def __init__(self, proxies=None):
5845 # Set default handlers
5846 for type in ('http', 'https'):
5847 setattr(self, '%s_open' % type,
5848 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5849 meth(r, proxy, type))
38e87f6c 5850 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5851
91410c9b 5852 def proxy_open(self, req, proxy, type):
2461f79d 5853 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5854 if req_proxy is not None:
5855 proxy = req_proxy
2461f79d
PH
5856 del req.headers['Ytdl-request-proxy']
5857
5858 if proxy == '__noproxy__':
5859 return None # No Proxy
51fb4995 5860 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5861 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5862 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
71aff188 5863 return None
91410c9b
PH
5864 return compat_urllib_request.ProxyHandler.proxy_open(
5865 self, req, proxy, type)
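# Editor's sketch (not part of the original source): how a caller would opt out of
# the default proxy for a single request via the Ytdl-request-proxy header. The
# proxy URL is a placeholder; the helper is illustrative only and never called.
def _per_request_proxy_example():
    opener = compat_urllib_request.build_opener(
        PerRequestProxyHandler({'http': 'http://proxy.example:3128'}))
    req = compat_urllib_request.Request('http://example.com/')
    req.add_header('Ytdl-request-proxy', '__noproxy__')  # consumed by proxy_open() above
    return opener, req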
5bc880b9
YCH
5866
5867
0a5445dd
YCH
5868# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5869# released into Public Domain
5870# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5871
5872def long_to_bytes(n, blocksize=0):
5873 """long_to_bytes(n:long, blocksize:int) : string
5874 Convert a long integer to a byte string.
5875
5876 If optional blocksize is given and greater than zero, pad the front of the
5877 byte string with binary zeros so that the length is a multiple of
5878 blocksize.
5879 """
5880 # after much testing, this algorithm was deemed to be the fastest
5881 s = b''
5882 n = int(n)
5883 while n > 0:
5884 s = compat_struct_pack('>I', n & 0xffffffff) + s
5885 n = n >> 32
5886 # strip off leading zeros
5887 for i in range(len(s)):
5888 if s[i] != b'\000'[0]:
5889 break
5890 else:
5891 # only happens when n == 0
5892 s = b'\000'
5893 i = 0
5894 s = s[i:]
5895 # add back some pad bytes. this could be done more efficiently w.r.t. the
5896 # de-padding being done above, but sigh...
5897 if blocksize > 0 and len(s) % blocksize:
5898 s = (blocksize - len(s) % blocksize) * b'\000' + s
5899 return s
5900
5901
5902def bytes_to_long(s):
5903 """bytes_to_long(string) : long
5904 Convert a byte string to a long integer.
5905
5906 This is (essentially) the inverse of long_to_bytes().
5907 """
5908 acc = 0
5909 length = len(s)
5910 if length % 4:
5911 extra = (4 - length % 4)
5912 s = b'\000' * extra + s
5913 length = length + extra
5914 for i in range(0, length, 4):
5915 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5916 return acc
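# Editor's sketch (not part of the original source): long_to_bytes and bytes_to_long
# are inverses, so a round-trip restores the original integer. Hypothetical helper,
# never called.
def _long_bytes_roundtrip_example():
    n = 0xDEADBEEFCAFE
    as_bytes = long_to_bytes(n, blocksize=8)  # front-padded with b'\x00' to 8 bytes
    assert as_bytes == b'\x00\x00\xde\xad\xbe\xef\xca\xfe'
    assert bytes_to_long(as_bytes) == n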
5917
5918
5bc880b9
YCH
5919def ohdave_rsa_encrypt(data, exponent, modulus):
5920 '''
5921 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5922
5923 Input:
5924 data: data to encrypt, bytes-like object
5925 exponent, modulus: parameter e and N of RSA algorithm, both integer
5926 Output: hex string of encrypted data
5927
5928 Limitation: supports one block encryption only
5929 '''
5930
5931 payload = int(binascii.hexlify(data[::-1]), 16)
5932 encrypted = pow(payload, exponent, modulus)
5933 return '%x' % encrypted
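# Editor's sketch (not part of the original source): ohdave_rsa_encrypt is textbook
# RSA on a single block, so toy parameters make it easy to verify by hand. These
# numbers are NOT a real key.
def _ohdave_rsa_toy_example():
    # b'\x02\x01' is read little-endian -> 0x0102 == 258; pow(258, 3, 1000) == 512 == 0x200
    assert ohdave_rsa_encrypt(b'\x02\x01', 3, 1000) == '200'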
81bdc8fd
YCH
5934
5935
f48409c7
YCH
5936def pkcs1pad(data, length):
5937 """
5938 Padding input data with PKCS#1 scheme
5939
5940 @param {int[]} data input data
5941 @param {int} length target length
5942 @returns {int[]} padded data
5943 """
5944 if len(data) > length - 11:
5945 raise ValueError('Input data too long for PKCS#1 padding')
5946
5947 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5948 return [0, 2] + pseudo_random + [0] + data
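# Editor's sketch (not part of the original source): pkcs1pad operates on lists of
# ints and yields the classic [0x00, 0x02, <random filler>, 0x00, <data>] layout.
def _pkcs1pad_example():
    padded = pkcs1pad([1, 2, 3], 16)
    assert len(padded) == 16
    assert padded[:2] == [0, 2] and padded[-4] == 0 and padded[-3:] == [1, 2, 3]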
5949
5950
5eb6bdce 5951def encode_base_n(num, n, table=None):
59f898b7 5952 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
5953 if not table:
5954 table = FULL_TABLE[:n]
5955
5eb6bdce
YCH
5956 if n > len(table):
5957 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5958
5959 if num == 0:
5960 return table[0]
5961
81bdc8fd
YCH
5962 ret = ''
5963 while num:
5964 ret = table[num % n] + ret
5965 num = num // n
5966 return ret
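# Editor's sketch (not part of the original source): encode_base_n renders a
# non-negative integer in the given base using the 0-9a-zA-Z table by default.
def _encode_base_n_example():
    assert encode_base_n(255, 16) == 'ff'
    assert encode_base_n(255, 2) == '11111111'
    assert encode_base_n(0, 36) == '0'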
f52354a8
YCH
5967
5968
5969def decode_packed_codes(code):
06b3fe29 5970 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5971 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
5972 base = int(base)
5973 count = int(count)
5974 symbols = symbols.split('|')
5975 symbol_table = {}
5976
5977 while count:
5978 count -= 1
5eb6bdce 5979 base_n_count = encode_base_n(count, base)
f52354a8
YCH
5980 symbol_table[base_n_count] = symbols[count] or base_n_count
5981
5982 return re.sub(
5983 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5984 obfuscated_code)
e154c651 5985
5986
1ced2221
S
5987def caesar(s, alphabet, shift):
5988 if shift == 0:
5989 return s
5990 l = len(alphabet)
5991 return ''.join(
5992 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5993 for c in s)
5994
5995
5996def rot47(s):
5997 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
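# Editor's sketch (not part of the original source): rot47 shifts within the
# printable ASCII alphabet and is its own inverse; characters outside the
# alphabet (such as spaces) pass through caesar() unchanged.
def _rot47_example():
    assert rot47('Hello, World!') == 'w6==@[ (@C=5P'
    assert rot47(rot47('Hello, World!')) == 'Hello, World!'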
5998
5999
e154c651 6000def parse_m3u8_attributes(attrib):
6001 info = {}
6002 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
6003 if val.startswith('"'):
6004 val = val[1:-1]
6005 info[key] = val
6006 return info
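# Editor's sketch (not part of the original source): an EXT-X-STREAM-INF attribute
# list parses into a plain dict; surrounding quotes are stripped from values.
def _parse_m3u8_attributes_example():
    attrs = parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=640x360')
    assert attrs == {
        'BANDWIDTH': '1280000',
        'CODECS': 'avc1.4d401f,mp4a.40.2',
        'RESOLUTION': '640x360',
    }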
1143535d
YCH
6007
6008
6009def urshift(val, n):
6010 return val >> n if val >= 0 else (val + 0x100000000) >> n
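# Editor's sketch (not part of the original source): urshift emulates JavaScript's
# unsigned 32-bit right shift (>>>) for values that may be negative in Python.
def _urshift_example():
    assert urshift(16, 2) == 4      # same as >> for non-negative values
    assert urshift(-1, 28) == 0xf   # -1 is treated as 0xffffffff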
d3f8e038
YCH
6011
6012
6013# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 6014# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
6015def decode_png(png_data):
6016 # Reference: https://www.w3.org/TR/PNG/
6017 header = png_data[8:]
6018
6019 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
6020 raise IOError('Not a valid PNG file.')
6021
6022 int_map = {1: '>B', 2: '>H', 4: '>I'}
6023 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
6024
6025 chunks = []
6026
6027 while header:
6028 length = unpack_integer(header[:4])
6029 header = header[4:]
6030
6031 chunk_type = header[:4]
6032 header = header[4:]
6033
6034 chunk_data = header[:length]
6035 header = header[length:]
6036
6037 header = header[4:] # Skip CRC
6038
6039 chunks.append({
6040 'type': chunk_type,
6041 'length': length,
6042 'data': chunk_data
6043 })
6044
6045 ihdr = chunks[0]['data']
6046
6047 width = unpack_integer(ihdr[:4])
6048 height = unpack_integer(ihdr[4:8])
6049
6050 idat = b''
6051
6052 for chunk in chunks:
6053 if chunk['type'] == b'IDAT':
6054 idat += chunk['data']
6055
6056 if not idat:
6057 raise IOError('Unable to read PNG data.')
6058
6059 decompressed_data = bytearray(zlib.decompress(idat))
6060
6061 stride = width * 3
6062 pixels = []
6063
6064 def _get_pixel(idx):
6065 x = idx % stride
6066 y = idx // stride
6067 return pixels[y][x]
6068
6069 for y in range(height):
6070 basePos = y * (1 + stride)
6071 filter_type = decompressed_data[basePos]
6072
6073 current_row = []
6074
6075 pixels.append(current_row)
6076
6077 for x in range(stride):
6078 color = decompressed_data[1 + basePos + x]
6079 basex = y * stride + x
6080 left = 0
6081 up = 0
6082
6083 if x > 2:
6084 left = _get_pixel(basex - 3)
6085 if y > 0:
6086 up = _get_pixel(basex - stride)
6087
6088 if filter_type == 1: # Sub
6089 color = (color + left) & 0xff
6090 elif filter_type == 2: # Up
6091 color = (color + up) & 0xff
6092 elif filter_type == 3: # Average
6093 color = (color + ((left + up) >> 1)) & 0xff
6094 elif filter_type == 4: # Paeth
6095 a = left
6096 b = up
6097 c = 0
6098
6099 if x > 2 and y > 0:
6100 c = _get_pixel(basex - stride - 3)
6101
6102 p = a + b - c
6103
6104 pa = abs(p - a)
6105 pb = abs(p - b)
6106 pc = abs(p - c)
6107
6108 if pa <= pb and pa <= pc:
6109 color = (color + a) & 0xff
6110 elif pb <= pc:
6111 color = (color + b) & 0xff
6112 else:
6113 color = (color + c) & 0xff
6114
6115 current_row.append(color)
6116
6117 return width, height, pixels
efa97bdc
YCH
6118
6119
6120def write_xattr(path, key, value):
6121 # This mess below finds the best xattr tool for the job
6122 try:
6123 # try the pyxattr module...
6124 import xattr
6125
53a7e3d2
YCH
6126 if hasattr(xattr, 'set'): # pyxattr
6127 # Unicode arguments are not supported in python-pyxattr until
6128 # version 0.5.0
067aa17e 6129 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6130 pyxattr_required_version = '0.5.0'
6131 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6132 # TODO: fallback to CLI tools
6133 raise XAttrUnavailableError(
6134 'python-pyxattr is detected but is too old. '
7a5c1cfe 6135 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6136 'Falling back to other xattr implementations' % (
6137 pyxattr_required_version, xattr.__version__))
6138
6139 setxattr = xattr.set
6140 else: # xattr
6141 setxattr = xattr.setxattr
efa97bdc
YCH
6142
6143 try:
53a7e3d2 6144 setxattr(path, key, value)
efa97bdc
YCH
6145 except EnvironmentError as e:
6146 raise XAttrMetadataError(e.errno, e.strerror)
6147
6148 except ImportError:
6149 if compat_os_name == 'nt':
6150 # Write xattrs to NTFS Alternate Data Streams:
6151 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6152 assert ':' not in key
6153 assert os.path.exists(path)
6154
6155 ads_fn = path + ':' + key
6156 try:
6157 with open(ads_fn, 'wb') as f:
6158 f.write(value)
6159 except EnvironmentError as e:
6160 raise XAttrMetadataError(e.errno, e.strerror)
6161 else:
6162 user_has_setfattr = check_executable('setfattr', ['--version'])
6163 user_has_xattr = check_executable('xattr', ['-h'])
6164
6165 if user_has_setfattr or user_has_xattr:
6166
6167 value = value.decode('utf-8')
6168 if user_has_setfattr:
6169 executable = 'setfattr'
6170 opts = ['-n', key, '-v', value]
6171 elif user_has_xattr:
6172 executable = 'xattr'
6173 opts = ['-w', key, value]
6174
3089bc74
S
6175 cmd = ([encodeFilename(executable, True)]
6176 + [encodeArgument(o) for o in opts]
6177 + [encodeFilename(path, True)])
efa97bdc
YCH
6178
6179 try:
d3c93ec2 6180 p = Popen(
efa97bdc
YCH
6181 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6182 except EnvironmentError as e:
6183 raise XAttrMetadataError(e.errno, e.strerror)
d3c93ec2 6184 stdout, stderr = p.communicate_or_kill()
efa97bdc
YCH
6185 stderr = stderr.decode('utf-8', 'replace')
6186 if p.returncode != 0:
6187 raise XAttrMetadataError(p.returncode, stderr)
6188
6189 else:
6190 # On Unix, but couldn't find pyxattr, setfattr, or xattr.
6191 if sys.platform.startswith('linux'):
6192 raise XAttrUnavailableError(
6193 "Couldn't find a tool to set the xattrs. "
6194 "Install either the python 'pyxattr' or 'xattr' "
6195 "modules, or the GNU 'attr' package "
6196 "(which contains the 'setfattr' tool).")
6197 else:
6198 raise XAttrUnavailableError(
6199 "Couldn't find a tool to set the xattrs. "
6200 "Install either the python 'xattr' module, "
6201 "or the 'xattr' binary.")
0c265486
YCH
6202
6203
6204def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6205 start_date = datetime.date(1950, 1, 1)
6206 end_date = datetime.date(1995, 12, 31)
6207 offset = random.randint(0, (end_date - start_date).days)
6208 random_date = start_date + datetime.timedelta(offset)
0c265486 6209 return {
aa374bc7
AS
6210 year_field: str(random_date.year),
6211 month_field: str(random_date.month),
6212 day_field: str(random_date.day),
0c265486 6213 }
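# Editor's sketch (not part of the original source): the result is keyed by the
# caller-supplied field names, ready to be dropped into a form payload.
def _random_birthday_example():
    fields = random_birthday('birth_year', 'birth_month', 'birth_day')
    assert set(fields) == {'birth_year', 'birth_month', 'birth_day'}
    assert 1950 <= int(fields['birth_year']) <= 1995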
732044af 6214
c76eb41b 6215
732044af 6216# Templates for internet shortcut files, which are plain text files.
6217DOT_URL_LINK_TEMPLATE = '''
6218[InternetShortcut]
6219URL=%(url)s
6220'''.lstrip()
6221
6222DOT_WEBLOC_LINK_TEMPLATE = '''
6223<?xml version="1.0" encoding="UTF-8"?>
6224<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6225<plist version="1.0">
6226<dict>
6227\t<key>URL</key>
6228\t<string>%(url)s</string>
6229</dict>
6230</plist>
6231'''.lstrip()
6232
6233DOT_DESKTOP_LINK_TEMPLATE = '''
6234[Desktop Entry]
6235Encoding=UTF-8
6236Name=%(filename)s
6237Type=Link
6238URL=%(url)s
6239Icon=text-html
6240'''.lstrip()
6241
08438d2c 6242LINK_TEMPLATES = {
6243 'url': DOT_URL_LINK_TEMPLATE,
6244 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
6245 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
6246}
6247
732044af 6248
6249def iri_to_uri(iri):
6250 """
6251 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6252
6253 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes (with an underlying UTF-8 encoding) every character that is not already escaped, leaving the existing escapes and the rest of the URI intact.
6254 """
6255
6256 iri_parts = compat_urllib_parse_urlparse(iri)
6257
6258 if '[' in iri_parts.netloc:
6259 raise ValueError('IPv6 URIs are not yet supported.')
6260 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6261
6262 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6263
6264 net_location = ''
6265 if iri_parts.username:
6266 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6267 if iri_parts.password is not None:
6268 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6269 net_location += '@'
6270
6271 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6272 # The 'idna' encoding produces ASCII text.
6273 if iri_parts.port is not None and iri_parts.port != 80:
6274 net_location += ':' + str(iri_parts.port)
6275
6276 return compat_urllib_parse_urlunparse(
6277 (iri_parts.scheme,
6278 net_location,
6279
6280 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6281
6282 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6283 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6284
6285 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6286 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6287
6288 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6289
6290 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
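# Editor's sketch (not part of the original source): non-ASCII path characters are
# percent-encoded as UTF-8 while already-escaped sequences and the URL structure
# are preserved.
def _iri_to_uri_example():
    assert iri_to_uri('http://example.com/dänisch') == 'http://example.com/d%C3%A4nisch'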
6291
6292
6293def to_high_limit_path(path):
6294 if sys.platform in ['win32', 'cygwin']:
6295 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6296 return r'\\?\ '.rstrip() + os.path.abspath(path)
6297
6298 return path
76d321f6 6299
c76eb41b 6300
b868936c 6301def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6302 if field is None:
6303 val = obj if obj is not None else default
6304 else:
6305 val = obj.get(field, default)
76d321f6 6306 if func and val not in ignore:
6307 val = func(val)
6308 return template % val if val not in ignore else default
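# Editor's sketch (not part of the original source): format_field renders an
# optional field through a template and falls back to the default when the value
# is missing or in the ignore set.
def _format_field_example():
    info = {'width': 1280, 'artist': ''}
    assert format_field(info, 'width', '%dpx') == '1280px'
    assert format_field(info, 'artist', default='unknown') == 'unknown'  # '' is ignored
    assert format_field(info, 'height', '%dpx') == ''                    # missing -> default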
00dd0cd5 6309
6310
6311def clean_podcast_url(url):
6312 return re.sub(r'''(?x)
6313 (?:
6314 (?:
6315 chtbl\.com/track|
6316 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6317 play\.podtrac\.com
6318 )/[^/]+|
6319 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6320 flex\.acast\.com|
6321 pd(?:
6322 cn\.co| # https://podcorn.com/analytics-prefix/
6323 st\.fm # https://podsights.com/docs/
6324 )/e
6325 )/''', '', url)
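# Editor's sketch (not part of the original source): known tracking/redirect
# prefixes are stripped so only the canonical media URL remains.
def _clean_podcast_url_example():
    assert clean_podcast_url(
        'https://chtbl.com/track/ABC123/traffic.megaphone.fm/episode.mp3'
    ) == 'https://traffic.megaphone.fm/episode.mp3'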
ffcb8191
THD
6326
6327
6328_HEX_TABLE = '0123456789abcdef'
6329
6330
6331def random_uuidv4():
6332 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
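# Editor's sketch (not part of the original source): the output has the usual
# 8-4-4-4-12 layout with a literal '4' in the third group, but the variant nibble
# is not constrained as it would be by the stdlib uuid module.
def _random_uuidv4_example():
    value = random_uuidv4()
    assert len(value) == 36 and value[14] == '4'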
0202b52a 6333
6334
6335def make_dir(path, to_screen=None):
6336 try:
6337 dn = os.path.dirname(path)
6338 if dn and not os.path.exists(dn):
6339 os.makedirs(dn)
6340 return True
6341 except (OSError, IOError) as err:
6342 if callable(to_screen):
6343 to_screen('unable to create directory ' + error_to_compat_str(err))
6344 return False
f74980cb 6345
6346
6347def get_executable_path():
c552ae88 6348 from zipimport import zipimporter
6349 if hasattr(sys, 'frozen'): # Running from PyInstaller
6350 path = os.path.dirname(sys.executable)
6351 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6352 path = os.path.join(os.path.dirname(__file__), '../..')
6353 else:
6354 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6355 return os.path.abspath(path)
6356
6357
2f567473 6358def load_plugins(name, suffix, namespace):
3ae5e797 6359 classes = {}
f74980cb 6360 try:
019a94f7
ÁS
6361 plugins_spec = importlib.util.spec_from_file_location(
6362 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
6363 plugins = importlib.util.module_from_spec(plugins_spec)
6364 sys.modules[plugins_spec.name] = plugins
6365 plugins_spec.loader.exec_module(plugins)
f74980cb 6366 for name in dir(plugins):
2f567473 6367 if name in namespace:
6368 continue
6369 if not name.endswith(suffix):
f74980cb 6370 continue
6371 klass = getattr(plugins, name)
3ae5e797 6372 classes[name] = namespace[name] = klass
019a94f7 6373 except FileNotFoundError:
f74980cb 6374 pass
f74980cb 6375 return classes
06167fbb 6376
6377
325ebc17 6378def traverse_obj(
352d63fd 6379 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6380 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6381 ''' Traverse nested list/dict/tuple
8f334380 6382 @param path_list A list of paths which are checked one by one.
6383 Each path is a list of keys where each key is a string,
2614f646 6384 a function, a tuple of strings or "...".
6385 When a fuction is given, it takes the key as argument and
6386 returns whether the key matches or not. When a tuple is given,
8f334380 6387 all the keys given in the tuple are traversed, and
6388 "..." traverses all the keys in the object
325ebc17 6389 @param default Default value to return
352d63fd 6390 @param expected_type Only accept final value of this type (Can also be any callable)
6391 @param get_all Return all the values obtained from a path or only the first one
324ad820 6392 @param casesense Whether to consider dictionary keys as case sensitive
6393 @param is_user_input Whether the keys are generated from user input. If True,
6394 strings are converted to int/slice if necessary
6395 @param traverse_string Whether to traverse inside strings. If True, any
6396 non-compatible object will also be converted into a string
8f334380 6397 # TODO: Write tests
324ad820 6398 '''
325ebc17 6399 if not casesense:
dbf5416a 6400 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6401 path_list = (map(_lower, variadic(path)) for path in path_list)
6402
6403 def _traverse_obj(obj, path, _current_depth=0):
6404 nonlocal depth
575e17a1 6405 if obj is None:
6406 return None
8f334380 6407 path = tuple(variadic(path))
6408 for i, key in enumerate(path):
6409 if isinstance(key, (list, tuple)):
6410 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6411 key = ...
6412 if key is ...:
6413 obj = (obj.values() if isinstance(obj, dict)
6414 else obj if isinstance(obj, (list, tuple, LazyList))
6415 else str(obj) if traverse_string else [])
6416 _current_depth += 1
6417 depth = max(depth, _current_depth)
6418 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
2614f646 6419 elif callable(key):
6420 if isinstance(obj, (list, tuple, LazyList)):
6421 obj = enumerate(obj)
6422 elif isinstance(obj, dict):
6423 obj = obj.items()
6424 else:
6425 if not traverse_string:
6426 return None
6427 obj = str(obj)
6428 _current_depth += 1
6429 depth = max(depth, _current_depth)
6430 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
575e17a1 6431 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6432 obj = (obj.get(key) if casesense or (key in obj)
6433 else next((v for k, v in obj.items() if _lower(k) == key), None))
6434 else:
6435 if is_user_input:
6436 key = (int_or_none(key) if ':' not in key
6437 else slice(*map(int_or_none, key.split(':'))))
8f334380 6438 if key == slice(None):
575e17a1 6439 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6440 if not isinstance(key, (int, slice)):
9fea350f 6441 return None
8f334380 6442 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6443 if not traverse_string:
6444 return None
6445 obj = str(obj)
6446 try:
6447 obj = obj[key]
6448 except IndexError:
324ad820 6449 return None
325ebc17 6450 return obj
6451
352d63fd 6452 if isinstance(expected_type, type):
6453 type_test = lambda val: val if isinstance(val, expected_type) else None
6454 elif expected_type is not None:
6455 type_test = expected_type
6456 else:
6457 type_test = lambda val: val
6458
8f334380 6459 for path in path_list:
6460 depth = 0
6461 val = _traverse_obj(obj, path)
325ebc17 6462 if val is not None:
8f334380 6463 if depth:
6464 for _ in range(depth - 1):
6586bca9 6465 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6466 val = [v for v in map(type_test, val) if v is not None]
8f334380 6467 if val:
352d63fd 6468 return val if get_all else val[0]
6469 else:
6470 val = type_test(val)
6471 if val is not None:
8f334380 6472 return val
325ebc17 6473 return default
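# Editor's sketch (not part of the original source): representative traversals
# using a plain path, the "..." wildcard, and alternative fallback paths.
def _traverse_obj_example():
    data = {'formats': [{'url': 'http://a'}, {'height': 720, 'url': 'http://b'}]}
    assert traverse_obj(data, ('formats', 0, 'url')) == 'http://a'
    assert traverse_obj(data, ('formats', ..., 'url')) == ['http://a', 'http://b']
    assert traverse_obj(data, ('formats', 1, 'width'), ('formats', 1, 'height')) == 720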
324ad820 6474
6475
6476def traverse_dict(dictn, keys, casesense=True):
6477 ''' For backward compatibility. Do not use '''
6478 return traverse_obj(dictn, keys, casesense=casesense,
6479 is_user_input=True, traverse_string=True)
6606817a 6480
6481
c634ad2a 6482def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6483 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
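# Editor's sketch (not part of the original source): variadic normalises
# "one value or several" arguments; strings and bytes are kept whole.
def _variadic_example():
    assert variadic('spam') == ('spam',)
    assert variadic(['spam', 'eggs']) == ['spam', 'eggs']
    assert variadic(None) == (None,)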
bd50a52b
THD
6484
6485
49fa4d9a
N
6486# create a JSON Web Signature (jws) with HS256 algorithm
6487# the resulting format is in JWS Compact Serialization
6488# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6489# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6490def jwt_encode_hs256(payload_data, key, headers={}):
6491 header_data = {
6492 'alg': 'HS256',
6493 'typ': 'JWT',
6494 }
6495 if headers:
6496 header_data.update(headers)
6497 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
6498 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
6499 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
6500 signature_b64 = base64.b64encode(h.digest())
6501 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6502 return token
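# Editor's sketch (not part of the original source): the token is three base64
# segments joined by dots; note that standard (not URL-safe) base64 is used.
def _jwt_encode_hs256_example():
    token = jwt_encode_hs256({'sub': '42'}, 'secret-key', headers={'kid': 'demo'})
    assert isinstance(token, bytes) and token.count(b'.') == 2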
819e0531 6503
6504
6505def supports_terminal_sequences(stream):
6506 if compat_os_name == 'nt':
d1d5c08f 6507 if get_windows_version() < (10, 0, 10586):
819e0531 6508 return False
6509 elif not os.getenv('TERM'):
6510 return False
6511 try:
6512 return stream.isatty()
6513 except BaseException:
6514 return False
6515
6516
ec11a9f4 6517_terminal_sequences_re = re.compile('\033\\[[^m]+m')
6518
6519
6520def remove_terminal_sequences(string):
6521 return _terminal_sequences_re.sub('', string)
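# Editor's sketch (not part of the original source): ANSI SGR colour codes are
# stripped, leaving only the visible text.
def _remove_terminal_sequences_example():
    assert remove_terminal_sequences('\033[0;31merror\033[0m: oops') == 'error: oops'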
6522
6523
6524def number_of_digits(number):
6525 return len('%d' % number)