#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

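# Illustrative sketch (sample date string is made up): a caller can try these
# formats in turn with datetime.datetime.strptime until one of them matches:
#
#     >>> datetime.datetime.strptime('11/26/2021 10:30:00', DATE_FORMATS_MONTH_FIRST[-1])
#     datetime.datetime(2021, 11, 26, 10, 30)
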
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


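# Example (illustrative; the namespace mapping is just sample data): prefixes
# are expanded into ElementTree's Clark notation:
#
#     >>> xpath_with_ns('media:song/media:title', {'media': 'http://search.yahoo.com/mrss/'})
#     '{http://search.yahoo.com/mrss/}song/{http://search.yahoo.com/mrss/}title'
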
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


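# Example (illustrative) with a small ElementTree document:
#
#     >>> doc = compat_etree_fromstring('<root><item id="1">text</item></root>')
#     >>> xpath_text(doc, './item')
#     'text'
#     >>> xpath_attr(doc, './item', 'id')
#     '1'
#     >>> xpath_text(doc, './missing', default=None) is None
#     True
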
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist


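# Example (illustrative; the HTML snippets are made up):
#
#     >>> get_element_by_class('foo', '<div class="bar foo">hello</div>')
#     'hello'
#     >>> get_element_by_attribute('data-id', '42', '<span data-id="42">x</span>')
#     'x'
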
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()


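# Example (illustrative): <br /> becomes a newline, tags are stripped and
# entities are decoded:
#
#     >>> clean_html('<p>First line<br/>second &amp; last</p>')
#     'First line\nsecond & last'
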
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return ' '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result


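# Example (illustrative; the input title is made up):
#
#     >>> sanitize_filename('New: Album/Track?')
#     'New - Album_Track'
#     >>> sanitize_filename('New: Album/Track?', restricted=True)
#     'New_-_Album_Track'
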
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url


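# Example (illustrative; the hosts are placeholders):
#
#     >>> sanitize_url('//cdn.example.com/video.mp4')
#     'http://cdn.example.com/video.mp4'
#     >>> sanitize_url('httpss://example.com/')
#     'https://example.com/'
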
def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')


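# Example (illustrative; credentials and host are made up): user:pass embedded
# in the URL are stripped and returned as a Basic auth header value:
#
#     >>> extract_basic_auth('http://user:pass@example.com/feed')
#     ('http://example.com/feed', 'Basic dXNlcjpwYXNz')
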
def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


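# Example (illustrative): the order of first occurrence is preserved:
#
#     >>> orderedSet([1, 2, 1, 3, 2])
#     [1, 2, 3]
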
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


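# Example (illustrative): named, HTML5 and numeric entities are all decoded:
#
#     >>> unescapeHTML('&amp; &eacute; &#x27;')
#     "& é '"
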
def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)


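# Minimal usage sketch (illustrative; assumes some external tool such as an
# `ffmpeg` binary on PATH):
#
#     p = Popen(['ffmpeg', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#     stdout, stderr = p.communicate_or_kill()
#
# Compared to plain subprocess.Popen, this wrapper hides the console window of
# the child on Windows and kills the child if communicate() is interrupted.
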
def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')

f07b74fc
PH
2335
2336def encodeArgument(s):
2337 if not isinstance(s, compat_str):
2338 # Legacy code that uses byte strings
2339 # Uncomment the following line after fixing all post processors
7af808a5 2340 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2341 s = s.decode('ascii')
2342 return encodeFilename(s, True)
2343
2344
aa49acd1
S
2345def decodeArgument(b):
2346 return decodeFilename(b, True)
2347
2348
8271226a
PH
2349def decodeOption(optval):
2350 if optval is None:
2351 return optval
2352 if isinstance(optval, bytes):
2353 optval = optval.decode(preferredencoding())
2354
2355 assert isinstance(optval, compat_str)
2356 return optval
1c256f70 2357
5f6a1245 2358
aa7785f8 2359_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
2360
2361
2362def timetuple_from_msec(msec):
2363 secs, msec = divmod(msec, 1000)
2364 mins, secs = divmod(secs, 60)
2365 hrs, mins = divmod(mins, 60)
2366 return _timetuple(hrs, mins, secs, msec)
2367
2368
cdb19aa4 2369def formatSeconds(secs, delim=':', msec=False):
aa7785f8 2370 time = timetuple_from_msec(secs * 1000)
2371 if time.hours:
2372 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
2373 elif time.minutes:
2374 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
4539dd30 2375 else:
aa7785f8 2376 ret = '%d' % time.seconds
2377 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
4539dd30 2378
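# Illustrative usage sketch (editor's examples, derived from the logic above):
#
#   >>> formatSeconds(75)
#   '1:15'
#   >>> formatSeconds(3661, msec=True)
#   '1:01:01.000'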
a0ddb8a2 2379
77562778 2380def _ssl_load_windows_store_certs(ssl_context, storename):
2381 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2382 try:
2383 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
2384 if encoding == 'x509_asn' and (
2385 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
2386 except PermissionError:
2387 return
2388 for cert in certs:
a2366922 2389 try:
77562778 2390 ssl_context.load_verify_locations(cadata=cert)
2391 except ssl.SSLError:
a2366922
PH
2392 pass
2393
77562778 2394
2395def make_HTTPS_handler(params, **kwargs):
2396 opts_check_certificate = not params.get('nocheckcertificate')
2397 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2398 context.check_hostname = opts_check_certificate
2399 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
2400 if opts_check_certificate:
4e3d1898 2401 try:
2402 context.load_default_certs()
2403 # Work around the issue in load_default_certs when there are bad certificates. See:
2404 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2405 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2406 except ssl.SSLError:
2407 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2408 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
2409 # Create a new context to discard any certificates that were already loaded
2410 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2411 context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
2412 for storename in ('CA', 'ROOT'):
2413 _ssl_load_windows_store_certs(context, storename)
2414 context.set_default_verify_paths()
77562778 2415 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2416
732ea2f0 2417
5873d4cc 2418def bug_reports_message(before=';'):
08f2a92c 2419 if ytdl_is_updateable():
7a5c1cfe 2420 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2421 else:
7a5c1cfe 2422 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2423 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2424 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2425 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2426
2427 before = before.rstrip()
2428 if not before or before.endswith(('.', '!', '?')):
2429 msg = msg[0].title() + msg[1:]
2430
2431 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2432
2433
bf5b9d85
PM
2434class YoutubeDLError(Exception):
2435 """Base exception for YoutubeDL errors."""
2436 pass
2437
2438
3158150c 2439network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2440if hasattr(ssl, 'CertificateError'):
2441 network_exceptions.append(ssl.CertificateError)
2442network_exceptions = tuple(network_exceptions)
2443
2444
bf5b9d85 2445class ExtractorError(YoutubeDLError):
1c256f70 2446 """Error during info extraction."""
5f6a1245 2447
1151c407 2448 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2449 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2450 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2451 """
3158150c 2452 if sys.exc_info()[0] in network_exceptions:
9a82b238 2453 expected = True
d5979c5d 2454
526d74ec 2455 self.msg = str(msg)
1c256f70 2456 self.traceback = tb
1151c407 2457 self.expected = expected
2eabb802 2458 self.cause = cause
d11271dd 2459 self.video_id = video_id
1151c407 2460 self.ie = ie
2461 self.exc_info = sys.exc_info() # preserve original exception
2462
2463 super(ExtractorError, self).__init__(''.join((
2464 format_field(ie, template='[%s] '),
2465 format_field(video_id, template='%s: '),
526d74ec 2466 self.msg,
1151c407 2467 format_field(cause, template=' (caused by %r)'),
2468 '' if expected else bug_reports_message())))
1c256f70 2469
01951dda
PH
2470 def format_traceback(self):
2471 if self.traceback is None:
2472 return None
28e614de 2473 return ''.join(traceback.format_tb(self.traceback))
01951dda 2474
1c256f70 2475
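# Illustrative usage sketch (the video id is hypothetical): expected=True marks
# user-facing conditions that are not bugs, so bug_reports_message() is not
# appended to the error text.
#
#   raise ExtractorError('This video is unavailable', expected=True,
#                        video_id='abc123')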
416c7fcb
PH
2476class UnsupportedError(ExtractorError):
2477 def __init__(self, url):
2478 super(UnsupportedError, self).__init__(
2479 'Unsupported URL: %s' % url, expected=True)
2480 self.url = url
2481
2482
55b3e45b
JMF
2483class RegexNotFoundError(ExtractorError):
2484 """Error when a regex didn't match"""
2485 pass
2486
2487
773f291d
S
2488class GeoRestrictedError(ExtractorError):
2489 """Geographic restriction Error exception.
2490
2491 This exception may be thrown when a video is not available from your
2492 geographic location due to geographic restrictions imposed by a website.
2493 """
b6e0c7d2 2494
0db3bae8 2495 def __init__(self, msg, countries=None, **kwargs):
2496 kwargs['expected'] = True
2497 super(GeoRestrictedError, self).__init__(msg, **kwargs)
773f291d
S
2498 self.countries = countries
2499
2500
bf5b9d85 2501class DownloadError(YoutubeDLError):
59ae15a5 2502 """Download Error exception.
d77c3dfd 2503
59ae15a5
PH
2504 This exception may be thrown by FileDownloader objects if they are not
2505 configured to continue on errors. They will contain the appropriate
2506 error message.
2507 """
5f6a1245 2508
8cc83b8d
FV
2509 def __init__(self, msg, exc_info=None):
2510 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2511 super(DownloadError, self).__init__(msg)
2512 self.exc_info = exc_info
d77c3dfd
FV
2513
2514
498f5606 2515class EntryNotInPlaylist(YoutubeDLError):
2516 """Entry not in playlist exception.
2517
2518 This exception will be thrown by YoutubeDL when a requested entry
2519 is not found in the playlist info_dict
2520 """
2521 pass
2522
2523
bf5b9d85 2524class SameFileError(YoutubeDLError):
59ae15a5 2525 """Same File exception.
d77c3dfd 2526
59ae15a5
PH
2527 This exception will be thrown by FileDownloader objects if they detect
2528 multiple files would have to be downloaded to the same file on disk.
2529 """
2530 pass
d77c3dfd
FV
2531
2532
bf5b9d85 2533class PostProcessingError(YoutubeDLError):
59ae15a5 2534 """Post Processing exception.
d77c3dfd 2535
59ae15a5
PH
2536 This exception may be raised by PostProcessor's .run() method to
2537 indicate an error in the postprocessing task.
2538 """
5f6a1245 2539
7851b379 2540 def __init__(self, msg):
bf5b9d85 2541 super(PostProcessingError, self).__init__(msg)
7851b379 2542 self.msg = msg
d77c3dfd 2543
5f6a1245 2544
48f79687 2545class DownloadCancelled(YoutubeDLError):
2546 """ Exception raised when the download queue should be interrupted """
2547 msg = 'The download was cancelled'
8b0d7497 2548
48f79687 2549 def __init__(self, msg=None):
2550 if msg is not None:
2551 self.msg = msg
2552 YoutubeDLError.__init__(self, self.msg)
8b0d7497 2553
8b0d7497 2554
48f79687 2555class ExistingVideoReached(DownloadCancelled):
2556 """ --break-on-existing triggered """
2557 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
8b0d7497 2558
48f79687 2559
2560class RejectedVideoReached(DownloadCancelled):
2561 """ --break-on-reject triggered """
2562 msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
51d9739f 2563
2564
48f79687 2565class MaxDownloadsReached(DownloadCancelled):
59ae15a5 2566 """ --max-downloads limit has been reached. """
48f79687 2567 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2568
2569
2570class ThrottledDownload(YoutubeDLError):
2571 """ Download speed below --throttled-rate. """
59ae15a5 2572 pass
d77c3dfd
FV
2573
2574
bf5b9d85 2575class UnavailableVideoError(YoutubeDLError):
59ae15a5 2576 """Unavailable Format exception.
d77c3dfd 2577
59ae15a5
PH
2578 This exception will be thrown when a video is requested
2579 in a format that is not available for that video.
2580 """
2581 pass
d77c3dfd
FV
2582
2583
bf5b9d85 2584class ContentTooShortError(YoutubeDLError):
59ae15a5 2585 """Content Too Short exception.
d77c3dfd 2586
59ae15a5
PH
2587 This exception may be raised by FileDownloader objects when a file they
2588 download is too small for what the server announced first, indicating
2589 the connection was probably interrupted.
2590 """
d77c3dfd 2591
59ae15a5 2592 def __init__(self, downloaded, expected):
bf5b9d85
PM
2593 super(ContentTooShortError, self).__init__(
2594 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2595 )
2c7ed247 2596 # Both in bytes
59ae15a5
PH
2597 self.downloaded = downloaded
2598 self.expected = expected
d77c3dfd 2599
5f6a1245 2600
bf5b9d85 2601class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2602 def __init__(self, code=None, msg='Unknown error'):
2603 super(XAttrMetadataError, self).__init__(msg)
2604 self.code = code
bd264412 2605 self.msg = msg
efa97bdc
YCH
2606
2607 # Parsing code and msg
3089bc74 2608 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2609 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2610 self.reason = 'NO_SPACE'
2611 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2612 self.reason = 'VALUE_TOO_LONG'
2613 else:
2614 self.reason = 'NOT_SUPPORTED'
2615
2616
bf5b9d85 2617class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2618 pass
2619
2620
c5a59d93 2621def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2622 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2623 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2624 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2625 if sys.version_info < (3, 0):
65220c3b
S
2626 kwargs['strict'] = True
2627 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2628 source_address = ydl_handler._params.get('source_address')
8959018a 2629
be4a824d 2630 if source_address is not None:
8959018a
AU
2631 # This is to work around _create_connection() from socket where it will try all
2632 # address data from getaddrinfo() including IPv6. This filters the result from
2633 # getaddrinfo() based on the source_address value.
2634 # This is based on the cpython socket.create_connection() function.
2635 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2636 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2637 host, port = address
2638 err = None
2639 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2640 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2641 ip_addrs = [addr for addr in addrs if addr[0] == af]
2642 if addrs and not ip_addrs:
2643 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2644 raise socket.error(
2645 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2646 % (ip_version, source_address[0]))
8959018a
AU
2647 for res in ip_addrs:
2648 af, socktype, proto, canonname, sa = res
2649 sock = None
2650 try:
2651 sock = socket.socket(af, socktype, proto)
2652 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2653 sock.settimeout(timeout)
2654 sock.bind(source_address)
2655 sock.connect(sa)
2656 err = None # Explicitly break reference cycle
2657 return sock
2658 except socket.error as _:
2659 err = _
2660 if sock is not None:
2661 sock.close()
2662 if err is not None:
2663 raise err
2664 else:
9e21e6d9
S
2665 raise socket.error('getaddrinfo returns an empty list')
2666 if hasattr(hc, '_create_connection'):
2667 hc._create_connection = _create_connection
be4a824d
PH
2668 sa = (source_address, 0)
2669 if hasattr(hc, 'source_address'): # Python 2.7+
2670 hc.source_address = sa
2671 else: # Python 2.6
2672 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2673 sock = _create_connection(
be4a824d
PH
2674 (self.host, self.port), self.timeout, sa)
2675 if is_https:
d7932313
PH
2676 self.sock = ssl.wrap_socket(
2677 sock, self.key_file, self.cert_file,
2678 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2679 else:
2680 self.sock = sock
2681 hc.connect = functools.partial(_hc_connect, hc)
2682
2683 return hc
2684
2685
87f0e62d 2686def handle_youtubedl_headers(headers):
992fc9d6
YCH
2687 filtered_headers = headers
2688
2689 if 'Youtubedl-no-compression' in filtered_headers:
2690 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2691 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2692
992fc9d6 2693 return filtered_headers
87f0e62d
YCH
2694
2695
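# Illustrative usage sketch (header values are hypothetical): the internal
# 'Youtubedl-no-compression' flag is consumed here and translated into
# "drop Accept-Encoding".
#
#   >>> handle_youtubedl_headers(
#   ...     {'User-Agent': 'UA', 'Accept-Encoding': 'gzip, deflate',
#   ...      'Youtubedl-no-compression': 'True'})
#   {'User-Agent': 'UA'}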
acebc9cd 2696class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2697 """Handler for HTTP requests and responses.
2698
2699 This class, when installed with an OpenerDirector, automatically adds
2700 the standard headers to every HTTP request and handles gzipped and
2701 deflated responses from web servers. If compression is to be avoided in
2702 a particular request, the original request in the program code only has
0424ec30 2703 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2704 removed before making the real request.
2705
2706 Part of this code was copied from:
2707
2708 http://techknack.net/python-urllib2-handlers/
2709
2710 Andrew Rowls, the author of that code, agreed to release it to the
2711 public domain.
2712 """
2713
be4a824d
PH
2714 def __init__(self, params, *args, **kwargs):
2715 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2716 self._params = params
2717
2718 def http_open(self, req):
71aff188
YCH
2719 conn_class = compat_http_client.HTTPConnection
2720
2721 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2722 if socks_proxy:
2723 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2724 del req.headers['Ytdl-socks-proxy']
2725
be4a824d 2726 return self.do_open(functools.partial(
71aff188 2727 _create_http_connection, self, conn_class, False),
be4a824d
PH
2728 req)
2729
59ae15a5
PH
2730 @staticmethod
2731 def deflate(data):
fc2119f2 2732 if not data:
2733 return data
59ae15a5
PH
2734 try:
2735 return zlib.decompress(data, -zlib.MAX_WBITS)
2736 except zlib.error:
2737 return zlib.decompress(data)
2738
acebc9cd 2739 def http_request(self, req):
51f267d9
S
2740 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
2741 # always respected by websites, some tend to give out URLs with non percent-encoded
2742 # non-ASCII characters (see telemb.py, ard.py [#3412])
2743 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2744 # To work around aforementioned issue we will replace request's original URL with
2745 # percent-encoded one
2746 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2747 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2748 url = req.get_full_url()
2749 url_escaped = escape_url(url)
2750
2751 # Substitute URL if any change after escaping
2752 if url != url_escaped:
15d260eb 2753 req = update_Request(req, url=url_escaped)
51f267d9 2754
33ac271b 2755 for h, v in std_headers.items():
3d5f7a39
JK
2756 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2757 # The dict keys are capitalized because of this bug by urllib
2758 if h.capitalize() not in req.headers:
33ac271b 2759 req.add_header(h, v)
87f0e62d
YCH
2760
2761 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2762
2763 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2764 # Python 2.6 is brain-dead when it comes to fragments
2765 req._Request__original = req._Request__original.partition('#')[0]
2766 req._Request__r_type = req._Request__r_type.partition('#')[0]
2767
59ae15a5
PH
2768 return req
2769
acebc9cd 2770 def http_response(self, req, resp):
59ae15a5
PH
2771 old_resp = resp
2772 # gzip
2773 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2774 content = resp.read()
2775 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2776 try:
2777 uncompressed = io.BytesIO(gz.read())
2778 except IOError as original_ioerror:
2779 # There may be junk at the end of the file
2780 # See http://stackoverflow.com/q/4928560/35070 for details
2781 for i in range(1, 1024):
2782 try:
2783 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2784 uncompressed = io.BytesIO(gz.read())
2785 except IOError:
2786 continue
2787 break
2788 else:
2789 raise original_ioerror
b407d853 2790 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2791 resp.msg = old_resp.msg
c047270c 2792 del resp.headers['Content-encoding']
59ae15a5
PH
2793 # deflate
2794 if resp.headers.get('Content-encoding', '') == 'deflate':
2795 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2796 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2797 resp.msg = old_resp.msg
c047270c 2798 del resp.headers['Content-encoding']
ad729172 2799 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2800 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2801 if 300 <= resp.code < 400:
2802 location = resp.headers.get('Location')
2803 if location:
2804 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2805 if sys.version_info >= (3, 0):
2806 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2807 else:
2808 location = location.decode('utf-8')
5a4d9ddb
S
2809 location_escaped = escape_url(location)
2810 if location != location_escaped:
2811 del resp.headers['Location']
9a4aec8b
YCH
2812 if sys.version_info < (3, 0):
2813 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2814 resp.headers['Location'] = location_escaped
59ae15a5 2815 return resp
0f8d03f8 2816
acebc9cd
PH
2817 https_request = http_request
2818 https_response = http_response
bf50b038 2819
5de90176 2820
71aff188
YCH
2821def make_socks_conn_class(base_class, socks_proxy):
2822 assert issubclass(base_class, (
2823 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2824
2825 url_components = compat_urlparse.urlparse(socks_proxy)
2826 if url_components.scheme.lower() == 'socks5':
2827 socks_type = ProxyType.SOCKS5
2828 elif url_components.scheme.lower() in ('socks', 'socks4'):
2829 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2830 elif url_components.scheme.lower() == 'socks4a':
2831 socks_type = ProxyType.SOCKS4A
71aff188 2832
cdd94c2e
YCH
2833 def unquote_if_non_empty(s):
2834 if not s:
2835 return s
2836 return compat_urllib_parse_unquote_plus(s)
2837
71aff188
YCH
2838 proxy_args = (
2839 socks_type,
2840 url_components.hostname, url_components.port or 1080,
2841 True, # Remote DNS
cdd94c2e
YCH
2842 unquote_if_non_empty(url_components.username),
2843 unquote_if_non_empty(url_components.password),
71aff188
YCH
2844 )
2845
2846 class SocksConnection(base_class):
2847 def connect(self):
2848 self.sock = sockssocket()
2849 self.sock.setproxy(*proxy_args)
2850 if type(self.timeout) in (int, float):
2851 self.sock.settimeout(self.timeout)
2852 self.sock.connect((self.host, self.port))
2853
2854 if isinstance(self, compat_http_client.HTTPSConnection):
2855 if hasattr(self, '_context'): # Python > 2.6
2856 self.sock = self._context.wrap_socket(
2857 self.sock, server_hostname=self.host)
2858 else:
2859 self.sock = ssl.wrap_socket(self.sock)
2860
2861 return SocksConnection
2862
2863
be4a824d
PH
2864class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2865 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2866 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2867 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2868 self._params = params
2869
2870 def https_open(self, req):
4f264c02 2871 kwargs = {}
71aff188
YCH
2872 conn_class = self._https_conn_class
2873
4f264c02
JMF
2874 if hasattr(self, '_context'): # python > 2.6
2875 kwargs['context'] = self._context
2876 if hasattr(self, '_check_hostname'): # python 3.x
2877 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2878
2879 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2880 if socks_proxy:
2881 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2882 del req.headers['Ytdl-socks-proxy']
2883
be4a824d 2884 return self.do_open(functools.partial(
71aff188 2885 _create_http_connection, self, conn_class, True),
4f264c02 2886 req, **kwargs)
be4a824d
PH
2887
2888
1bab3437 2889class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2890 """
2891 See [1] for cookie file format.
2892
2893 1. https://curl.haxx.se/docs/http-cookies.html
2894 """
e7e62441 2895 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2896 _ENTRY_LEN = 7
2897 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2898# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2899
2900'''
2901 _CookieFileEntry = collections.namedtuple(
2902 'CookieFileEntry',
2903 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2904
1bab3437 2905 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2906 """
2907 Save cookies to a file.
2908
2909 Most of the code is taken from CPython 3.8 and slightly adapted
2910 to support cookie files with UTF-8 in both python 2 and 3.
2911 """
2912 if filename is None:
2913 if self.filename is not None:
2914 filename = self.filename
2915 else:
2916 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2917
1bab3437
S
2918 # Store session cookies with `expires` set to 0 instead of an empty
2919 # string
2920 for cookie in self:
2921 if cookie.expires is None:
2922 cookie.expires = 0
c380cc28
S
2923
2924 with io.open(filename, 'w', encoding='utf-8') as f:
2925 f.write(self._HEADER)
2926 now = time.time()
2927 for cookie in self:
2928 if not ignore_discard and cookie.discard:
2929 continue
2930 if not ignore_expires and cookie.is_expired(now):
2931 continue
2932 if cookie.secure:
2933 secure = 'TRUE'
2934 else:
2935 secure = 'FALSE'
2936 if cookie.domain.startswith('.'):
2937 initial_dot = 'TRUE'
2938 else:
2939 initial_dot = 'FALSE'
2940 if cookie.expires is not None:
2941 expires = compat_str(cookie.expires)
2942 else:
2943 expires = ''
2944 if cookie.value is None:
2945 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2946 # with no name, whereas http.cookiejar regards it as a
2947 # cookie with no value.
2948 name = ''
2949 value = cookie.name
2950 else:
2951 name = cookie.name
2952 value = cookie.value
2953 f.write(
2954 '\t'.join([cookie.domain, initial_dot, cookie.path,
2955 secure, expires, name, value]) + '\n')
1bab3437
S
2956
2957 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2958 """Load cookies from a file."""
2959 if filename is None:
2960 if self.filename is not None:
2961 filename = self.filename
2962 else:
2963 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2964
c380cc28
S
2965 def prepare_line(line):
2966 if line.startswith(self._HTTPONLY_PREFIX):
2967 line = line[len(self._HTTPONLY_PREFIX):]
2968 # comments and empty lines are fine
2969 if line.startswith('#') or not line.strip():
2970 return line
2971 cookie_list = line.split('\t')
2972 if len(cookie_list) != self._ENTRY_LEN:
2973 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2974 cookie = self._CookieFileEntry(*cookie_list)
2975 if cookie.expires_at and not cookie.expires_at.isdigit():
2976 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2977 return line
2978
e7e62441 2979 cf = io.StringIO()
c380cc28 2980 with io.open(filename, encoding='utf-8') as f:
e7e62441 2981 for line in f:
c380cc28
S
2982 try:
2983 cf.write(prepare_line(line))
2984 except compat_cookiejar.LoadError as e:
2985 write_string(
2986 'WARNING: skipping cookie file entry due to %s: %r\n'
2987 % (e, line), sys.stderr)
2988 continue
e7e62441 2989 cf.seek(0)
2990 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2991 # Session cookies are denoted by either `expires` field set to
2992 # an empty string or 0. MozillaCookieJar only recognizes the former
2993 # (see [1]). So we need to force the latter to be recognized as session
2994 # cookies on our own.
2995 # Session cookies may be important for cookies-based authentication,
2996 # e.g. usually, when user does not check 'Remember me' check box while
2997 # logging in on a site, some important cookies are stored as session
2998 # cookies so that not recognizing them will result in failed login.
2999 # 1. https://bugs.python.org/issue17164
3000 for cookie in self:
3001 # Treat `expires=0` cookies as session cookies
3002 if cookie.expires == 0:
3003 cookie.expires = None
3004 cookie.discard = True
3005
3006
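# Illustrative usage sketch ('cookies.txt' is a hypothetical path):
#
#   jar = YoutubeDLCookieJar('cookies.txt')
#   jar.load(ignore_discard=True, ignore_expires=True)
#   # ... use the jar, e.g. via a cookie processor attached to an opener ...
#   jar.save(ignore_discard=True, ignore_expires=True)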
a6420bf5
S
3007class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
3008 def __init__(self, cookiejar=None):
3009 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
3010
3011 def http_response(self, request, response):
3012 # Python 2 will choke on the next HTTP request in a row if there are non-ASCII
3013 # characters in Set-Cookie HTTP header of last response (see
067aa17e 3014 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
3015 # In order to at least prevent crashing we will percent encode Set-Cookie
3016 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
3017 # if sys.version_info < (3, 0) and response.headers:
3018 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
3019 # set_cookie = response.headers.get(set_cookie_header)
3020 # if set_cookie:
3021 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
3022 # if set_cookie != set_cookie_escaped:
3023 # del response.headers[set_cookie_header]
3024 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
3025 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
3026
f5fa042c 3027 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
3028 https_response = http_response
3029
3030
fca6dba8 3031class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 3032 """YoutubeDL redirect handler
3033
3034 The code is based on HTTPRedirectHandler implementation from CPython [1].
3035
3036 This redirect handler solves two issues:
3037 - ensures redirect URL is always unicode under python 2
3038 - introduces support for experimental HTTP response status code
3039 308 Permanent Redirect [2] used by some sites [3]
3040
3041 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
3042 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3043 3. https://github.com/ytdl-org/youtube-dl/issues/28768
3044 """
3045
3046 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
3047
3048 def redirect_request(self, req, fp, code, msg, headers, newurl):
3049 """Return a Request or None in response to a redirect.
3050
3051 This is called by the http_error_30x methods when a
3052 redirection response is received. If a redirection should
3053 take place, return a new Request to allow http_error_30x to
3054 perform the redirect. Otherwise, raise HTTPError if no-one
3055 else should try to handle this url. Return None if you can't
3056 but another Handler might.
3057 """
3058 m = req.get_method()
3059 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3060 or code in (301, 302, 303) and m == "POST")):
3061 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3062 # Strictly (according to RFC 2616), 301 or 302 in response to
3063 # a POST MUST NOT cause a redirection without confirmation
3064 # from the user (of urllib.request, in this case). In practice,
3065 # essentially all clients do redirect in this case, so we do
3066 # the same.
3067
3068 # On python 2 urlh.geturl() may sometimes return redirect URL
3069 # as a byte string instead of unicode. This workaround forces it
3070 # to always return unicode.
3071 if sys.version_info[0] < 3:
3072 newurl = compat_str(newurl)
3073
3074 # Be conciliant with URIs containing a space. This is mainly
3075 # redundant with the more complete encoding done in http_error_302(),
3076 # but it is kept for compatibility with other callers.
3077 newurl = newurl.replace(' ', '%20')
3078
3079 CONTENT_HEADERS = ("content-length", "content-type")
3080 # NB: don't use dict comprehension for python 2.6 compatibility
3081 newheaders = dict((k, v) for k, v in req.headers.items()
3082 if k.lower() not in CONTENT_HEADERS)
3083 return compat_urllib_request.Request(
3084 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3085 unverifiable=True)
fca6dba8
S
3086
3087
46f59e89
S
3088def extract_timezone(date_str):
3089 m = re.search(
f137e4c2 3090 r'''(?x)
3091 ^.{8,}? # >=8 char non-TZ prefix, if present
3092 (?P<tz>Z| # just the UTC Z, or
3093 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3094 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3095 [ ]? # optional space
3096 (?P<sign>\+|-) # +/-
3097 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3098 $)
3099 ''', date_str)
46f59e89
S
3100 if not m:
3101 timezone = datetime.timedelta()
3102 else:
3103 date_str = date_str[:-len(m.group('tz'))]
3104 if not m.group('sign'):
3105 timezone = datetime.timedelta()
3106 else:
3107 sign = 1 if m.group('sign') == '+' else -1
3108 timezone = datetime.timedelta(
3109 hours=sign * int(m.group('hours')),
3110 minutes=sign * int(m.group('minutes')))
3111 return timezone, date_str
3112
3113
08b38d54 3114def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3115 """ Return a UNIX timestamp from the given date """
3116
3117 if date_str is None:
3118 return None
3119
52c3a6e4
S
3120 date_str = re.sub(r'\.[0-9]+', '', date_str)
3121
08b38d54 3122 if timezone is None:
46f59e89
S
3123 timezone, date_str = extract_timezone(date_str)
3124
52c3a6e4
S
3125 try:
3126 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3127 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3128 return calendar.timegm(dt.timetuple())
3129 except ValueError:
3130 pass
912b38b4
PH
3131
3132
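# Illustrative usage sketch (editor's examples): equivalent instants yield the
# same UNIX timestamp regardless of how the offset is written.
#
#   >>> parse_iso8601('2014-03-23T23:04:26+0100')
#   1395612266
#   >>> parse_iso8601('2014-03-23T22:04:26Z')
#   1395612266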
46f59e89
S
3133def date_formats(day_first=True):
3134 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3135
3136
42bdd9d0 3137def unified_strdate(date_str, day_first=True):
bf50b038 3138 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3139
3140 if date_str is None:
3141 return None
bf50b038 3142 upload_date = None
5f6a1245 3143 # Replace commas
026fcc04 3144 date_str = date_str.replace(',', ' ')
42bdd9d0 3145 # Remove AM/PM + timezone
9bb8e0a3 3146 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3147 _, date_str = extract_timezone(date_str)
42bdd9d0 3148
46f59e89 3149 for expression in date_formats(day_first):
bf50b038
JMF
3150 try:
3151 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3152 except ValueError:
bf50b038 3153 pass
42393ce2
PH
3154 if upload_date is None:
3155 timetuple = email.utils.parsedate_tz(date_str)
3156 if timetuple:
c6b9cf05
S
3157 try:
3158 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3159 except ValueError:
3160 pass
6a750402
JMF
3161 if upload_date is not None:
3162 return compat_str(upload_date)
bf50b038 3163
5f6a1245 3164
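# Illustrative usage sketch (editor's examples, assuming the module's standard
# DATE_FORMATS tables): free-form dates normalise to YYYYMMDD.
#
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('Dec 14, 2012')
#   '20121214'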
46f59e89
S
3165def unified_timestamp(date_str, day_first=True):
3166 if date_str is None:
3167 return None
3168
2ae2ffda 3169 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3170
7dc2a74e 3171 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3172 timezone, date_str = extract_timezone(date_str)
3173
3174 # Remove AM/PM + timezone
3175 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3176
deef3195
S
3177 # Remove unrecognized timezones from ISO 8601 alike timestamps
3178 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3179 if m:
3180 date_str = date_str[:-len(m.group('tz'))]
3181
f226880c
PH
3182 # Python only supports microseconds, so remove nanoseconds
3183 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3184 if m:
3185 date_str = m.group(1)
3186
46f59e89
S
3187 for expression in date_formats(day_first):
3188 try:
7dc2a74e 3189 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3190 return calendar.timegm(dt.timetuple())
3191 except ValueError:
3192 pass
3193 timetuple = email.utils.parsedate_tz(date_str)
3194 if timetuple:
7dc2a74e 3195 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3196
3197
28e614de 3198def determine_ext(url, default_ext='unknown_video'):
85750f89 3199 if url is None or '.' not in url:
f4776371 3200 return default_ext
9cb9a5df 3201 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3202 if re.match(r'^[A-Za-z0-9]+$', guess):
3203 return guess
a7aaa398
S
3204 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3205 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3206 return guess.rstrip('/')
73e79f2a 3207 else:
cbdbb766 3208 return default_ext
73e79f2a 3209
5f6a1245 3210
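# Illustrative usage sketch (URLs are hypothetical):
#
#   >>> determine_ext('http://example.com/foo/video.mp4?download=1')
#   'mp4'
#   >>> determine_ext('http://example.com/foo/bar')
#   'unknown_video'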
824fa511
S
3211def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3212 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3213
5f6a1245 3214
9e62f283 3215def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3216 """
3217 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3218 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3219
3220 format: string date format used to return datetime object from
3221 precision: round the time portion of a datetime object.
3222 auto|microsecond|second|minute|hour|day.
3223 auto: round to the unit provided in date_str (if applicable).
3224 """
3225 auto_precision = False
3226 if precision == 'auto':
3227 auto_precision = True
3228 precision = 'microsecond'
3229 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3230 if date_str in ('now', 'today'):
37254abc 3231 return today
f8795e10
PH
3232 if date_str == 'yesterday':
3233 return today - datetime.timedelta(days=1)
9e62f283 3234 match = re.match(
3235 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3236 date_str)
37254abc 3237 if match is not None:
9e62f283 3238 start_time = datetime_from_str(match.group('start'), precision, format)
3239 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3240 unit = match.group('unit')
9e62f283 3241 if unit == 'month' or unit == 'year':
3242 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3243 unit = 'day'
9e62f283 3244 else:
3245 if unit == 'week':
3246 unit = 'day'
3247 time *= 7
3248 delta = datetime.timedelta(**{unit + 's': time})
3249 new_date = start_time + delta
3250 if auto_precision:
3251 return datetime_round(new_date, unit)
3252 return new_date
3253
3254 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3255
3256
3257def date_from_str(date_str, format='%Y%m%d'):
3258 """
3259 Return a datetime object from a string in the format YYYYMMDD or
3260 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3261
3262 format: string date format used to return datetime object from
3263 """
3264 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3265
3266
3267def datetime_add_months(dt, months):
3268 """Increment/Decrement a datetime object by months."""
3269 month = dt.month + months - 1
3270 year = dt.year + month // 12
3271 month = month % 12 + 1
3272 day = min(dt.day, calendar.monthrange(year, month)[1])
3273 return dt.replace(year, month, day)
3274
3275
3276def datetime_round(dt, precision='day'):
3277 """
3278 Round a datetime object's time to a specific precision
3279 """
3280 if precision == 'microsecond':
3281 return dt
3282
3283 unit_seconds = {
3284 'day': 86400,
3285 'hour': 3600,
3286 'minute': 60,
3287 'second': 1,
3288 }
3289 roundto = lambda x, n: ((x + n / 2) // n) * n
3290 timestamp = calendar.timegm(dt.timetuple())
3291 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3292
3293
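# Illustrative usage sketch (editor's examples): relative date strings, and
# month arithmetic that clamps to the last valid day of the target month.
#
#   >>> date_from_str('now-1week') == datetime.date.today() - datetime.timedelta(days=7)
#   True
#   >>> datetime_add_months(datetime.datetime(2021, 1, 31), 1)
#   datetime.datetime(2021, 2, 28, 0, 0)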
e63fc1be 3294def hyphenate_date(date_str):
3295 """
3296 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3297 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3298 if match is not None:
3299 return '-'.join(match.groups())
3300 else:
3301 return date_str
3302
5f6a1245 3303
bd558525
JMF
3304class DateRange(object):
3305 """Represents a time interval between two dates"""
5f6a1245 3306
bd558525
JMF
3307 def __init__(self, start=None, end=None):
3308 """start and end must be strings in the format accepted by date"""
3309 if start is not None:
3310 self.start = date_from_str(start)
3311 else:
3312 self.start = datetime.datetime.min.date()
3313 if end is not None:
3314 self.end = date_from_str(end)
3315 else:
3316 self.end = datetime.datetime.max.date()
37254abc 3317 if self.start > self.end:
bd558525 3318 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3319
bd558525
JMF
3320 @classmethod
3321 def day(cls, day):
3322 """Returns a range that only contains the given day"""
5f6a1245
JW
3323 return cls(day, day)
3324
bd558525
JMF
3325 def __contains__(self, date):
3326 """Check if the date is in the range"""
37254abc
JMF
3327 if not isinstance(date, datetime.date):
3328 date = date_from_str(date)
3329 return self.start <= date <= self.end
5f6a1245 3330
bd558525 3331 def __str__(self):
5f6a1245 3332 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3333
3334
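# Illustrative usage sketch (editor's examples):
#
#   >>> '20200515' in DateRange('20200101', '20201231')
#   True
#   >>> '20210101' in DateRange(end='20201231')
#   False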
3335def platform_name():
3336 """ Returns the platform name as a compat_str """
3337 res = platform.platform()
3338 if isinstance(res, bytes):
3339 res = res.decode(preferredencoding())
3340
3341 assert isinstance(res, compat_str)
3342 return res
c257baff
PH
3343
3344
49fa4d9a
N
3345def get_windows_version():
3346 ''' Get Windows version. None if it's not running on Windows '''
3347 if compat_os_name == 'nt':
3348 return version_tuple(platform.win32_ver()[1])
3349 else:
3350 return None
3351
3352
b58ddb32
PH
3353def _windows_write_string(s, out):
3354 """ Returns True if the string was written using special methods,
3355 False if it has yet to be written out."""
3356 # Adapted from http://stackoverflow.com/a/3259271/35070
3357
3358 import ctypes
3359 import ctypes.wintypes
3360
3361 WIN_OUTPUT_IDS = {
3362 1: -11,
3363 2: -12,
3364 }
3365
a383a98a
PH
3366 try:
3367 fileno = out.fileno()
3368 except AttributeError:
3369 # If the output stream doesn't have a fileno, it's virtual
3370 return False
aa42e873
PH
3371 except io.UnsupportedOperation:
3372 # Some strange Windows pseudo files?
3373 return False
b58ddb32
PH
3374 if fileno not in WIN_OUTPUT_IDS:
3375 return False
3376
d7cd9a9e 3377 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3378 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3379 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3380 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3381
d7cd9a9e 3382 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3383 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3384 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3385 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3386 written = ctypes.wintypes.DWORD(0)
3387
d7cd9a9e 3388 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3389 FILE_TYPE_CHAR = 0x0002
3390 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3391 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3392 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3393 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3394 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3395 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3396
3397 def not_a_console(handle):
3398 if handle == INVALID_HANDLE_VALUE or handle is None:
3399 return True
3089bc74
S
3400 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3401 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3402
3403 if not_a_console(h):
3404 return False
3405
d1b9c912
PH
3406 def next_nonbmp_pos(s):
3407 try:
3408 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3409 except StopIteration:
3410 return len(s)
3411
3412 while s:
3413 count = min(next_nonbmp_pos(s), 1024)
3414
b58ddb32 3415 ret = WriteConsoleW(
d1b9c912 3416 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3417 if ret == 0:
3418 raise OSError('Failed to write string')
d1b9c912
PH
3419 if not count: # We just wrote a non-BMP character
3420 assert written.value == 2
3421 s = s[1:]
3422 else:
3423 assert written.value > 0
3424 s = s[written.value:]
b58ddb32
PH
3425 return True
3426
3427
734f90bb 3428def write_string(s, out=None, encoding=None):
7459e3a2
PH
3429 if out is None:
3430 out = sys.stderr
8bf48f23 3431 assert type(s) == compat_str
7459e3a2 3432
b58ddb32
PH
3433 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3434 if _windows_write_string(s, out):
3435 return
3436
3089bc74
S
3437 if ('b' in getattr(out, 'mode', '')
3438 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3439 byt = s.encode(encoding or preferredencoding(), 'ignore')
3440 out.write(byt)
3441 elif hasattr(out, 'buffer'):
3442 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3443 byt = s.encode(enc, 'ignore')
3444 out.buffer.write(byt)
3445 else:
8bf48f23 3446 out.write(s)
7459e3a2
PH
3447 out.flush()
3448
3449
48ea9cea
PH
3450def bytes_to_intlist(bs):
3451 if not bs:
3452 return []
3453 if isinstance(bs[0], int): # Python 3
3454 return list(bs)
3455 else:
3456 return [ord(c) for c in bs]
3457
c257baff 3458
cba892fa 3459def intlist_to_bytes(xs):
3460 if not xs:
3461 return b''
edaa23f8 3462 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3463
3464
c1c9a79c
PH
3465# Cross-platform file locking
3466if sys.platform == 'win32':
3467 import ctypes.wintypes
3468 import msvcrt
3469
3470 class OVERLAPPED(ctypes.Structure):
3471 _fields_ = [
3472 ('Internal', ctypes.wintypes.LPVOID),
3473 ('InternalHigh', ctypes.wintypes.LPVOID),
3474 ('Offset', ctypes.wintypes.DWORD),
3475 ('OffsetHigh', ctypes.wintypes.DWORD),
3476 ('hEvent', ctypes.wintypes.HANDLE),
3477 ]
3478
3479 kernel32 = ctypes.windll.kernel32
3480 LockFileEx = kernel32.LockFileEx
3481 LockFileEx.argtypes = [
3482 ctypes.wintypes.HANDLE, # hFile
3483 ctypes.wintypes.DWORD, # dwFlags
3484 ctypes.wintypes.DWORD, # dwReserved
3485 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3486 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3487 ctypes.POINTER(OVERLAPPED) # Overlapped
3488 ]
3489 LockFileEx.restype = ctypes.wintypes.BOOL
3490 UnlockFileEx = kernel32.UnlockFileEx
3491 UnlockFileEx.argtypes = [
3492 ctypes.wintypes.HANDLE, # hFile
3493 ctypes.wintypes.DWORD, # dwReserved
3494 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3495 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3496 ctypes.POINTER(OVERLAPPED) # Overlapped
3497 ]
3498 UnlockFileEx.restype = ctypes.wintypes.BOOL
3499 whole_low = 0xffffffff
3500 whole_high = 0x7fffffff
3501
3502 def _lock_file(f, exclusive):
3503 overlapped = OVERLAPPED()
3504 overlapped.Offset = 0
3505 overlapped.OffsetHigh = 0
3506 overlapped.hEvent = 0
3507 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3508 handle = msvcrt.get_osfhandle(f.fileno())
3509 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3510 whole_low, whole_high, f._lock_file_overlapped_p):
3511 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3512
3513 def _unlock_file(f):
3514 assert f._lock_file_overlapped_p
3515 handle = msvcrt.get_osfhandle(f.fileno())
3516 if not UnlockFileEx(handle, 0,
3517 whole_low, whole_high, f._lock_file_overlapped_p):
3518 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3519
3520else:
399a76e6
YCH
3521 # Some platforms, such as Jython, are missing fcntl
3522 try:
3523 import fcntl
c1c9a79c 3524
399a76e6
YCH
3525 def _lock_file(f, exclusive):
3526 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3527
399a76e6
YCH
3528 def _unlock_file(f):
3529 fcntl.flock(f, fcntl.LOCK_UN)
3530 except ImportError:
3531 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3532
3533 def _lock_file(f, exclusive):
3534 raise IOError(UNSUPPORTED_MSG)
3535
3536 def _unlock_file(f):
3537 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3538
3539
3540class locked_file(object):
3541 def __init__(self, filename, mode, encoding=None):
3542 assert mode in ['r', 'a', 'w']
3543 self.f = io.open(filename, mode, encoding=encoding)
3544 self.mode = mode
3545
3546 def __enter__(self):
3547 exclusive = self.mode != 'r'
3548 try:
3549 _lock_file(self.f, exclusive)
3550 except IOError:
3551 self.f.close()
3552 raise
3553 return self
3554
3555 def __exit__(self, etype, value, traceback):
3556 try:
3557 _unlock_file(self.f)
3558 finally:
3559 self.f.close()
3560
3561 def __iter__(self):
3562 return iter(self.f)
3563
3564 def write(self, *args):
3565 return self.f.write(*args)
3566
3567 def read(self, *args):
3568 return self.f.read(*args)
4eb7f1d1
JMF
3569
3570
4644ac55
S
3571def get_filesystem_encoding():
3572 encoding = sys.getfilesystemencoding()
3573 return encoding if encoding is not None else 'utf-8'
3574
3575
4eb7f1d1 3576def shell_quote(args):
a6a173c2 3577 quoted_args = []
4644ac55 3578 encoding = get_filesystem_encoding()
a6a173c2
JMF
3579 for a in args:
3580 if isinstance(a, bytes):
3581 # We may get a filename encoded with 'encodeFilename'
3582 a = a.decode(encoding)
aefce8e6 3583 quoted_args.append(compat_shlex_quote(a))
28e614de 3584 return ' '.join(quoted_args)
9d4660ca
PH
3585
3586
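# Illustrative usage sketch (the command is hypothetical):
#
#   >>> shell_quote(['ffmpeg', '-i', 'my file.mp4'])
#   "ffmpeg -i 'my file.mp4'"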
3587def smuggle_url(url, data):
3588 """ Pass additional data in a URL for internal use. """
3589
81953d1a
RA
3590 url, idata = unsmuggle_url(url, {})
3591 data.update(idata)
15707c7e 3592 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3593 {'__youtubedl_smuggle': json.dumps(data)})
3594 return url + '#' + sdata
9d4660ca
PH
3595
3596
79f82953 3597def unsmuggle_url(smug_url, default=None):
83e865a3 3598 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3599 return smug_url, default
28e614de
PH
3600 url, _, sdata = smug_url.rpartition('#')
3601 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3602 data = json.loads(jsond)
3603 return url, data
02dbf93f
PH
3604
3605
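# Illustrative usage sketch (URL and payload are hypothetical):
#
#   url = smuggle_url('https://example.com/video', {'referer': 'https://example.com/'})
#   url, data = unsmuggle_url(url)
#   # data == {'referer': 'https://example.com/'}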
02dbf93f
PH
3606def format_bytes(bytes):
3607 if bytes is None:
28e614de 3608 return 'N/A'
02dbf93f
PH
3609 if type(bytes) is str:
3610 bytes = float(bytes)
3611 if bytes == 0.0:
3612 exponent = 0
3613 else:
3614 exponent = int(math.log(bytes, 1024.0))
28e614de 3615 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3616 converted = float(bytes) / float(1024 ** exponent)
28e614de 3617 return '%.2f%s' % (converted, suffix)
f53c966a 3618
1c088fa8 3619
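# Illustrative usage sketch (editor's examples):
#
#   >>> format_bytes(1536)
#   '1.50KiB'
#   >>> format_bytes(None)
#   'N/A'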
fb47597b
S
3620def lookup_unit_table(unit_table, s):
3621 units_re = '|'.join(re.escape(u) for u in unit_table)
3622 m = re.match(
782b1b5b 3623 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3624 if not m:
3625 return None
3626 num_str = m.group('num').replace(',', '.')
3627 mult = unit_table[m.group('unit')]
3628 return int(float(num_str) * mult)
3629
3630
be64b5b0
PH
3631def parse_filesize(s):
3632 if s is None:
3633 return None
3634
dfb1b146 3635 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3636 # but we support those too
3637 _UNIT_TABLE = {
3638 'B': 1,
3639 'b': 1,
70852b47 3640 'bytes': 1,
be64b5b0
PH
3641 'KiB': 1024,
3642 'KB': 1000,
3643 'kB': 1024,
3644 'Kb': 1000,
13585d76 3645 'kb': 1000,
70852b47
YCH
3646 'kilobytes': 1000,
3647 'kibibytes': 1024,
be64b5b0
PH
3648 'MiB': 1024 ** 2,
3649 'MB': 1000 ** 2,
3650 'mB': 1024 ** 2,
3651 'Mb': 1000 ** 2,
13585d76 3652 'mb': 1000 ** 2,
70852b47
YCH
3653 'megabytes': 1000 ** 2,
3654 'mebibytes': 1024 ** 2,
be64b5b0
PH
3655 'GiB': 1024 ** 3,
3656 'GB': 1000 ** 3,
3657 'gB': 1024 ** 3,
3658 'Gb': 1000 ** 3,
13585d76 3659 'gb': 1000 ** 3,
70852b47
YCH
3660 'gigabytes': 1000 ** 3,
3661 'gibibytes': 1024 ** 3,
be64b5b0
PH
3662 'TiB': 1024 ** 4,
3663 'TB': 1000 ** 4,
3664 'tB': 1024 ** 4,
3665 'Tb': 1000 ** 4,
13585d76 3666 'tb': 1000 ** 4,
70852b47
YCH
3667 'terabytes': 1000 ** 4,
3668 'tebibytes': 1024 ** 4,
be64b5b0
PH
3669 'PiB': 1024 ** 5,
3670 'PB': 1000 ** 5,
3671 'pB': 1024 ** 5,
3672 'Pb': 1000 ** 5,
13585d76 3673 'pb': 1000 ** 5,
70852b47
YCH
3674 'petabytes': 1000 ** 5,
3675 'pebibytes': 1024 ** 5,
be64b5b0
PH
3676 'EiB': 1024 ** 6,
3677 'EB': 1000 ** 6,
3678 'eB': 1024 ** 6,
3679 'Eb': 1000 ** 6,
13585d76 3680 'eb': 1000 ** 6,
70852b47
YCH
3681 'exabytes': 1000 ** 6,
3682 'exbibytes': 1024 ** 6,
be64b5b0
PH
3683 'ZiB': 1024 ** 7,
3684 'ZB': 1000 ** 7,
3685 'zB': 1024 ** 7,
3686 'Zb': 1000 ** 7,
13585d76 3687 'zb': 1000 ** 7,
70852b47
YCH
3688 'zettabytes': 1000 ** 7,
3689 'zebibytes': 1024 ** 7,
be64b5b0
PH
3690 'YiB': 1024 ** 8,
3691 'YB': 1000 ** 8,
3692 'yB': 1024 ** 8,
3693 'Yb': 1000 ** 8,
13585d76 3694 'yb': 1000 ** 8,
70852b47
YCH
3695 'yottabytes': 1000 ** 8,
3696 'yobibytes': 1024 ** 8,
be64b5b0
PH
3697 }
3698
fb47597b
S
3699 return lookup_unit_table(_UNIT_TABLE, s)
3700
3701
3702def parse_count(s):
3703 if s is None:
be64b5b0
PH
3704 return None
3705
fb47597b
S
3706 s = s.strip()
3707
3708 if re.match(r'^[\d,.]+$', s):
3709 return str_to_int(s)
3710
3711 _UNIT_TABLE = {
3712 'k': 1000,
3713 'K': 1000,
3714 'm': 1000 ** 2,
3715 'M': 1000 ** 2,
3716 'kk': 1000 ** 2,
3717 'KK': 1000 ** 2,
3718 }
be64b5b0 3719
fb47597b 3720 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3721
2f7ae819 3722
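# Illustrative usage sketch (editor's examples): decimal vs. binary units, and
# human-readable counts.
#
#   >>> parse_filesize('1.5 GB')
#   1500000000
#   >>> parse_filesize('10 MiB')
#   10485760
#   >>> parse_count('1.2M')
#   1200000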
b871d7e9
S
3723def parse_resolution(s):
3724 if s is None:
3725 return {}
3726
17ec8bcf 3727 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
b871d7e9
S
3728 if mobj:
3729 return {
3730 'width': int(mobj.group('w')),
3731 'height': int(mobj.group('h')),
3732 }
3733
17ec8bcf 3734 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
b871d7e9
S
3735 if mobj:
3736 return {'height': int(mobj.group(1))}
3737
3738 mobj = re.search(r'\b([48])[kK]\b', s)
3739 if mobj:
3740 return {'height': int(mobj.group(1)) * 540}
3741
3742 return {}
3743
3744
0dc41787
S
3745def parse_bitrate(s):
3746 if not isinstance(s, compat_str):
3747 return
3748 mobj = re.search(r'\b(\d+)\s*kbps', s)
3749 if mobj:
3750 return int(mobj.group(1))
3751
3752
a942d6cb 3753def month_by_name(name, lang='en'):
caefb1de
PH
3754 """ Return the number of a month by (locale-independently) English name """
3755
f6717dec 3756 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3757
caefb1de 3758 try:
f6717dec 3759 return month_names.index(name) + 1
7105440c
YCH
3760 except ValueError:
3761 return None
3762
3763
3764def month_by_abbreviation(abbrev):
3765 """ Return the number of a month by (locale-independently) English
3766 abbreviations """
3767
3768 try:
3769 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3770 except ValueError:
3771 return None
18258362
JMF
3772
3773
5aafe895 3774def fix_xml_ampersands(xml_str):
18258362 3775 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3776 return re.sub(
3777 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3778 '&amp;',
5aafe895 3779 xml_str)
e3946f98
PH
3780
3781
3782def setproctitle(title):
8bf48f23 3783 assert isinstance(title, compat_str)
c1c05c67
YCH
3784
3785 # ctypes in Jython is not complete
3786 # http://bugs.jython.org/issue2148
3787 if sys.platform.startswith('java'):
3788 return
3789
e3946f98 3790 try:
611c1dd9 3791 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3792 except OSError:
3793 return
2f49bcd6
RC
3794 except TypeError:
3795 # LoadLibrary in Windows Python 2.7.13 only expects
3796 # a bytestring, but since unicode_literals turns
3797 # every string into a unicode string, it fails.
3798 return
6eefe533
PH
3799 title_bytes = title.encode('utf-8')
3800 buf = ctypes.create_string_buffer(len(title_bytes))
3801 buf.value = title_bytes
e3946f98 3802 try:
6eefe533 3803 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3804 except AttributeError:
3805 return # Strange libc, just skip this
d7dda168
PH
3806
3807
3808def remove_start(s, start):
46bc9b7d 3809 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3810
3811
2b9faf55 3812def remove_end(s, end):
46bc9b7d 3813 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3814
3815
31b2051e
S
3816def remove_quotes(s):
3817 if s is None or len(s) < 2:
3818 return s
3819 for quote in ('"', "'", ):
3820 if s[0] == quote and s[-1] == quote:
3821 return s[1:-1]
3822 return s
3823
3824
b6e0c7d2
U
3825def get_domain(url):
3826 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3827 return domain.group('domain') if domain else None
3828
3829
29eb5174 3830def url_basename(url):
9b8aaeed 3831 path = compat_urlparse.urlparse(url).path
28e614de 3832 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3833
3834
02dc0a36
S
3835def base_url(url):
3836 return re.match(r'https?://[^?#&]+/', url).group()
3837
3838
e34c3361 3839def urljoin(base, path):
4b5de77b
S
3840 if isinstance(path, bytes):
3841 path = path.decode('utf-8')
e34c3361
S
3842 if not isinstance(path, compat_str) or not path:
3843 return None
fad4ceb5 3844 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3845 return path
4b5de77b
S
3846 if isinstance(base, bytes):
3847 base = base.decode('utf-8')
3848 if not isinstance(base, compat_str) or not re.match(
3849 r'^(?:https?:)?//', base):
e34c3361
S
3850 return None
3851 return compat_urlparse.urljoin(base, path)
3852
3853
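# Illustrative usage sketch (URLs are hypothetical):
#
#   >>> url_basename('https://example.com/dir/file.mp4?x=1')
#   'file.mp4'
#   >>> urljoin('https://example.com/a/b', 'c/d')
#   'https://example.com/a/c/d'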
aa94a6d3
PH
3854class HEADRequest(compat_urllib_request.Request):
3855 def get_method(self):
611c1dd9 3856 return 'HEAD'
7217e148
PH
3857
3858
95cf60e8
S
3859class PUTRequest(compat_urllib_request.Request):
3860 def get_method(self):
3861 return 'PUT'
3862
3863
9732d77e 3864def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3865 if get_attr:
3866 if v is not None:
3867 v = getattr(v, get_attr, None)
9572013d
PH
3868 if v == '':
3869 v = None
1812afb7
S
3870 if v is None:
3871 return default
3872 try:
3873 return int(v) * invscale // scale
31c49255 3874 except (ValueError, TypeError, OverflowError):
af98f8ff 3875 return default
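# Usage sketch (illustrative values):
#   int_or_none('42') == 42
#   int_or_none('', default=0) == 0    # empty strings fall back to the default
#   int_or_none(1500, scale=1000) == 1  # scale divides, invscale multiplies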
9732d77e 3876
9572013d 3877
40a90862
JMF
3878def str_or_none(v, default=None):
3879 return default if v is None else compat_str(v)
3880
9732d77e
PH
3881
3882def str_to_int(int_str):
48d4681e 3883 """ A more relaxed version of int_or_none """
42db58ec 3884 if isinstance(int_str, compat_integer_types):
348c6bf1 3885 return int_str
42db58ec
S
3886 elif isinstance(int_str, compat_str):
3887 int_str = re.sub(r'[,\.\+]', '', int_str)
3888 return int_or_none(int_str)
608d11f5
PH
3889
3890
9732d77e 3891def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3892 if v is None:
3893 return default
3894 try:
3895 return float(v) * invscale / scale
5e1271c5 3896 except (ValueError, TypeError):
caf80631 3897 return default
43f775e4
PH
3898
3899
c7e327c4
S
3900def bool_or_none(v, default=None):
3901 return v if isinstance(v, bool) else default
3902
3903
53cd37ba
S
3904def strip_or_none(v, default=None):
3905 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3906
3907
af03000a
S
3908def url_or_none(url):
3909 if not url or not isinstance(url, compat_str):
3910 return None
3911 url = url.strip()
29f7c58a 3912 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
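# Usage sketch (illustrative values): scheme-relative URLs are accepted,
# bare hostnames are not:
#   url_or_none('//cdn.example.com/x.mp4') == '//cdn.example.com/x.mp4'
#   url_or_none('example.com/x.mp4') is None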
af03000a
S
3913
3914
e29663c6 3915def strftime_or_none(timestamp, date_format, default=None):
3916 datetime_object = None
3917 try:
3918 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3919 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3920 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3921 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3922 return datetime_object.strftime(date_format)
3923 except (ValueError, TypeError, AttributeError):
3924 return default
3925
3926
608d11f5 3927def parse_duration(s):
8f9312c3 3928 if not isinstance(s, compat_basestring):
608d11f5
PH
3929 return None
3930
ca7b3246
S
3931 s = s.strip()
3932
acaff495 3933 days, hours, mins, secs, ms = [None] * 5
15846398 3934 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3935 if m:
3936 days, hours, mins, secs, ms = m.groups()
3937 else:
3938 m = re.match(
056653bb
S
3939 r'''(?ix)(?:P?
3940 (?:
3941 [0-9]+\s*y(?:ears?)?\s*
3942 )?
3943 (?:
3944 [0-9]+\s*m(?:onths?)?\s*
3945 )?
3946 (?:
3947 [0-9]+\s*w(?:eeks?)?\s*
3948 )?
8f4b58d7 3949 (?:
acaff495 3950 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3951 )?
056653bb 3952 T)?
acaff495 3953 (?:
3954 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3955 )?
3956 (?:
3957 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3958 )?
3959 (?:
3960 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3961 )?Z?$''', s)
acaff495 3962 if m:
3963 days, hours, mins, secs, ms = m.groups()
3964 else:
15846398 3965 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3966 if m:
3967 hours, mins = m.groups()
3968 else:
3969 return None
3970
3971 duration = 0
3972 if secs:
3973 duration += float(secs)
3974 if mins:
3975 duration += float(mins) * 60
3976 if hours:
3977 duration += float(hours) * 60 * 60
3978 if days:
3979 duration += float(days) * 24 * 60 * 60
3980 if ms:
3981 duration += float(ms)
3982 return duration
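# Usage sketch (illustrative values):
#   parse_duration('1:30') == 90.0
#   parse_duration('PT1H30M') == 5400.0
#   parse_duration('2.5 min') == 150.0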
91d7d0b3
JMF
3983
3984
e65e4c88 3985def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3986 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3987 return (
3988 '{0}.{1}{2}'.format(name, ext, real_ext)
3989 if not expected_real_ext or real_ext[1:] == expected_real_ext
3990 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3991
3992
b3ed15b7
S
3993def replace_extension(filename, ext, expected_real_ext=None):
3994 name, real_ext = os.path.splitext(filename)
3995 return '{0}.{1}'.format(
3996 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3997 ext)
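# Usage sketch (illustrative values):
#   prepend_extension('video.mp4', 'temp') == 'video.temp.mp4'
#   prepend_extension('video.mp4', 'temp', 'mkv') == 'video.mp4.temp'
#   replace_extension('video.mp4', 'mkv') == 'video.mkv'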
3998
3999
d70ad093
PH
4000def check_executable(exe, args=[]):
4001 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
4002 args can be a list of arguments for a short output (like -version) """
4003 try:
d3c93ec2 4004 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
d70ad093
PH
4005 except OSError:
4006 return False
4007 return exe
b7ab0590
PH
4008
4009
9af98e17 4010def _get_exe_version_output(exe, args):
95807118 4011 try:
b64d04c1 4012 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 4013 # SIGTTOU if yt-dlp is run in the background.
067aa17e 4014 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
d3c93ec2 4015 out, _ = Popen(
4016 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
4017 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
95807118
PH
4018 except OSError:
4019 return False
cae97f65
PH
4020 if isinstance(out, bytes): # Python 2.x
4021 out = out.decode('ascii', 'ignore')
9af98e17 4022 return out
cae97f65
PH
4023
4024
4025def detect_exe_version(output, version_re=None, unrecognized='present'):
4026 assert isinstance(output, compat_str)
4027 if version_re is None:
4028 version_re = r'version\s+([-0-9._a-zA-Z]+)'
4029 m = re.search(version_re, output)
95807118
PH
4030 if m:
4031 return m.group(1)
4032 else:
4033 return unrecognized
4034
4035
9af98e17 4036def get_exe_version(exe, args=['--version'],
4037 version_re=None, unrecognized='present'):
4038 """ Returns the version of the specified executable,
4039 or False if the executable is not present """
4040 out = _get_exe_version_output(exe, args)
4041 return detect_exe_version(out, version_re, unrecognized) if out else False
4042
4043
cb89cfc1 4044class LazyList(collections.abc.Sequence):
483336e7 4045 ''' Lazy immutable list from an iterable
4046 Note that slices of a LazyList are lists and not LazyList'''
4047
8e5fecc8 4048 class IndexError(IndexError):
4049 pass
4050
483336e7 4051 def __init__(self, iterable):
4052 self.__iterable = iter(iterable)
4053 self.__cache = []
28419ca2 4054 self.__reversed = False
483336e7 4055
4056 def __iter__(self):
28419ca2 4057 if self.__reversed:
4058 # We need to consume the entire iterable to iterate in reverse
981052c9 4059 yield from self.exhaust()
28419ca2 4060 return
4061 yield from self.__cache
483336e7 4062 for item in self.__iterable:
4063 self.__cache.append(item)
4064 yield item
4065
981052c9 4066 def __exhaust(self):
483336e7 4067 self.__cache.extend(self.__iterable)
9f1a1c36 4068 # Discard the emptied iterable to make it pickle-able
4069 self.__iterable = []
28419ca2 4070 return self.__cache
4071
981052c9 4072 def exhaust(self):
4073 ''' Evaluate the entire iterable '''
4074 return self.__exhaust()[::-1 if self.__reversed else 1]
4075
28419ca2 4076 @staticmethod
981052c9 4077 def __reverse_index(x):
e0f2b4b4 4078 return None if x is None else -(x + 1)
483336e7 4079
4080 def __getitem__(self, idx):
4081 if isinstance(idx, slice):
28419ca2 4082 if self.__reversed:
e0f2b4b4 4083 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4084 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4085 elif isinstance(idx, int):
28419ca2 4086 if self.__reversed:
981052c9 4087 idx = self.__reverse_index(idx)
e0f2b4b4 4088 start, stop, step = idx, idx, 0
483336e7 4089 else:
4090 raise TypeError('indices must be integers or slices')
e0f2b4b4 4091 if ((start or 0) < 0 or (stop or 0) < 0
4092 or (start is None and step < 0)
4093 or (stop is None and step > 0)):
483336e7 4094 # We need to consume the entire iterable to be able to slice from the end
4095 # Obviously, never use this with infinite iterables
8e5fecc8 4096 self.__exhaust()
4097 try:
4098 return self.__cache[idx]
4099 except IndexError as e:
4100 raise self.IndexError(e) from e
e0f2b4b4 4101 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4102 if n > 0:
4103 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4104 try:
4105 return self.__cache[idx]
4106 except IndexError as e:
4107 raise self.IndexError(e) from e
483336e7 4108
4109 def __bool__(self):
4110 try:
28419ca2 4111 self[-1] if self.__reversed else self[0]
8e5fecc8 4112 except self.IndexError:
483336e7 4113 return False
4114 return True
4115
4116 def __len__(self):
8e5fecc8 4117 self.__exhaust()
483336e7 4118 return len(self.__cache)
4119
981052c9 4120 def reverse(self):
28419ca2 4121 self.__reversed = not self.__reversed
4122 return self
4123
4124 def __repr__(self):
4125 # repr and str should mimic a list. So we exhaust the iterable
4126 return repr(self.exhaust())
4127
4128 def __str__(self):
4129 return repr(self.exhaust())
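# Usage sketch for LazyList (illustrative values): only a finite prefix of the
# iterable is consumed, and slices come back as plain lists:
#   LazyList(itertools.count())[:3] == [0, 1, 2]
#   LazyList('abc').reverse()[0] == 'c'  # reversed access forces full evaluation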
4130
483336e7 4131
7be9ccff 4132class PagedList:
dd26ced1
PH
4133 def __len__(self):
4134 # This is only useful for tests
4135 return len(self.getslice())
4136
7be9ccff 4137 def __init__(self, pagefunc, pagesize, use_cache=True):
4138 self._pagefunc = pagefunc
4139 self._pagesize = pagesize
4140 self._use_cache = use_cache
4141 self._cache = {}
4142
4143 def getpage(self, pagenum):
4144 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4145 if self._use_cache:
4146 self._cache[pagenum] = page_results
4147 return page_results
4148
4149 def getslice(self, start=0, end=None):
4150 return list(self._getslice(start, end))
4151
4152 def _getslice(self, start, end):
55575225 4153 raise NotImplementedError('This method must be implemented by subclasses')
4154
4155 def __getitem__(self, idx):
7be9ccff 4156 # NOTE: cache must be enabled if this is used
55575225 4157 if not isinstance(idx, int) or idx < 0:
4158 raise TypeError('indices must be non-negative integers')
4159 entries = self.getslice(idx, idx + 1)
4160 return entries[0] if entries else None
4161
9c44d242
PH
4162
4163class OnDemandPagedList(PagedList):
7be9ccff 4164 def _getslice(self, start, end):
b7ab0590
PH
4165 for pagenum in itertools.count(start // self._pagesize):
4166 firstid = pagenum * self._pagesize
4167 nextfirstid = pagenum * self._pagesize + self._pagesize
4168 if start >= nextfirstid:
4169 continue
4170
b7ab0590
PH
4171 startv = (
4172 start % self._pagesize
4173 if firstid <= start < nextfirstid
4174 else 0)
b7ab0590
PH
4175 endv = (
4176 ((end - 1) % self._pagesize) + 1
4177 if (end is not None and firstid <= end <= nextfirstid)
4178 else None)
4179
7be9ccff 4180 page_results = self.getpage(pagenum)
b7ab0590
PH
4181 if startv != 0 or endv is not None:
4182 page_results = page_results[startv:endv]
7be9ccff 4183 yield from page_results
b7ab0590
PH
4184
4185 # A little optimization - if the current page is not "full", i.e. does
4186 # not contain page_size videos, then we can assume that this page
4187 # is the last one - there are no more ids on further pages -
4188 # so there is no need to query again.
4189 if len(page_results) + startv < self._pagesize:
4190 break
4191
4192 # If we got the whole page, but the next page is not interesting,
4193 # break out early as well
4194 if end == nextfirstid:
4195 break
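# Usage sketch (illustrative values): pages of 10 items produced on demand,
#   pages = OnDemandPagedList(lambda n: range(n * 10, (n + 1) * 10), 10)
#   pages.getslice(5, 8) == [5, 6, 7]
#   pages[12] == 12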
81c2f20b
PH
4196
4197
9c44d242
PH
4198class InAdvancePagedList(PagedList):
4199 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4200 self._pagecount = pagecount
7be9ccff 4201 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4202
7be9ccff 4203 def _getslice(self, start, end):
9c44d242
PH
4204 start_page = start // self._pagesize
4205 end_page = (
4206 self._pagecount if end is None else (end // self._pagesize + 1))
4207 skip_elems = start - start_page * self._pagesize
4208 only_more = None if end is None else end - start
4209 for pagenum in range(start_page, end_page):
7be9ccff 4210 page_results = self.getpage(pagenum)
9c44d242 4211 if skip_elems:
7be9ccff 4212 page_results = page_results[skip_elems:]
9c44d242
PH
4213 skip_elems = None
4214 if only_more is not None:
7be9ccff 4215 if len(page_results) < only_more:
4216 only_more -= len(page_results)
9c44d242 4217 else:
7be9ccff 4218 yield from page_results[:only_more]
9c44d242 4219 break
7be9ccff 4220 yield from page_results
9c44d242
PH
4221
4222
81c2f20b 4223def uppercase_escape(s):
676eb3f2 4224 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4225 return re.sub(
a612753d 4226 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4227 lambda m: unicode_escape(m.group(0))[0],
4228 s)
0fe2ff78
YCH
4229
4230
4231def lowercase_escape(s):
4232 unicode_escape = codecs.getdecoder('unicode_escape')
4233 return re.sub(
4234 r'\\u[0-9a-fA-F]{4}',
4235 lambda m: unicode_escape(m.group(0))[0],
4236 s)
b53466e1 4237
d05cfe06
S
4238
4239def escape_rfc3986(s):
4240 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4241 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4242 s = s.encode('utf-8')
ecc0c5ee 4243 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4244
4245
4246def escape_url(url):
4247 """Escape URL as suggested by RFC 3986"""
4248 url_parsed = compat_urllib_parse_urlparse(url)
4249 return url_parsed._replace(
efbed08d 4250 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4251 path=escape_rfc3986(url_parsed.path),
4252 params=escape_rfc3986(url_parsed.params),
4253 query=escape_rfc3986(url_parsed.query),
4254 fragment=escape_rfc3986(url_parsed.fragment)
4255 ).geturl()
4256
62e609ab 4257
4dfbf869 4258def parse_qs(url):
4259 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4260
4261
62e609ab
PH
4262def read_batch_urls(batch_fd):
4263 def fixup(url):
4264 if not isinstance(url, compat_str):
4265 url = url.decode('utf-8', 'replace')
8c04f0be 4266 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4267 for bom in BOM_UTF8:
4268 if url.startswith(bom):
4269 url = url[len(bom):]
4270 url = url.lstrip()
4271 if not url or url.startswith(('#', ';', ']')):
62e609ab 4272 return False
8c04f0be 4273 # "#" cannot be stripped out since it is part of the URI
4274 # However, it can be safely stripped out if following a whitespace
4275 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4276
4277 with contextlib.closing(batch_fd) as fd:
4278 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4279
4280
4281def urlencode_postdata(*args, **kargs):
15707c7e 4282 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4283
4284
38f9ef31 4285def update_url_query(url, query):
cacd9966
YCH
4286 if not query:
4287 return url
38f9ef31 4288 parsed_url = compat_urlparse.urlparse(url)
4289 qs = compat_parse_qs(parsed_url.query)
4290 qs.update(query)
4291 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4292 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4293
8e60dc75 4294
ed0291d1
S
4295def update_Request(req, url=None, data=None, headers={}, query={}):
4296 req_headers = req.headers.copy()
4297 req_headers.update(headers)
4298 req_data = data or req.data
4299 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4300 req_get_method = req.get_method()
4301 if req_get_method == 'HEAD':
4302 req_type = HEADRequest
4303 elif req_get_method == 'PUT':
4304 req_type = PUTRequest
4305 else:
4306 req_type = compat_urllib_request.Request
ed0291d1
S
4307 new_req = req_type(
4308 req_url, data=req_data, headers=req_headers,
4309 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4310 if hasattr(req, 'timeout'):
4311 new_req.timeout = req.timeout
4312 return new_req
4313
4314
10c87c15 4315def _multipart_encode_impl(data, boundary):
0c265486
YCH
4316 content_type = 'multipart/form-data; boundary=%s' % boundary
4317
4318 out = b''
4319 for k, v in data.items():
4320 out += b'--' + boundary.encode('ascii') + b'\r\n'
4321 if isinstance(k, compat_str):
4322 k = k.encode('utf-8')
4323 if isinstance(v, compat_str):
4324 v = v.encode('utf-8')
4325 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4326 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4327 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4328 if boundary.encode('ascii') in content:
4329 raise ValueError('Boundary overlaps with data')
4330 out += content
4331
4332 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4333
4334 return out, content_type
4335
4336
4337def multipart_encode(data, boundary=None):
4338 '''
4339 Encode a dict to RFC 7578-compliant form-data
4340
4341 data:
4342 A dict where keys and values can be either Unicode or bytes-like
4343 objects.
4344 boundary:
4345 If specified, a Unicode object to be used as the boundary. Otherwise
4346 a random boundary is generated.
4347
4348 Reference: https://tools.ietf.org/html/rfc7578
4349 '''
4350 has_specified_boundary = boundary is not None
4351
4352 while True:
4353 if boundary is None:
4354 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4355
4356 try:
10c87c15 4357 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4358 break
4359 except ValueError:
4360 if has_specified_boundary:
4361 raise
4362 boundary = None
4363
4364 return out, content_type
4365
4366
86296ad2 4367def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4368 if isinstance(key_or_keys, (list, tuple)):
4369 for key in key_or_keys:
86296ad2
S
4370 if key not in d or d[key] is None or skip_false_values and not d[key]:
4371 continue
4372 return d[key]
cbecc9b9
S
4373 return default
4374 return d.get(key_or_keys, default)
4375
4376
329ca3be 4377def try_get(src, getter, expected_type=None):
6606817a 4378 for get in variadic(getter):
a32a9a7e
S
4379 try:
4380 v = get(src)
4381 except (AttributeError, KeyError, TypeError, IndexError):
4382 pass
4383 else:
4384 if expected_type is None or isinstance(v, expected_type):
4385 return v
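# Usage sketch for dict_get/try_get above (illustrative values):
#   dict_get({'a': None, 'b': 0, 'c': 'x'}, ('a', 'b', 'c')) == 'x'
#   try_get({'a': [1, 2]}, lambda x: x['a'][0], int) == 1
#   try_get({'a': [1, 2]}, lambda x: x['a'][5], int) is None  # IndexError is swallowed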
329ca3be
S
4386
4387
6cc62232
S
4388def merge_dicts(*dicts):
4389 merged = {}
4390 for a_dict in dicts:
4391 for k, v in a_dict.items():
4392 if v is None:
4393 continue
3089bc74
S
4394 if (k not in merged
4395 or (isinstance(v, compat_str) and v
4396 and isinstance(merged[k], compat_str)
4397 and not merged[k])):
6cc62232
S
4398 merged[k] = v
4399 return merged
4400
4401
8e60dc75
S
4402def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4403 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4404
16392824 4405
a1a530b0
PH
4406US_RATINGS = {
4407 'G': 0,
4408 'PG': 10,
4409 'PG-13': 13,
4410 'R': 16,
4411 'NC': 18,
4412}
fac55558
PH
4413
4414
a8795327 4415TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4416 'TV-Y': 0,
4417 'TV-Y7': 7,
4418 'TV-G': 0,
4419 'TV-PG': 0,
4420 'TV-14': 14,
4421 'TV-MA': 17,
a8795327
S
4422}
4423
4424
146c80e2 4425def parse_age_limit(s):
a8795327
S
4426 if type(s) == int:
4427 return s if 0 <= s <= 21 else None
4428 if not isinstance(s, compat_basestring):
d838b1bd 4429 return None
146c80e2 4430 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4431 if m:
4432 return int(m.group('age'))
5c5fae6d 4433 s = s.upper()
a8795327
S
4434 if s in US_RATINGS:
4435 return US_RATINGS[s]
5a16c9d9 4436 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4437 if m:
5a16c9d9 4438 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4439 return None
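# Usage sketch (illustrative values):
#   parse_age_limit(18) == 18
#   parse_age_limit('PG-13') == 13
#   parse_age_limit('TV-MA') == 17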
146c80e2
S
4440
4441
fac55558 4442def strip_jsonp(code):
609a61e3 4443 return re.sub(
5552c9eb 4444 r'''(?sx)^
e9c671d5 4445 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4446 (?:\s*&&\s*(?P=func_name))?
4447 \s*\(\s*(?P<callback_data>.*)\);?
4448 \s*?(?://[^\n]*)*$''',
4449 r'\g<callback_data>', code)
478c2c61
PH
4450
4451
5c610515 4452def js_to_json(code, vars={}):
4453 # vars is a dict of var, val pairs to substitute
c843e685 4454 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4455 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4456 INTEGER_TABLE = (
4457 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4458 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4459 )
4460
e05f6939 4461 def fix_kv(m):
e7b6d122
PH
4462 v = m.group(0)
4463 if v in ('true', 'false', 'null'):
4464 return v
421ddcb8
C
4465 elif v in ('undefined', 'void 0'):
4466 return 'null'
8bdd16b4 4467 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4468 return ""
4469
4470 if v[0] in ("'", '"'):
4471 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4472 '"': '\\"',
bd1e4844 4473 "\\'": "'",
4474 '\\\n': '',
4475 '\\x': '\\u00',
4476 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4477 else:
4478 for regex, base in INTEGER_TABLE:
4479 im = re.match(regex, v)
4480 if im:
4481 i = int(im.group(1), base)
4482 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4483
5c610515 4484 if v in vars:
4485 return vars[v]
4486
e7b6d122 4487 return '"%s"' % v
e05f6939 4488
bd1e4844 4489 return re.sub(r'''(?sx)
4490 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4491 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4492 {comment}|,(?={skip}[\]}}])|
421ddcb8 4493 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4494 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4495 [0-9]+(?={skip}:)|
4496 !+
4195096e 4497 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
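# Usage sketch (illustrative values): bare keys get quoted, single-quoted
# strings are converted, hex/octal literals become decimal and undefined
# becomes null:
#   js_to_json("{abc: 'def', 'ghi': 0x1F, jkl: undefined}")
#       == '{"abc": "def", "ghi": 31, "jkl": null}'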
e05f6939
PH
4498
4499
478c2c61
PH
4500def qualities(quality_ids):
4501 """ Get a numeric quality value out of a list of possible values """
4502 def q(qid):
4503 try:
4504 return quality_ids.index(qid)
4505 except ValueError:
4506 return -1
4507 return q
4508
acd69589 4509
de6000d9 4510DEFAULT_OUTTMPL = {
4511 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4512 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4513}
4514OUTTMPL_TYPES = {
72755351 4515 'chapter': None,
de6000d9 4516 'subtitle': None,
4517 'thumbnail': None,
4518 'description': 'description',
4519 'annotation': 'annotations.xml',
4520 'infojson': 'info.json',
08438d2c 4521 'link': None,
5112f26a 4522 'pl_thumbnail': None,
de6000d9 4523 'pl_description': 'description',
4524 'pl_infojson': 'info.json',
4525}
0a871f68 4526
143db31d 4527# As of [1] format syntax is:
4528# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4529# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4530STR_FORMAT_RE_TMPL = r'''(?x)
4531 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4532 %
524e2e4f 4533 (?P<has_key>\((?P<key>{0})\))?
752cda38 4534 (?P<format>
524e2e4f 4535 (?P<conversion>[#0\-+ ]+)?
4536 (?P<min_width>\d+)?
4537 (?P<precision>\.\d+)?
4538 (?P<len_mod>[hlL])? # unused in python
901130bb 4539 {1} # conversion type
752cda38 4540 )
143db31d 4541'''
4542
7d1eb38a 4543
901130bb 4544STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4545
7d1eb38a 4546
a020a0dc
PH
4547def limit_length(s, length):
4548 """ Add ellipses to overly long strings """
4549 if s is None:
4550 return None
4551 ELLIPSES = '...'
4552 if len(s) > length:
4553 return s[:length - len(ELLIPSES)] + ELLIPSES
4554 return s
48844745
PH
4555
4556
4557def version_tuple(v):
5f9b8394 4558 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4559
4560
4561def is_outdated_version(version, limit, assume_new=True):
4562 if not version:
4563 return not assume_new
4564 try:
4565 return version_tuple(version) < version_tuple(limit)
4566 except ValueError:
4567 return not assume_new
732ea2f0
PH
4568
4569
4570def ytdl_is_updateable():
7a5c1cfe 4571 """ Returns if yt-dlp can be updated with -U """
735d865e 4572
5d535b4a 4573 from .update import is_non_updateable
732ea2f0 4574
5d535b4a 4575 return not is_non_updateable()
7d4111ed
PH
4576
4577
4578def args_to_str(args):
4579 # Get a short string representation for a subprocess command
702ccf2d 4580 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4581
4582
9b9c5355 4583def error_to_compat_str(err):
fdae2358
S
4584 err_str = str(err)
4585 # On Python 2, the error byte string must be decoded with the proper
4586 # encoding rather than ascii
4587 if sys.version_info[0] < 3:
4588 err_str = err_str.decode(preferredencoding())
4589 return err_str
4590
4591
c460bdd5 4592def mimetype2ext(mt):
eb9ee194
S
4593 if mt is None:
4594 return None
4595
9359f3d4
F
4596 mt, _, params = mt.partition(';')
4597 mt = mt.strip()
4598
4599 FULL_MAP = {
765ac263 4600 'audio/mp4': 'm4a',
6c33d24b
YCH
4601 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4602 # it's the most popular one
4603 'audio/mpeg': 'mp3',
ba39289d 4604 'audio/x-wav': 'wav',
9359f3d4
F
4605 'audio/wav': 'wav',
4606 'audio/wave': 'wav',
4607 }
4608
4609 ext = FULL_MAP.get(mt)
765ac263
JMF
4610 if ext is not None:
4611 return ext
4612
9359f3d4 4613 SUBTYPE_MAP = {
f6861ec9 4614 '3gpp': '3gp',
cafcf657 4615 'smptett+xml': 'tt',
cafcf657 4616 'ttaf+xml': 'dfxp',
a0d8d704 4617 'ttml+xml': 'ttml',
f6861ec9 4618 'x-flv': 'flv',
a0d8d704 4619 'x-mp4-fragmented': 'mp4',
d4f05d47 4620 'x-ms-sami': 'sami',
a0d8d704 4621 'x-ms-wmv': 'wmv',
b4173f15
RA
4622 'mpegurl': 'm3u8',
4623 'x-mpegurl': 'm3u8',
4624 'vnd.apple.mpegurl': 'm3u8',
4625 'dash+xml': 'mpd',
b4173f15 4626 'f4m+xml': 'f4m',
f164b971 4627 'hds+xml': 'f4m',
e910fe2f 4628 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4629 'quicktime': 'mov',
98ce1a3f 4630 'mp2t': 'ts',
39e7107d 4631 'x-wav': 'wav',
9359f3d4
F
4632 'filmstrip+json': 'fs',
4633 'svg+xml': 'svg',
4634 }
4635
4636 _, _, subtype = mt.rpartition('/')
4637 ext = SUBTYPE_MAP.get(subtype.lower())
4638 if ext is not None:
4639 return ext
4640
4641 SUFFIX_MAP = {
4642 'json': 'json',
4643 'xml': 'xml',
4644 'zip': 'zip',
4645 'gzip': 'gz',
4646 }
4647
4648 _, _, suffix = subtype.partition('+')
4649 ext = SUFFIX_MAP.get(suffix)
4650 if ext is not None:
4651 return ext
4652
4653 return subtype.replace('+', '.')
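# Usage sketch (illustrative values):
#   mimetype2ext('audio/mp4') == 'm4a'
#   mimetype2ext('application/vnd.apple.mpegurl') == 'm3u8'
#   mimetype2ext('application/dash+xml; charset=utf-8') == 'mpd'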
c460bdd5
PH
4654
4655
4f3c5e06 4656def parse_codecs(codecs_str):
4657 # http://tools.ietf.org/html/rfc6381
4658 if not codecs_str:
4659 return {}
a0566bbf 4660 split_codecs = list(filter(None, map(
dbf5416a 4661 str.strip, codecs_str.strip().strip(',').split(','))))
176f1866 4662 vcodec, acodec, hdr = None, None, None
a0566bbf 4663 for full_codec in split_codecs:
9bd979ca 4664 parts = full_codec.split('.')
4665 codec = parts[0].replace('0', '')
4666 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
4667 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
4f3c5e06 4668 if not vcodec:
9bd979ca 4669 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
176f1866 4670 if codec in ('dvh1', 'dvhe'):
4671 hdr = 'DV'
9bd979ca 4672 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
4673 hdr = 'HDR10'
4674 elif full_codec.replace('0', '').startswith('vp9.2'):
176f1866 4675 hdr = 'HDR10'
60f5c9fb 4676 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4677 if not acodec:
4678 acodec = full_codec
4679 else:
60f5c9fb 4680 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4681 if not vcodec and not acodec:
a0566bbf 4682 if len(split_codecs) == 2:
4f3c5e06 4683 return {
a0566bbf 4684 'vcodec': split_codecs[0],
4685 'acodec': split_codecs[1],
4f3c5e06 4686 }
4687 else:
4688 return {
4689 'vcodec': vcodec or 'none',
4690 'acodec': acodec or 'none',
176f1866 4691 'dynamic_range': hdr,
4f3c5e06 4692 }
4693 return {}
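# Usage sketch (illustrative values):
#   parse_codecs('avc1.64001F, mp4a.40.2') == {
#       'vcodec': 'avc1.64001F', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
#   parse_codecs('dvh1.05.06') == {
#       'vcodec': 'dvh1.05.06', 'acodec': 'none', 'dynamic_range': 'DV'}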
4694
4695
2ccd1b10 4696def urlhandle_detect_ext(url_handle):
79298173 4697 getheader = url_handle.headers.get
2ccd1b10 4698
b55ee18f
PH
4699 cd = getheader('Content-Disposition')
4700 if cd:
4701 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4702 if m:
4703 e = determine_ext(m.group('filename'), default_ext=None)
4704 if e:
4705 return e
4706
c460bdd5 4707 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4708
4709
1e399778
YCH
4710def encode_data_uri(data, mime_type):
4711 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4712
4713
05900629 4714def age_restricted(content_limit, age_limit):
6ec6cb4e 4715 """ Returns True iff the content should be blocked """
05900629
PH
4716
4717 if age_limit is None: # No limit set
4718 return False
4719 if content_limit is None:
4720 return False # Content available for everyone
4721 return age_limit < content_limit
61ca9a80
PH
4722
4723
4724def is_html(first_bytes):
4725 """ Detect whether a file contains HTML by examining its first bytes. """
4726
4727 BOMS = [
4728 (b'\xef\xbb\xbf', 'utf-8'),
4729 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4730 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4731 (b'\xff\xfe', 'utf-16-le'),
4732 (b'\xfe\xff', 'utf-16-be'),
4733 ]
4734 for bom, enc in BOMS:
4735 if first_bytes.startswith(bom):
4736 s = first_bytes[len(bom):].decode(enc, 'replace')
4737 break
4738 else:
4739 s = first_bytes.decode('utf-8', 'replace')
4740
4741 return re.match(r'^\s*<', s)
a055469f
PH
4742
4743
4744def determine_protocol(info_dict):
4745 protocol = info_dict.get('protocol')
4746 if protocol is not None:
4747 return protocol
4748
7de837a5 4749 url = sanitize_url(info_dict['url'])
a055469f
PH
4750 if url.startswith('rtmp'):
4751 return 'rtmp'
4752 elif url.startswith('mms'):
4753 return 'mms'
4754 elif url.startswith('rtsp'):
4755 return 'rtsp'
4756
4757 ext = determine_ext(url)
4758 if ext == 'm3u8':
4759 return 'm3u8'
4760 elif ext == 'f4m':
4761 return 'f4m'
4762
4763 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4764
4765
76d321f6 4766def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4767 """ Render a list of rows, each as a list of values """
ec11a9f4 4768 def width(string):
4769 return len(remove_terminal_sequences(string))
76d321f6 4770
4771 def get_max_lens(table):
ec11a9f4 4772 return [max(width(str(v)) for v in col) for col in zip(*table)]
76d321f6 4773
4774 def filter_using_list(row, filterArray):
4775 return [col for (take, col) in zip(filterArray, row) if take]
4776
4777 if hideEmpty:
4778 max_lens = get_max_lens(data)
4779 header_row = filter_using_list(header_row, max_lens)
4780 data = [filter_using_list(row, max_lens) for row in data]
4781
cfb56d1a 4782 table = [header_row] + data
76d321f6 4783 max_lens = get_max_lens(table)
ec11a9f4 4784 extraGap += 1
76d321f6 4785 if delim:
ec11a9f4 4786 table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
4787 max_lens[-1] = 0
4788 for row in table:
4789 for pos, text in enumerate(map(str, row)):
4790 row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
4791 ret = '\n'.join(''.join(row) for row in table)
4792 return ret
347de493
PH
4793
4794
8f18aca8 4795def _match_one(filter_part, dct, incomplete):
77b87f05 4796 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4797 STRING_OPERATORS = {
4798 '*=': operator.contains,
4799 '^=': lambda attr, value: attr.startswith(value),
4800 '$=': lambda attr, value: attr.endswith(value),
4801 '~=': lambda attr, value: re.search(value, attr),
4802 }
347de493 4803 COMPARISON_OPERATORS = {
a047eeb6 4804 **STRING_OPERATORS,
4805 '<=': operator.le, # "<=" must be defined above "<"
347de493 4806 '<': operator.lt,
347de493 4807 '>=': operator.ge,
a047eeb6 4808 '>': operator.gt,
347de493 4809 '=': operator.eq,
347de493 4810 }
a047eeb6 4811
347de493
PH
4812 operator_rex = re.compile(r'''(?x)\s*
4813 (?P<key>[a-z_]+)
77b87f05 4814 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493 4815 (?:
a047eeb6 4816 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4817 (?P<strval>.+?)
347de493
PH
4818 )
4819 \s*$
4820 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4821 m = operator_rex.search(filter_part)
4822 if m:
18f96d12 4823 m = m.groupdict()
4824 unnegated_op = COMPARISON_OPERATORS[m['op']]
4825 if m['negation']:
77b87f05
MT
4826 op = lambda attr, value: not unnegated_op(attr, value)
4827 else:
4828 op = unnegated_op
18f96d12 4829 comparison_value = m['quotedstrval'] or m['strval']
4830 if m['quote']:
4831 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4832 actual_value = dct.get(m['key'])
4833 numeric_comparison = None
4834 if isinstance(actual_value, compat_numeric_types):
e5a088dc
S
4835 # If the original field is a string and the matching comparison value is
4836 # a number, we should respect the origin of the original field
4837 # and process the comparison value as a string (see
18f96d12 4838 # https://github.com/ytdl-org/youtube-dl/issues/11082)
347de493 4839 try:
18f96d12 4840 numeric_comparison = int(comparison_value)
347de493 4841 except ValueError:
18f96d12 4842 numeric_comparison = parse_filesize(comparison_value)
4843 if numeric_comparison is None:
4844 numeric_comparison = parse_filesize(f'{comparison_value}B')
4845 if numeric_comparison is None:
4846 numeric_comparison = parse_duration(comparison_value)
4847 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4848 raise ValueError('Operator %s only supports string values!' % m['op'])
347de493 4849 if actual_value is None:
18f96d12 4850 return incomplete or m['none_inclusive']
4851 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
347de493
PH
4852
4853 UNARY_OPERATORS = {
1cc47c66
S
4854 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4855 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4856 }
4857 operator_rex = re.compile(r'''(?x)\s*
4858 (?P<op>%s)\s*(?P<key>[a-z_]+)
4859 \s*$
4860 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4861 m = operator_rex.search(filter_part)
4862 if m:
4863 op = UNARY_OPERATORS[m.group('op')]
4864 actual_value = dct.get(m.group('key'))
8f18aca8 4865 if incomplete and actual_value is None:
4866 return True
347de493
PH
4867 return op(actual_value)
4868
4869 raise ValueError('Invalid filter part %r' % filter_part)
4870
4871
8f18aca8 4872def match_str(filter_str, dct, incomplete=False):
4873 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4874 When incomplete, all conditions passes on missing fields
4875 """
347de493 4876 return all(
8f18aca8 4877 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4878 for filter_part in re.split(r'(?<!\\)&', filter_str))
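# Usage sketch (illustrative values): '&' separates conditions, and string
# and numeric operators can be mixed:
#   match_str('duration > 60 & title *= news',
#             {'duration': 90, 'title': 'evening news'}) is True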
347de493
PH
4879
4880
4881def match_filter_func(filter_str):
8f18aca8 4882 def _match_func(info_dict, *args, **kwargs):
4883 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4884 return None
4885 else:
4886 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4887 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4888 return _match_func
91410c9b
PH
4889
4890
bf6427d2
YCH
4891def parse_dfxp_time_expr(time_expr):
4892 if not time_expr:
d631d5f9 4893 return
bf6427d2
YCH
4894
4895 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4896 if mobj:
4897 return float(mobj.group('time_offset'))
4898
db2fe38b 4899 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4900 if mobj:
db2fe38b 4901 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4902
4903
c1c924ab 4904def srt_subtitles_timecode(seconds):
aa7785f8 4905 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4906
4907
4908def ass_subtitles_timecode(seconds):
4909 time = timetuple_from_msec(seconds * 1000)
4910 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
bf6427d2
YCH
4911
4912
4913def dfxp2srt(dfxp_data):
3869028f
YCH
4914 '''
4915 @param dfxp_data A bytes-like object containing DFXP data
4916 @returns A unicode object containing converted SRT data
4917 '''
5b995f71 4918 LEGACY_NAMESPACES = (
3869028f
YCH
4919 (b'http://www.w3.org/ns/ttml', [
4920 b'http://www.w3.org/2004/11/ttaf1',
4921 b'http://www.w3.org/2006/04/ttaf1',
4922 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4923 ]),
3869028f
YCH
4924 (b'http://www.w3.org/ns/ttml#styling', [
4925 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4926 ]),
4927 )
4928
4929 SUPPORTED_STYLING = [
4930 'color',
4931 'fontFamily',
4932 'fontSize',
4933 'fontStyle',
4934 'fontWeight',
4935 'textDecoration'
4936 ]
4937
4e335771 4938 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4939 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4940 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4941 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4942 })
bf6427d2 4943
5b995f71
RA
4944 styles = {}
4945 default_style = {}
4946
87de7069 4947 class TTMLPElementParser(object):
5b995f71
RA
4948 _out = ''
4949 _unclosed_elements = []
4950 _applied_styles = []
bf6427d2 4951
2b14cb56 4952 def start(self, tag, attrib):
5b995f71
RA
4953 if tag in (_x('ttml:br'), 'br'):
4954 self._out += '\n'
4955 else:
4956 unclosed_elements = []
4957 style = {}
4958 element_style_id = attrib.get('style')
4959 if default_style:
4960 style.update(default_style)
4961 if element_style_id:
4962 style.update(styles.get(element_style_id, {}))
4963 for prop in SUPPORTED_STYLING:
4964 prop_val = attrib.get(_x('tts:' + prop))
4965 if prop_val:
4966 style[prop] = prop_val
4967 if style:
4968 font = ''
4969 for k, v in sorted(style.items()):
4970 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4971 continue
4972 if k == 'color':
4973 font += ' color="%s"' % v
4974 elif k == 'fontSize':
4975 font += ' size="%s"' % v
4976 elif k == 'fontFamily':
4977 font += ' face="%s"' % v
4978 elif k == 'fontWeight' and v == 'bold':
4979 self._out += '<b>'
4980 unclosed_elements.append('b')
4981 elif k == 'fontStyle' and v == 'italic':
4982 self._out += '<i>'
4983 unclosed_elements.append('i')
4984 elif k == 'textDecoration' and v == 'underline':
4985 self._out += '<u>'
4986 unclosed_elements.append('u')
4987 if font:
4988 self._out += '<font' + font + '>'
4989 unclosed_elements.append('font')
4990 applied_style = {}
4991 if self._applied_styles:
4992 applied_style.update(self._applied_styles[-1])
4993 applied_style.update(style)
4994 self._applied_styles.append(applied_style)
4995 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4996
2b14cb56 4997 def end(self, tag):
5b995f71
RA
4998 if tag not in (_x('ttml:br'), 'br'):
4999 unclosed_elements = self._unclosed_elements.pop()
5000 for element in reversed(unclosed_elements):
5001 self._out += '</%s>' % element
5002 if unclosed_elements and self._applied_styles:
5003 self._applied_styles.pop()
bf6427d2 5004
2b14cb56 5005 def data(self, data):
5b995f71 5006 self._out += data
2b14cb56 5007
5008 def close(self):
5b995f71 5009 return self._out.strip()
2b14cb56 5010
5011 def parse_node(node):
5012 target = TTMLPElementParser()
5013 parser = xml.etree.ElementTree.XMLParser(target=target)
5014 parser.feed(xml.etree.ElementTree.tostring(node))
5015 return parser.close()
bf6427d2 5016
5b995f71
RA
5017 for k, v in LEGACY_NAMESPACES:
5018 for ns in v:
5019 dfxp_data = dfxp_data.replace(ns, k)
5020
3869028f 5021 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 5022 out = []
5b995f71 5023 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
5024
5025 if not paras:
5026 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 5027
5b995f71
RA
5028 repeat = False
5029 while True:
5030 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
5031 style_id = style.get('id') or style.get(_x('xml:id'))
5032 if not style_id:
5033 continue
5b995f71
RA
5034 parent_style_id = style.get('style')
5035 if parent_style_id:
5036 if parent_style_id not in styles:
5037 repeat = True
5038 continue
5039 styles[style_id] = styles[parent_style_id].copy()
5040 for prop in SUPPORTED_STYLING:
5041 prop_val = style.get(_x('tts:' + prop))
5042 if prop_val:
5043 styles.setdefault(style_id, {})[prop] = prop_val
5044 if repeat:
5045 repeat = False
5046 else:
5047 break
5048
5049 for p in ('body', 'div'):
5050 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5051 if ele is None:
5052 continue
5053 style = styles.get(ele.get('style'))
5054 if not style:
5055 continue
5056 default_style.update(style)
5057
bf6427d2 5058 for para, index in zip(paras, itertools.count(1)):
d631d5f9 5059 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 5060 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
5061 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5062 if begin_time is None:
5063 continue
7dff0363 5064 if not end_time:
d631d5f9
YCH
5065 if not dur:
5066 continue
5067 end_time = begin_time + dur
bf6427d2
YCH
5068 out.append('%d\n%s --> %s\n%s\n\n' % (
5069 index,
c1c924ab
YCH
5070 srt_subtitles_timecode(begin_time),
5071 srt_subtitles_timecode(end_time),
bf6427d2
YCH
5072 parse_node(para)))
5073
5074 return ''.join(out)
5075
5076
66e289ba
S
5077def cli_option(params, command_option, param):
5078 param = params.get(param)
98e698f1
RA
5079 if param:
5080 param = compat_str(param)
66e289ba
S
5081 return [command_option, param] if param is not None else []
5082
5083
5084def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
5085 param = params.get(param)
5b232f46
S
5086 if param is None:
5087 return []
66e289ba
S
5088 assert isinstance(param, bool)
5089 if separator:
5090 return [command_option + separator + (true_value if param else false_value)]
5091 return [command_option, true_value if param else false_value]
5092
5093
5094def cli_valueless_option(params, command_option, param, expected_value=True):
5095 param = params.get(param)
5096 return [command_option] if param == expected_value else []
5097
5098
e92caff5 5099def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 5100 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 5101 if use_compat:
5b1ecbb3 5102 return argdict
5103 else:
5104 argdict = None
eab9b2bc 5105 if argdict is None:
5b1ecbb3 5106 return default
eab9b2bc 5107 assert isinstance(argdict, dict)
5108
e92caff5 5109 assert isinstance(keys, (list, tuple))
5110 for key_list in keys:
e92caff5 5111 arg_list = list(filter(
5112 lambda x: x is not None,
6606817a 5113 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5114 if arg_list:
5115 return [arg for args in arg_list for arg in args]
5116 return default
66e289ba 5117
6251555f 5118
330690a2 5119def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5120 main_key, exe = main_key.lower(), exe.lower()
5121 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5122 keys = [f'{root_key}{k}' for k in (keys or [''])]
5123 if root_key in keys:
5124 if main_key != exe:
5125 keys.append((main_key, exe))
5126 keys.append('default')
5127 else:
5128 use_compat = False
5129 return cli_configuration_args(argdict, keys, default, use_compat)
5130
66e289ba 5131
39672624
YCH
5132class ISO639Utils(object):
5133 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5134 _lang_map = {
5135 'aa': 'aar',
5136 'ab': 'abk',
5137 'ae': 'ave',
5138 'af': 'afr',
5139 'ak': 'aka',
5140 'am': 'amh',
5141 'an': 'arg',
5142 'ar': 'ara',
5143 'as': 'asm',
5144 'av': 'ava',
5145 'ay': 'aym',
5146 'az': 'aze',
5147 'ba': 'bak',
5148 'be': 'bel',
5149 'bg': 'bul',
5150 'bh': 'bih',
5151 'bi': 'bis',
5152 'bm': 'bam',
5153 'bn': 'ben',
5154 'bo': 'bod',
5155 'br': 'bre',
5156 'bs': 'bos',
5157 'ca': 'cat',
5158 'ce': 'che',
5159 'ch': 'cha',
5160 'co': 'cos',
5161 'cr': 'cre',
5162 'cs': 'ces',
5163 'cu': 'chu',
5164 'cv': 'chv',
5165 'cy': 'cym',
5166 'da': 'dan',
5167 'de': 'deu',
5168 'dv': 'div',
5169 'dz': 'dzo',
5170 'ee': 'ewe',
5171 'el': 'ell',
5172 'en': 'eng',
5173 'eo': 'epo',
5174 'es': 'spa',
5175 'et': 'est',
5176 'eu': 'eus',
5177 'fa': 'fas',
5178 'ff': 'ful',
5179 'fi': 'fin',
5180 'fj': 'fij',
5181 'fo': 'fao',
5182 'fr': 'fra',
5183 'fy': 'fry',
5184 'ga': 'gle',
5185 'gd': 'gla',
5186 'gl': 'glg',
5187 'gn': 'grn',
5188 'gu': 'guj',
5189 'gv': 'glv',
5190 'ha': 'hau',
5191 'he': 'heb',
b7acc835 5192 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5193 'hi': 'hin',
5194 'ho': 'hmo',
5195 'hr': 'hrv',
5196 'ht': 'hat',
5197 'hu': 'hun',
5198 'hy': 'hye',
5199 'hz': 'her',
5200 'ia': 'ina',
5201 'id': 'ind',
b7acc835 5202 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5203 'ie': 'ile',
5204 'ig': 'ibo',
5205 'ii': 'iii',
5206 'ik': 'ipk',
5207 'io': 'ido',
5208 'is': 'isl',
5209 'it': 'ita',
5210 'iu': 'iku',
5211 'ja': 'jpn',
5212 'jv': 'jav',
5213 'ka': 'kat',
5214 'kg': 'kon',
5215 'ki': 'kik',
5216 'kj': 'kua',
5217 'kk': 'kaz',
5218 'kl': 'kal',
5219 'km': 'khm',
5220 'kn': 'kan',
5221 'ko': 'kor',
5222 'kr': 'kau',
5223 'ks': 'kas',
5224 'ku': 'kur',
5225 'kv': 'kom',
5226 'kw': 'cor',
5227 'ky': 'kir',
5228 'la': 'lat',
5229 'lb': 'ltz',
5230 'lg': 'lug',
5231 'li': 'lim',
5232 'ln': 'lin',
5233 'lo': 'lao',
5234 'lt': 'lit',
5235 'lu': 'lub',
5236 'lv': 'lav',
5237 'mg': 'mlg',
5238 'mh': 'mah',
5239 'mi': 'mri',
5240 'mk': 'mkd',
5241 'ml': 'mal',
5242 'mn': 'mon',
5243 'mr': 'mar',
5244 'ms': 'msa',
5245 'mt': 'mlt',
5246 'my': 'mya',
5247 'na': 'nau',
5248 'nb': 'nob',
5249 'nd': 'nde',
5250 'ne': 'nep',
5251 'ng': 'ndo',
5252 'nl': 'nld',
5253 'nn': 'nno',
5254 'no': 'nor',
5255 'nr': 'nbl',
5256 'nv': 'nav',
5257 'ny': 'nya',
5258 'oc': 'oci',
5259 'oj': 'oji',
5260 'om': 'orm',
5261 'or': 'ori',
5262 'os': 'oss',
5263 'pa': 'pan',
5264 'pi': 'pli',
5265 'pl': 'pol',
5266 'ps': 'pus',
5267 'pt': 'por',
5268 'qu': 'que',
5269 'rm': 'roh',
5270 'rn': 'run',
5271 'ro': 'ron',
5272 'ru': 'rus',
5273 'rw': 'kin',
5274 'sa': 'san',
5275 'sc': 'srd',
5276 'sd': 'snd',
5277 'se': 'sme',
5278 'sg': 'sag',
5279 'si': 'sin',
5280 'sk': 'slk',
5281 'sl': 'slv',
5282 'sm': 'smo',
5283 'sn': 'sna',
5284 'so': 'som',
5285 'sq': 'sqi',
5286 'sr': 'srp',
5287 'ss': 'ssw',
5288 'st': 'sot',
5289 'su': 'sun',
5290 'sv': 'swe',
5291 'sw': 'swa',
5292 'ta': 'tam',
5293 'te': 'tel',
5294 'tg': 'tgk',
5295 'th': 'tha',
5296 'ti': 'tir',
5297 'tk': 'tuk',
5298 'tl': 'tgl',
5299 'tn': 'tsn',
5300 'to': 'ton',
5301 'tr': 'tur',
5302 'ts': 'tso',
5303 'tt': 'tat',
5304 'tw': 'twi',
5305 'ty': 'tah',
5306 'ug': 'uig',
5307 'uk': 'ukr',
5308 'ur': 'urd',
5309 'uz': 'uzb',
5310 've': 'ven',
5311 'vi': 'vie',
5312 'vo': 'vol',
5313 'wa': 'wln',
5314 'wo': 'wol',
5315 'xh': 'xho',
5316 'yi': 'yid',
e9a50fba 5317 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5318 'yo': 'yor',
5319 'za': 'zha',
5320 'zh': 'zho',
5321 'zu': 'zul',
5322 }
5323
5324 @classmethod
5325 def short2long(cls, code):
5326 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5327 return cls._lang_map.get(code[:2])
5328
5329 @classmethod
5330 def long2short(cls, code):
5331 """Convert language code from ISO 639-2/T to ISO 639-1"""
5332 for short_name, long_name in cls._lang_map.items():
5333 if long_name == code:
5334 return short_name
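# Usage sketch (illustrative values):
#   ISO639Utils.short2long('en') == 'eng'
#   ISO639Utils.long2short('deu') == 'de'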
5335
5336
4eb10f66
YCH
5337class ISO3166Utils(object):
5338 # From http://data.okfn.org/data/core/country-list
5339 _country_map = {
5340 'AF': 'Afghanistan',
5341 'AX': 'Åland Islands',
5342 'AL': 'Albania',
5343 'DZ': 'Algeria',
5344 'AS': 'American Samoa',
5345 'AD': 'Andorra',
5346 'AO': 'Angola',
5347 'AI': 'Anguilla',
5348 'AQ': 'Antarctica',
5349 'AG': 'Antigua and Barbuda',
5350 'AR': 'Argentina',
5351 'AM': 'Armenia',
5352 'AW': 'Aruba',
5353 'AU': 'Australia',
5354 'AT': 'Austria',
5355 'AZ': 'Azerbaijan',
5356 'BS': 'Bahamas',
5357 'BH': 'Bahrain',
5358 'BD': 'Bangladesh',
5359 'BB': 'Barbados',
5360 'BY': 'Belarus',
5361 'BE': 'Belgium',
5362 'BZ': 'Belize',
5363 'BJ': 'Benin',
5364 'BM': 'Bermuda',
5365 'BT': 'Bhutan',
5366 'BO': 'Bolivia, Plurinational State of',
5367 'BQ': 'Bonaire, Sint Eustatius and Saba',
5368 'BA': 'Bosnia and Herzegovina',
5369 'BW': 'Botswana',
5370 'BV': 'Bouvet Island',
5371 'BR': 'Brazil',
5372 'IO': 'British Indian Ocean Territory',
5373 'BN': 'Brunei Darussalam',
5374 'BG': 'Bulgaria',
5375 'BF': 'Burkina Faso',
5376 'BI': 'Burundi',
5377 'KH': 'Cambodia',
5378 'CM': 'Cameroon',
5379 'CA': 'Canada',
5380 'CV': 'Cape Verde',
5381 'KY': 'Cayman Islands',
5382 'CF': 'Central African Republic',
5383 'TD': 'Chad',
5384 'CL': 'Chile',
5385 'CN': 'China',
5386 'CX': 'Christmas Island',
5387 'CC': 'Cocos (Keeling) Islands',
5388 'CO': 'Colombia',
5389 'KM': 'Comoros',
5390 'CG': 'Congo',
5391 'CD': 'Congo, the Democratic Republic of the',
5392 'CK': 'Cook Islands',
5393 'CR': 'Costa Rica',
5394 'CI': 'Côte d\'Ivoire',
5395 'HR': 'Croatia',
5396 'CU': 'Cuba',
5397 'CW': 'Curaçao',
5398 'CY': 'Cyprus',
5399 'CZ': 'Czech Republic',
5400 'DK': 'Denmark',
5401 'DJ': 'Djibouti',
5402 'DM': 'Dominica',
5403 'DO': 'Dominican Republic',
5404 'EC': 'Ecuador',
5405 'EG': 'Egypt',
5406 'SV': 'El Salvador',
5407 'GQ': 'Equatorial Guinea',
5408 'ER': 'Eritrea',
5409 'EE': 'Estonia',
5410 'ET': 'Ethiopia',
5411 'FK': 'Falkland Islands (Malvinas)',
5412 'FO': 'Faroe Islands',
5413 'FJ': 'Fiji',
5414 'FI': 'Finland',
5415 'FR': 'France',
5416 'GF': 'French Guiana',
5417 'PF': 'French Polynesia',
5418 'TF': 'French Southern Territories',
5419 'GA': 'Gabon',
5420 'GM': 'Gambia',
5421 'GE': 'Georgia',
5422 'DE': 'Germany',
5423 'GH': 'Ghana',
5424 'GI': 'Gibraltar',
5425 'GR': 'Greece',
5426 'GL': 'Greenland',
5427 'GD': 'Grenada',
5428 'GP': 'Guadeloupe',
5429 'GU': 'Guam',
5430 'GT': 'Guatemala',
5431 'GG': 'Guernsey',
5432 'GN': 'Guinea',
5433 'GW': 'Guinea-Bissau',
5434 'GY': 'Guyana',
5435 'HT': 'Haiti',
5436 'HM': 'Heard Island and McDonald Islands',
5437 'VA': 'Holy See (Vatican City State)',
5438 'HN': 'Honduras',
5439 'HK': 'Hong Kong',
5440 'HU': 'Hungary',
5441 'IS': 'Iceland',
5442 'IN': 'India',
5443 'ID': 'Indonesia',
5444 'IR': 'Iran, Islamic Republic of',
5445 'IQ': 'Iraq',
5446 'IE': 'Ireland',
5447 'IM': 'Isle of Man',
5448 'IL': 'Israel',
5449 'IT': 'Italy',
5450 'JM': 'Jamaica',
5451 'JP': 'Japan',
5452 'JE': 'Jersey',
5453 'JO': 'Jordan',
5454 'KZ': 'Kazakhstan',
5455 'KE': 'Kenya',
5456 'KI': 'Kiribati',
5457 'KP': 'Korea, Democratic People\'s Republic of',
5458 'KR': 'Korea, Republic of',
5459 'KW': 'Kuwait',
5460 'KG': 'Kyrgyzstan',
5461 'LA': 'Lao People\'s Democratic Republic',
5462 'LV': 'Latvia',
5463 'LB': 'Lebanon',
5464 'LS': 'Lesotho',
5465 'LR': 'Liberia',
5466 'LY': 'Libya',
5467 'LI': 'Liechtenstein',
5468 'LT': 'Lithuania',
5469 'LU': 'Luxembourg',
5470 'MO': 'Macao',
5471 'MK': 'Macedonia, the Former Yugoslav Republic of',
5472 'MG': 'Madagascar',
5473 'MW': 'Malawi',
5474 'MY': 'Malaysia',
5475 'MV': 'Maldives',
5476 'ML': 'Mali',
5477 'MT': 'Malta',
5478 'MH': 'Marshall Islands',
5479 'MQ': 'Martinique',
5480 'MR': 'Mauritania',
5481 'MU': 'Mauritius',
5482 'YT': 'Mayotte',
5483 'MX': 'Mexico',
5484 'FM': 'Micronesia, Federated States of',
5485 'MD': 'Moldova, Republic of',
5486 'MC': 'Monaco',
5487 'MN': 'Mongolia',
5488 'ME': 'Montenegro',
5489 'MS': 'Montserrat',
5490 'MA': 'Morocco',
5491 'MZ': 'Mozambique',
5492 'MM': 'Myanmar',
5493 'NA': 'Namibia',
5494 'NR': 'Nauru',
5495 'NP': 'Nepal',
5496 'NL': 'Netherlands',
5497 'NC': 'New Caledonia',
5498 'NZ': 'New Zealand',
5499 'NI': 'Nicaragua',
5500 'NE': 'Niger',
5501 'NG': 'Nigeria',
5502 'NU': 'Niue',
5503 'NF': 'Norfolk Island',
5504 'MP': 'Northern Mariana Islands',
5505 'NO': 'Norway',
5506 'OM': 'Oman',
5507 'PK': 'Pakistan',
5508 'PW': 'Palau',
5509 'PS': 'Palestine, State of',
5510 'PA': 'Panama',
5511 'PG': 'Papua New Guinea',
5512 'PY': 'Paraguay',
5513 'PE': 'Peru',
5514 'PH': 'Philippines',
5515 'PN': 'Pitcairn',
5516 'PL': 'Poland',
5517 'PT': 'Portugal',
5518 'PR': 'Puerto Rico',
5519 'QA': 'Qatar',
5520 'RE': 'Réunion',
5521 'RO': 'Romania',
5522 'RU': 'Russian Federation',
5523 'RW': 'Rwanda',
5524 'BL': 'Saint Barthélemy',
5525 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5526 'KN': 'Saint Kitts and Nevis',
5527 'LC': 'Saint Lucia',
5528 'MF': 'Saint Martin (French part)',
5529 'PM': 'Saint Pierre and Miquelon',
5530 'VC': 'Saint Vincent and the Grenadines',
5531 'WS': 'Samoa',
5532 'SM': 'San Marino',
5533 'ST': 'Sao Tome and Principe',
5534 'SA': 'Saudi Arabia',
5535 'SN': 'Senegal',
5536 'RS': 'Serbia',
5537 'SC': 'Seychelles',
5538 'SL': 'Sierra Leone',
5539 'SG': 'Singapore',
5540 'SX': 'Sint Maarten (Dutch part)',
5541 'SK': 'Slovakia',
5542 'SI': 'Slovenia',
5543 'SB': 'Solomon Islands',
5544 'SO': 'Somalia',
5545 'ZA': 'South Africa',
5546 'GS': 'South Georgia and the South Sandwich Islands',
5547 'SS': 'South Sudan',
5548 'ES': 'Spain',
5549 'LK': 'Sri Lanka',
5550 'SD': 'Sudan',
5551 'SR': 'Suriname',
5552 'SJ': 'Svalbard and Jan Mayen',
5553 'SZ': 'Swaziland',
5554 'SE': 'Sweden',
5555 'CH': 'Switzerland',
5556 'SY': 'Syrian Arab Republic',
5557 'TW': 'Taiwan, Province of China',
5558 'TJ': 'Tajikistan',
5559 'TZ': 'Tanzania, United Republic of',
5560 'TH': 'Thailand',
5561 'TL': 'Timor-Leste',
5562 'TG': 'Togo',
5563 'TK': 'Tokelau',
5564 'TO': 'Tonga',
5565 'TT': 'Trinidad and Tobago',
5566 'TN': 'Tunisia',
5567 'TR': 'Turkey',
5568 'TM': 'Turkmenistan',
5569 'TC': 'Turks and Caicos Islands',
5570 'TV': 'Tuvalu',
5571 'UG': 'Uganda',
5572 'UA': 'Ukraine',
5573 'AE': 'United Arab Emirates',
5574 'GB': 'United Kingdom',
5575 'US': 'United States',
5576 'UM': 'United States Minor Outlying Islands',
5577 'UY': 'Uruguay',
5578 'UZ': 'Uzbekistan',
5579 'VU': 'Vanuatu',
5580 'VE': 'Venezuela, Bolivarian Republic of',
5581 'VN': 'Viet Nam',
5582 'VG': 'Virgin Islands, British',
5583 'VI': 'Virgin Islands, U.S.',
5584 'WF': 'Wallis and Futuna',
5585 'EH': 'Western Sahara',
5586 'YE': 'Yemen',
5587 'ZM': 'Zambia',
5588 'ZW': 'Zimbabwe',
5589 }
5590
5591 @classmethod
5592 def short2full(cls, code):
5593 """Convert an ISO 3166-2 country code to the corresponding full name"""
5594 return cls._country_map.get(code.upper())
5595
5596
773f291d
S
5597class GeoUtils(object):
5598 # Major IPv4 address blocks per country
5599 _country_ip_map = {
53896ca5 5600 'AD': '46.172.224.0/19',
773f291d
S
5601 'AE': '94.200.0.0/13',
5602 'AF': '149.54.0.0/17',
5603 'AG': '209.59.64.0/18',
5604 'AI': '204.14.248.0/21',
5605 'AL': '46.99.0.0/16',
5606 'AM': '46.70.0.0/15',
5607 'AO': '105.168.0.0/13',
53896ca5
S
5608 'AP': '182.50.184.0/21',
5609 'AQ': '23.154.160.0/24',
773f291d
S
5610 'AR': '181.0.0.0/12',
5611 'AS': '202.70.112.0/20',
53896ca5 5612 'AT': '77.116.0.0/14',
773f291d
S
5613 'AU': '1.128.0.0/11',
5614 'AW': '181.41.0.0/18',
53896ca5
S
5615 'AX': '185.217.4.0/22',
5616 'AZ': '5.197.0.0/16',
773f291d
S
5617 'BA': '31.176.128.0/17',
5618 'BB': '65.48.128.0/17',
5619 'BD': '114.130.0.0/16',
5620 'BE': '57.0.0.0/8',
53896ca5 5621 'BF': '102.178.0.0/15',
773f291d
S
5622 'BG': '95.42.0.0/15',
5623 'BH': '37.131.0.0/17',
5624 'BI': '154.117.192.0/18',
5625 'BJ': '137.255.0.0/16',
53896ca5 5626 'BL': '185.212.72.0/23',
773f291d
S
5627 'BM': '196.12.64.0/18',
5628 'BN': '156.31.0.0/16',
5629 'BO': '161.56.0.0/16',
5630 'BQ': '161.0.80.0/20',
53896ca5 5631 'BR': '191.128.0.0/12',
773f291d
S
5632 'BS': '24.51.64.0/18',
5633 'BT': '119.2.96.0/19',
5634 'BW': '168.167.0.0/16',
5635 'BY': '178.120.0.0/13',
5636 'BZ': '179.42.192.0/18',
5637 'CA': '99.224.0.0/11',
5638 'CD': '41.243.0.0/16',
53896ca5
S
5639 'CF': '197.242.176.0/21',
5640 'CG': '160.113.0.0/16',
773f291d 5641 'CH': '85.0.0.0/13',
53896ca5 5642 'CI': '102.136.0.0/14',
773f291d
S
5643 'CK': '202.65.32.0/19',
5644 'CL': '152.172.0.0/14',
53896ca5 5645 'CM': '102.244.0.0/14',
773f291d
S
5646 'CN': '36.128.0.0/10',
5647 'CO': '181.240.0.0/12',
5648 'CR': '201.192.0.0/12',
5649 'CU': '152.206.0.0/15',
5650 'CV': '165.90.96.0/19',
5651 'CW': '190.88.128.0/17',
53896ca5 5652 'CY': '31.153.0.0/16',
773f291d
S
5653 'CZ': '88.100.0.0/14',
5654 'DE': '53.0.0.0/8',
5655 'DJ': '197.241.0.0/17',
5656 'DK': '87.48.0.0/12',
5657 'DM': '192.243.48.0/20',
5658 'DO': '152.166.0.0/15',
5659 'DZ': '41.96.0.0/12',
5660 'EC': '186.68.0.0/15',
5661 'EE': '90.190.0.0/15',
5662 'EG': '156.160.0.0/11',
5663 'ER': '196.200.96.0/20',
5664 'ES': '88.0.0.0/11',
5665 'ET': '196.188.0.0/14',
5666 'EU': '2.16.0.0/13',
5667 'FI': '91.152.0.0/13',
5668 'FJ': '144.120.0.0/16',
53896ca5 5669 'FK': '80.73.208.0/21',
773f291d
S
5670 'FM': '119.252.112.0/20',
5671 'FO': '88.85.32.0/19',
5672 'FR': '90.0.0.0/9',
5673 'GA': '41.158.0.0/15',
5674 'GB': '25.0.0.0/8',
5675 'GD': '74.122.88.0/21',
5676 'GE': '31.146.0.0/16',
5677 'GF': '161.22.64.0/18',
5678 'GG': '62.68.160.0/19',
53896ca5
S
5679 'GH': '154.160.0.0/12',
5680 'GI': '95.164.0.0/16',
773f291d
S
5681 'GL': '88.83.0.0/19',
5682 'GM': '160.182.0.0/15',
5683 'GN': '197.149.192.0/18',
5684 'GP': '104.250.0.0/19',
5685 'GQ': '105.235.224.0/20',
5686 'GR': '94.64.0.0/13',
5687 'GT': '168.234.0.0/16',
5688 'GU': '168.123.0.0/16',
5689 'GW': '197.214.80.0/20',
5690 'GY': '181.41.64.0/18',
5691 'HK': '113.252.0.0/14',
5692 'HN': '181.210.0.0/16',
5693 'HR': '93.136.0.0/13',
5694 'HT': '148.102.128.0/17',
5695 'HU': '84.0.0.0/14',
5696 'ID': '39.192.0.0/10',
5697 'IE': '87.32.0.0/12',
5698 'IL': '79.176.0.0/13',
5699 'IM': '5.62.80.0/20',
5700 'IN': '117.192.0.0/10',
5701 'IO': '203.83.48.0/21',
5702 'IQ': '37.236.0.0/14',
5703 'IR': '2.176.0.0/12',
5704 'IS': '82.221.0.0/16',
5705 'IT': '79.0.0.0/10',
5706 'JE': '87.244.64.0/18',
5707 'JM': '72.27.0.0/17',
5708 'JO': '176.29.0.0/16',
53896ca5 5709 'JP': '133.0.0.0/8',
5710 'KE': '105.48.0.0/12',
5711 'KG': '158.181.128.0/17',
5712 'KH': '36.37.128.0/17',
5713 'KI': '103.25.140.0/22',
5714 'KM': '197.255.224.0/20',
53896ca5 5715 'KN': '198.167.192.0/19',
5716 'KP': '175.45.176.0/22',
5717 'KR': '175.192.0.0/10',
5718 'KW': '37.36.0.0/14',
5719 'KY': '64.96.0.0/15',
5720 'KZ': '2.72.0.0/13',
5721 'LA': '115.84.64.0/18',
5722 'LB': '178.135.0.0/16',
53896ca5 5723 'LC': '24.92.144.0/20',
5724 'LI': '82.117.0.0/19',
5725 'LK': '112.134.0.0/15',
53896ca5 5726 'LR': '102.183.0.0/16',
5727 'LS': '129.232.0.0/17',
5728 'LT': '78.56.0.0/13',
5729 'LU': '188.42.0.0/16',
5730 'LV': '46.109.0.0/16',
5731 'LY': '41.252.0.0/14',
5732 'MA': '105.128.0.0/11',
5733 'MC': '88.209.64.0/18',
5734 'MD': '37.246.0.0/16',
5735 'ME': '178.175.0.0/17',
5736 'MF': '74.112.232.0/21',
5737 'MG': '154.126.0.0/17',
5738 'MH': '117.103.88.0/21',
5739 'MK': '77.28.0.0/15',
5740 'ML': '154.118.128.0/18',
5741 'MM': '37.111.0.0/17',
5742 'MN': '49.0.128.0/17',
5743 'MO': '60.246.0.0/16',
5744 'MP': '202.88.64.0/20',
5745 'MQ': '109.203.224.0/19',
5746 'MR': '41.188.64.0/18',
5747 'MS': '208.90.112.0/22',
5748 'MT': '46.11.0.0/16',
5749 'MU': '105.16.0.0/12',
5750 'MV': '27.114.128.0/18',
53896ca5 5751 'MW': '102.70.0.0/15',
5752 'MX': '187.192.0.0/11',
5753 'MY': '175.136.0.0/13',
5754 'MZ': '197.218.0.0/15',
5755 'NA': '41.182.0.0/16',
5756 'NC': '101.101.0.0/18',
5757 'NE': '197.214.0.0/18',
5758 'NF': '203.17.240.0/22',
5759 'NG': '105.112.0.0/12',
5760 'NI': '186.76.0.0/15',
5761 'NL': '145.96.0.0/11',
5762 'NO': '84.208.0.0/13',
5763 'NP': '36.252.0.0/15',
5764 'NR': '203.98.224.0/19',
5765 'NU': '49.156.48.0/22',
5766 'NZ': '49.224.0.0/14',
5767 'OM': '5.36.0.0/15',
5768 'PA': '186.72.0.0/15',
5769 'PE': '186.160.0.0/14',
5770 'PF': '123.50.64.0/18',
5771 'PG': '124.240.192.0/19',
5772 'PH': '49.144.0.0/13',
5773 'PK': '39.32.0.0/11',
5774 'PL': '83.0.0.0/11',
5775 'PM': '70.36.0.0/20',
5776 'PR': '66.50.0.0/16',
5777 'PS': '188.161.0.0/16',
5778 'PT': '85.240.0.0/13',
5779 'PW': '202.124.224.0/20',
5780 'PY': '181.120.0.0/14',
5781 'QA': '37.210.0.0/15',
53896ca5 5782 'RE': '102.35.0.0/16',
773f291d 5783 'RO': '79.112.0.0/13',
53896ca5 5784 'RS': '93.86.0.0/15',
773f291d 5785 'RU': '5.136.0.0/13',
53896ca5 5786 'RW': '41.186.0.0/16',
5787 'SA': '188.48.0.0/13',
5788 'SB': '202.1.160.0/19',
5789 'SC': '154.192.0.0/11',
53896ca5 5790 'SD': '102.120.0.0/13',
773f291d 5791 'SE': '78.64.0.0/12',
53896ca5 5792 'SG': '8.128.0.0/10',
5793 'SI': '188.196.0.0/14',
5794 'SK': '78.98.0.0/15',
53896ca5 5795 'SL': '102.143.0.0/17',
5796 'SM': '89.186.32.0/19',
5797 'SN': '41.82.0.0/15',
53896ca5 5798 'SO': '154.115.192.0/18',
5799 'SR': '186.179.128.0/17',
5800 'SS': '105.235.208.0/21',
5801 'ST': '197.159.160.0/19',
5802 'SV': '168.243.0.0/16',
5803 'SX': '190.102.0.0/20',
5804 'SY': '5.0.0.0/16',
5805 'SZ': '41.84.224.0/19',
5806 'TC': '65.255.48.0/20',
5807 'TD': '154.68.128.0/19',
5808 'TG': '196.168.0.0/14',
5809 'TH': '171.96.0.0/13',
5810 'TJ': '85.9.128.0/18',
5811 'TK': '27.96.24.0/21',
5812 'TL': '180.189.160.0/20',
5813 'TM': '95.85.96.0/19',
5814 'TN': '197.0.0.0/11',
5815 'TO': '175.176.144.0/21',
5816 'TR': '78.160.0.0/11',
5817 'TT': '186.44.0.0/15',
5818 'TV': '202.2.96.0/19',
5819 'TW': '120.96.0.0/11',
5820 'TZ': '156.156.0.0/14',
5821 'UA': '37.52.0.0/14',
5822 'UG': '102.80.0.0/13',
5823 'US': '6.0.0.0/8',
773f291d 5824 'UY': '167.56.0.0/13',
53896ca5 5825 'UZ': '84.54.64.0/18',
773f291d 5826 'VA': '212.77.0.0/19',
53896ca5 5827 'VC': '207.191.240.0/21',
773f291d 5828 'VE': '186.88.0.0/13',
53896ca5 5829 'VG': '66.81.192.0/20',
5830 'VI': '146.226.0.0/16',
5831 'VN': '14.160.0.0/11',
5832 'VU': '202.80.32.0/20',
5833 'WF': '117.20.32.0/21',
5834 'WS': '202.4.32.0/19',
5835 'YE': '134.35.0.0/16',
5836 'YT': '41.242.116.0/22',
5837 'ZA': '41.0.0.0/11',
5838 'ZM': '102.144.0.0/13',
5839 'ZW': '102.177.192.0/18',
5840 }
5841
5842 @classmethod
5843 def random_ipv4(cls, code_or_block):
5844 if len(code_or_block) == 2:
5845 block = cls._country_ip_map.get(code_or_block.upper())
5846 if not block:
5847 return None
5848 else:
5849 block = code_or_block
5850 addr, preflen = block.split('/')
5851 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5852 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5853 return compat_str(socket.inet_ntoa(
4248dad9 5854 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
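    # Illustrative usage (editor's sketch, not from the original source): the
    # argument may be a two-letter country code or an explicit CIDR block;
    # 203.0.113.0/24 below is a hypothetical example block.
    #   GeoUtils.random_ipv4('DE')              # some address inside 53.0.0.0/8
    #   GeoUtils.random_ipv4('203.0.113.0/24')  # some address inside the given block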
5855
5856
91410c9b 5857class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
5858 def __init__(self, proxies=None):
5859 # Set default handlers
5860 for type in ('http', 'https'):
5861 setattr(self, '%s_open' % type,
5862 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5863 meth(r, proxy, type))
38e87f6c 5864 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5865
91410c9b 5866 def proxy_open(self, req, proxy, type):
2461f79d 5867 req_proxy = req.headers.get('Ytdl-request-proxy')
5868 if req_proxy is not None:
5869 proxy = req_proxy
5870 del req.headers['Ytdl-request-proxy']
5871
5872 if proxy == '__noproxy__':
5873 return None # No Proxy
51fb4995 5874 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5875 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5876 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
71aff188 5877 return None
5878 return compat_urllib_request.ProxyHandler.proxy_open(
5879 self, req, proxy, type)
5880
5881
5882# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5883# released into Public Domain
5884# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5885
5886def long_to_bytes(n, blocksize=0):
5887 """long_to_bytes(n:long, blocksize:int) : string
5888 Convert a long integer to a byte string.
5889
5890 If optional blocksize is given and greater than zero, pad the front of the
5891 byte string with binary zeros so that the length is a multiple of
5892 blocksize.
5893 """
5894 # after much testing, this algorithm was deemed to be the fastest
5895 s = b''
5896 n = int(n)
5897 while n > 0:
5898 s = compat_struct_pack('>I', n & 0xffffffff) + s
5899 n = n >> 32
5900 # strip off leading zeros
5901 for i in range(len(s)):
5902 if s[i] != b'\000'[0]:
5903 break
5904 else:
5905 # only happens when n == 0
5906 s = b'\000'
5907 i = 0
5908 s = s[i:]
5909 # add back some pad bytes. this could be done more efficiently w.r.t. the
5910 # de-padding being done above, but sigh...
5911 if blocksize > 0 and len(s) % blocksize:
5912 s = (blocksize - len(s) % blocksize) * b'\000' + s
5913 return s
5914
5915
5916def bytes_to_long(s):
5917 """bytes_to_long(string) : long
5918 Convert a byte string to a long integer.
5919
5920 This is (essentially) the inverse of long_to_bytes().
5921 """
5922 acc = 0
5923 length = len(s)
5924 if length % 4:
5925 extra = (4 - length % 4)
5926 s = b'\000' * extra + s
5927 length = length + extra
5928 for i in range(0, length, 4):
5929 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5930 return acc
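# Illustrative round trip (sketch): long_to_bytes() and bytes_to_long() are
# inverses for non-negative integers, e.g.:
#   long_to_bytes(256)         == b'\x01\x00'
#   bytes_to_long(b'\x01\x00') == 256
#   bytes_to_long(long_to_bytes(0xdeadbeef)) == 0xdeadbeef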
5931
5932
5933def ohdave_rsa_encrypt(data, exponent, modulus):
5934 '''
5935 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5936
5937 Input:
5938 data: data to encrypt, bytes-like object
5939 exponent, modulus: parameter e and N of RSA algorithm, both integer
5940 Output: hex string of encrypted data
5941
5942 Limitation: supports one block encryption only
5943 '''
5944
5945 payload = int(binascii.hexlify(data[::-1]), 16)
5946 encrypted = pow(payload, exponent, modulus)
5947 return '%x' % encrypted
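# Illustrative example (tiny, non-cryptographic numbers): the data is read as a
# little-endian integer, raised to `exponent` modulo `modulus`, and hex-encoded:
#   ohdave_rsa_encrypt(b'\x02', 5, 1000) == '20'   # pow(2, 5, 1000) == 32 == 0x20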
5948
5949
5950def pkcs1pad(data, length):
5951 """
5952 Padding input data with PKCS#1 scheme
5953
5954 @param {int[]} data input data
5955 @param {int} length target length
5956 @returns {int[]} padded data
5957 """
5958 if len(data) > length - 11:
5959 raise ValueError('Input data too long for PKCS#1 padding')
5960
5961 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # PKCS#1 v1.5 padding bytes must be non-zero
5962 return [0, 2] + pseudo_random + [0] + data
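# Illustrative sketch of the output layout: for a 16-byte target, a one-byte
# input is padded as
#   pkcs1pad([0x2a], 16) -> [0, 2, r1, ..., r12, 0, 0x2a]
# where r1..r12 are pseudo-random padding bytes.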
5963
5964
5eb6bdce 5965def encode_base_n(num, n, table=None):
59f898b7 5966 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5967 if not table:
5968 table = FULL_TABLE[:n]
5969
5970 if n > len(table):
5971 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5972
5973 if num == 0:
5974 return table[0]
5975
5976 ret = ''
5977 while num:
5978 ret = table[num % n] + ret
5979 num = num // n
5980 return ret
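# Illustrative examples:
#   encode_base_n(255, 16) == 'ff'
#   encode_base_n(255, 2)  == '11111111'
#   encode_base_n(0, 36)   == '0'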
5981
5982
5983def decode_packed_codes(code):
06b3fe29 5984 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5985 obfuscated_code, base, count, symbols = mobj.groups()
5986 base = int(base)
5987 count = int(count)
5988 symbols = symbols.split('|')
5989 symbol_table = {}
5990
5991 while count:
5992 count -= 1
5eb6bdce 5993 base_n_count = encode_base_n(count, base)
5994 symbol_table[base_n_count] = symbols[count] or base_n_count
5995
5996 return re.sub(
5997 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5998 obfuscated_code)
e154c651 5999
6000
6001def caesar(s, alphabet, shift):
6002 if shift == 0:
6003 return s
6004 l = len(alphabet)
6005 return ''.join(
6006 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
6007 for c in s)
6008
6009
6010def rot47(s):
6011 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
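# Illustrative examples: caesar() only shifts characters that occur in the given
# alphabet, and rot47() is its own inverse:
#   caesar('ace!', 'abcdef', 2) == 'cea!'
#   rot47(rot47('any text'))    == 'any text'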
6012
6013
e154c651 6014def parse_m3u8_attributes(attrib):
6015 info = {}
6016 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
6017 if val.startswith('"'):
6018 val = val[1:-1]
6019 info[key] = val
6020 return info
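# Illustrative example: quoted values may contain commas and are returned unquoted:
#   parse_m3u8_attributes('BANDWIDTH=800000,CODECS="avc1.4d401e,mp4a.40.2"')
#   == {'BANDWIDTH': '800000', 'CODECS': 'avc1.4d401e,mp4a.40.2'}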
6021
6022
6023def urshift(val, n):
6024 return val >> n if val >= 0 else (val + 0x100000000) >> n
6025
6026
6027# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 6028# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
6029def decode_png(png_data):
6030 # Reference: https://www.w3.org/TR/PNG/
6031 header = png_data[8:]
6032
6033 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
6034 raise IOError('Not a valid PNG file.')
6035
6036 int_map = {1: '>B', 2: '>H', 4: '>I'}
6037 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
6038
6039 chunks = []
6040
6041 while header:
6042 length = unpack_integer(header[:4])
6043 header = header[4:]
6044
6045 chunk_type = header[:4]
6046 header = header[4:]
6047
6048 chunk_data = header[:length]
6049 header = header[length:]
6050
6051 header = header[4:] # Skip CRC
6052
6053 chunks.append({
6054 'type': chunk_type,
6055 'length': length,
6056 'data': chunk_data
6057 })
6058
6059 ihdr = chunks[0]['data']
6060
6061 width = unpack_integer(ihdr[:4])
6062 height = unpack_integer(ihdr[4:8])
6063
6064 idat = b''
6065
6066 for chunk in chunks:
6067 if chunk['type'] == b'IDAT':
6068 idat += chunk['data']
6069
6070 if not idat:
6071 raise IOError('Unable to read PNG data.')
6072
6073 decompressed_data = bytearray(zlib.decompress(idat))
6074
6075 stride = width * 3
6076 pixels = []
6077
6078 def _get_pixel(idx):
6079 x = idx % stride
6080 y = idx // stride
6081 return pixels[y][x]
6082
6083 for y in range(height):
6084 basePos = y * (1 + stride)
6085 filter_type = decompressed_data[basePos]
6086
6087 current_row = []
6088
6089 pixels.append(current_row)
6090
6091 for x in range(stride):
6092 color = decompressed_data[1 + basePos + x]
6093 basex = y * stride + x
6094 left = 0
6095 up = 0
6096
6097 if x > 2:
6098 left = _get_pixel(basex - 3)
6099 if y > 0:
6100 up = _get_pixel(basex - stride)
6101
6102 if filter_type == 1: # Sub
6103 color = (color + left) & 0xff
6104 elif filter_type == 2: # Up
6105 color = (color + up) & 0xff
6106 elif filter_type == 3: # Average
6107 color = (color + ((left + up) >> 1)) & 0xff
6108 elif filter_type == 4: # Paeth
6109 a = left
6110 b = up
6111 c = 0
6112
6113 if x > 2 and y > 0:
6114 c = _get_pixel(basex - stride - 3)
6115
6116 p = a + b - c
6117
6118 pa = abs(p - a)
6119 pb = abs(p - b)
6120 pc = abs(p - c)
6121
6122 if pa <= pb and pa <= pc:
6123 color = (color + a) & 0xff
6124 elif pb <= pc:
6125 color = (color + b) & 0xff
6126 else:
6127 color = (color + c) & 0xff
6128
6129 current_row.append(color)
6130
6131 return width, height, pixels
6132
6133
6134def write_xattr(path, key, value):
6135 # This mess below finds the best xattr tool for the job
6136 try:
6137 # try the pyxattr module...
6138 import xattr
6139
6140 if hasattr(xattr, 'set'): # pyxattr
6141 # Unicode arguments are not supported in python-pyxattr until
6142 # version 0.5.0
067aa17e 6143 # See https://github.com/ytdl-org/youtube-dl/issues/5498
6144 pyxattr_required_version = '0.5.0'
6145 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6146 # TODO: fallback to CLI tools
6147 raise XAttrUnavailableError(
6148 'python-pyxattr is detected but is too old. '
7a5c1cfe 6149 'yt-dlp requires %s or above while your version is %s. '
6150 'Falling back to other xattr implementations' % (
6151 pyxattr_required_version, xattr.__version__))
6152
6153 setxattr = xattr.set
6154 else: # xattr
6155 setxattr = xattr.setxattr
6156
6157 try:
53a7e3d2 6158 setxattr(path, key, value)
6159 except EnvironmentError as e:
6160 raise XAttrMetadataError(e.errno, e.strerror)
6161
6162 except ImportError:
6163 if compat_os_name == 'nt':
6164 # Write xattrs to NTFS Alternate Data Streams:
6165 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6166 assert ':' not in key
6167 assert os.path.exists(path)
6168
6169 ads_fn = path + ':' + key
6170 try:
6171 with open(ads_fn, 'wb') as f:
6172 f.write(value)
6173 except EnvironmentError as e:
6174 raise XAttrMetadataError(e.errno, e.strerror)
6175 else:
6176 user_has_setfattr = check_executable('setfattr', ['--version'])
6177 user_has_xattr = check_executable('xattr', ['-h'])
6178
6179 if user_has_setfattr or user_has_xattr:
6180
6181 value = value.decode('utf-8')
6182 if user_has_setfattr:
6183 executable = 'setfattr'
6184 opts = ['-n', key, '-v', value]
6185 elif user_has_xattr:
6186 executable = 'xattr'
6187 opts = ['-w', key, value]
6188
6189 cmd = ([encodeFilename(executable, True)]
6190 + [encodeArgument(o) for o in opts]
6191 + [encodeFilename(path, True)])
6192
6193 try:
d3c93ec2 6194 p = Popen(
6195 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6196 except EnvironmentError as e:
6197 raise XAttrMetadataError(e.errno, e.strerror)
d3c93ec2 6198 stdout, stderr = p.communicate_or_kill()
6199 stderr = stderr.decode('utf-8', 'replace')
6200 if p.returncode != 0:
6201 raise XAttrMetadataError(p.returncode, stderr)
6202
6203 else:
6204 # On Unix, but we can't find pyxattr, setfattr, or xattr.
6205 if sys.platform.startswith('linux'):
6206 raise XAttrUnavailableError(
6207 "Couldn't find a tool to set the xattrs. "
6208 "Install either the python 'pyxattr' or 'xattr' "
6209 "modules, or the GNU 'attr' package "
6210 "(which contains the 'setfattr' tool).")
6211 else:
6212 raise XAttrUnavailableError(
6213 "Couldn't find a tool to set the xattrs. "
6214 "Install either the python 'xattr' module, "
6215 "or the 'xattr' binary.")
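# Illustrative usage (sketch; 'video.mp4' and the URL are hypothetical): the
# value must be passed as bytes, e.g.
#   write_xattr('video.mp4', 'user.xdg.referrer.url', 'https://example.com/page'.encode('utf-8'))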
6216
6217
6218def random_birthday(year_field, month_field, day_field):
6219 start_date = datetime.date(1950, 1, 1)
6220 end_date = datetime.date(1995, 12, 31)
6221 offset = random.randint(0, (end_date - start_date).days)
6222 random_date = start_date + datetime.timedelta(offset)
0c265486 6223 return {
6224 year_field: str(random_date.year),
6225 month_field: str(random_date.month),
6226 day_field: str(random_date.day),
0c265486 6227 }
732044af 6228
c76eb41b 6229
732044af 6230# Templates for internet shortcut files, which are plain text files.
6231DOT_URL_LINK_TEMPLATE = '''
6232[InternetShortcut]
6233URL=%(url)s
6234'''.lstrip()
6235
6236DOT_WEBLOC_LINK_TEMPLATE = '''
6237<?xml version="1.0" encoding="UTF-8"?>
6238<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6239<plist version="1.0">
6240<dict>
6241\t<key>URL</key>
6242\t<string>%(url)s</string>
6243</dict>
6244</plist>
6245'''.lstrip()
6246
6247DOT_DESKTOP_LINK_TEMPLATE = '''
6248[Desktop Entry]
6249Encoding=UTF-8
6250Name=%(filename)s
6251Type=Link
6252URL=%(url)s
6253Icon=text-html
6254'''.lstrip()
6255
08438d2c 6256LINK_TEMPLATES = {
6257 'url': DOT_URL_LINK_TEMPLATE,
6258 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
6259 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
6260}
6261
732044af 6262
6263def iri_to_uri(iri):
6264 """
6265 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6266
6267 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6268 """
6269
6270 iri_parts = compat_urllib_parse_urlparse(iri)
6271
6272 if '[' in iri_parts.netloc:
6273 raise ValueError('IPv6 URIs are not yet supported.')
6274 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6275
6276 # The `safe` argument values used by the following code contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6277
6278 net_location = ''
6279 if iri_parts.username:
6280 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6281 if iri_parts.password is not None:
6282 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6283 net_location += '@'
6284
6285 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6286 # The 'idna' encoding produces ASCII text.
6287 if iri_parts.port is not None and iri_parts.port != 80:
6288 net_location += ':' + str(iri_parts.port)
6289
6290 return compat_urllib_parse_urlunparse(
6291 (iri_parts.scheme,
6292 net_location,
6293
6294 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6295
6296 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6297 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6298
6299 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6300 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6301
6302 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6303
6304 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
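# Illustrative example (rough sketch; the domain is hypothetical): Unicode host
# names are punycoded and the remaining components are UTF-8 percent-encoded:
#   iri_to_uri('https://bücher.example/straße?q=münchen')
#   -> 'https://xn--bcher-kva.example/stra%C3%9Fe?q=m%C3%BCnchen'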
6305
6306
6307def to_high_limit_path(path):
6308 if sys.platform in ['win32', 'cygwin']:
6309 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6310 return r'\\?\ '.rstrip() + os.path.abspath(path)
6311
6312 return path
76d321f6 6313
c76eb41b 6314
b868936c 6315def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6316 if field is None:
6317 val = obj if obj is not None else default
6318 else:
6319 val = obj.get(field, default)
76d321f6 6320 if func and val not in ignore:
6321 val = func(val)
6322 return template % val if val not in ignore else default
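# Illustrative examples:
#   format_field({'width': 1920}, 'width', '%dpx') == '1920px'
#   format_field({}, 'width', '%dpx')              == ''        # missing/empty -> default
#   format_field(5, None, 'got %d')                == 'got 5'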
00dd0cd5 6323
6324
6325def clean_podcast_url(url):
6326 return re.sub(r'''(?x)
6327 (?:
6328 (?:
6329 chtbl\.com/track|
6330 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6331 play\.podtrac\.com
6332 )/[^/]+|
6333 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6334 flex\.acast\.com|
6335 pd(?:
6336 cn\.co| # https://podcorn.com/analytics-prefix/
6337 st\.fm # https://podsights.com/docs/
6338 )/e
6339 )/''', '', url)
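# Illustrative example (hypothetical media host): tracking prefixes are stripped,
# leaving the direct media URL:
#   clean_podcast_url('https://dts.podtrac.com/redirect.mp3/traffic.example.com/episode.mp3')
#   == 'https://traffic.example.com/episode.mp3'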
6340
6341
6342_HEX_TABLE = '0123456789abcdef'
6343
6344
6345def random_uuidv4():
6346 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6347
6348
6349def make_dir(path, to_screen=None):
6350 try:
6351 dn = os.path.dirname(path)
6352 if dn and not os.path.exists(dn):
6353 os.makedirs(dn)
6354 return True
6355 except (OSError, IOError) as err:
6356 if callable(to_screen):
6357 to_screen('unable to create directory ' + error_to_compat_str(err))
6358 return False
f74980cb 6359
6360
6361def get_executable_path():
c552ae88 6362 from zipimport import zipimporter
6363 if hasattr(sys, 'frozen'): # Running from PyInstaller
6364 path = os.path.dirname(sys.executable)
6365 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6366 path = os.path.join(os.path.dirname(__file__), '../..')
6367 else:
6368 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6369 return os.path.abspath(path)
6370
6371
2f567473 6372def load_plugins(name, suffix, namespace):
3ae5e797 6373 classes = {}
f74980cb 6374 try:
6375 plugins_spec = importlib.util.spec_from_file_location(
6376 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
6377 plugins = importlib.util.module_from_spec(plugins_spec)
6378 sys.modules[plugins_spec.name] = plugins
6379 plugins_spec.loader.exec_module(plugins)
f74980cb 6380 for name in dir(plugins):
2f567473 6381 if name in namespace:
6382 continue
6383 if not name.endswith(suffix):
f74980cb 6384 continue
6385 klass = getattr(plugins, name)
3ae5e797 6386 classes[name] = namespace[name] = klass
019a94f7 6387 except FileNotFoundError:
f74980cb 6388 pass
f74980cb 6389 return classes
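# Illustrative usage (a sketch of how a caller might invoke this; names are assumptions):
#   _PLUGIN_CLASSES = load_plugins('extractor', 'IE', globals())
# i.e. every class whose name ends in 'IE' found in ytdlp_plugins/extractor/__init__.py
# is added to the given namespace and returned.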
06167fbb 6390
6391
325ebc17 6392def traverse_obj(
352d63fd 6393 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6394 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6395 ''' Traverse nested list/dict/tuple
8f334380 6396 @param path_list A list of paths which are checked one by one.
6397 Each path is a list of keys where each key is a string,
2614f646 6398 a function, a tuple of strings or "...".
6399 When a function is given, it takes the key as an argument and
6400 returns whether the key matches or not. When a tuple is given,
8f334380 6401 all the keys given in the tuple are traversed, and
6402 "..." traverses all the keys in the object
325ebc17 6403 @param default Default value to return
352d63fd 6404 @param expected_type Only accept final value of this type (Can also be any callable)
6405 @param get_all Return all the values obtained from a path or only the first one
324ad820 6406 @param casesense Whether to consider dictionary keys as case sensitive
6407 @param is_user_input Whether the keys are generated from user input. If True,
6408 strings are converted to int/slice if necessary
6409 @param traverse_string Whether to traverse inside strings. If True, any
6410 non-compatible object will also be converted into a string
8f334380 6411 # TODO: Write tests
324ad820 6412 '''
325ebc17 6413 if not casesense:
dbf5416a 6414 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6415 path_list = (map(_lower, variadic(path)) for path in path_list)
6416
6417 def _traverse_obj(obj, path, _current_depth=0):
6418 nonlocal depth
575e17a1 6419 if obj is None:
6420 return None
8f334380 6421 path = tuple(variadic(path))
6422 for i, key in enumerate(path):
6423 if isinstance(key, (list, tuple)):
6424 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6425 key = ...
6426 if key is ...:
6427 obj = (obj.values() if isinstance(obj, dict)
6428 else obj if isinstance(obj, (list, tuple, LazyList))
6429 else str(obj) if traverse_string else [])
6430 _current_depth += 1
6431 depth = max(depth, _current_depth)
6432 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
2614f646 6433 elif callable(key):
6434 if isinstance(obj, (list, tuple, LazyList)):
6435 obj = enumerate(obj)
6436 elif isinstance(obj, dict):
6437 obj = obj.items()
6438 else:
6439 if not traverse_string:
6440 return None
6441 obj = str(obj)
6442 _current_depth += 1
6443 depth = max(depth, _current_depth)
6444 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
575e17a1 6445 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6446 obj = (obj.get(key) if casesense or (key in obj)
6447 else next((v for k, v in obj.items() if _lower(k) == key), None))
6448 else:
6449 if is_user_input:
6450 key = (int_or_none(key) if ':' not in key
6451 else slice(*map(int_or_none, key.split(':'))))
8f334380 6452 if key == slice(None):
575e17a1 6453 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6454 if not isinstance(key, (int, slice)):
9fea350f 6455 return None
8f334380 6456 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6457 if not traverse_string:
6458 return None
6459 obj = str(obj)
6460 try:
6461 obj = obj[key]
6462 except IndexError:
324ad820 6463 return None
325ebc17 6464 return obj
6465
352d63fd 6466 if isinstance(expected_type, type):
6467 type_test = lambda val: val if isinstance(val, expected_type) else None
6468 elif expected_type is not None:
6469 type_test = expected_type
6470 else:
6471 type_test = lambda val: val
6472
8f334380 6473 for path in path_list:
6474 depth = 0
6475 val = _traverse_obj(obj, path)
325ebc17 6476 if val is not None:
8f334380 6477 if depth:
6478 for _ in range(depth - 1):
6586bca9 6479 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6480 val = [v for v in map(type_test, val) if v is not None]
8f334380 6481 if val:
352d63fd 6482 return val if get_all else val[0]
6483 else:
6484 val = type_test(val)
6485 if val is not None:
8f334380 6486 return val
325ebc17 6487 return default
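# Illustrative examples (sketch):
#   traverse_obj({'a': {'b': 4}}, ('a', 'b'))                   == 4
#   traverse_obj({'a': [{'b': 1}, {'b': 2}]}, ('a', ..., 'b'))  == [1, 2]
#   traverse_obj({}, ('a', 'b'), default='none')                == 'none'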
324ad820 6488
6489
6490def traverse_dict(dictn, keys, casesense=True):
6491 ''' For backward compatibility. Do not use '''
6492 return traverse_obj(dictn, keys, casesense=casesense,
6493 is_user_input=True, traverse_string=True)
6606817a 6494
6495
c634ad2a 6496def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6497 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
6498
6499
6500# create a JSON Web Signature (jws) with HS256 algorithm
6501# the resulting format is in JWS Compact Serialization
6502# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6503# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6504def jwt_encode_hs256(payload_data, key, headers={}):
6505 header_data = {
6506 'alg': 'HS256',
6507 'typ': 'JWT',
6508 }
6509 if headers:
6510 header_data.update(headers)
6511 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
6512 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
6513 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
6514 signature_b64 = base64.b64encode(h.digest())
6515 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6516 return token
819e0531 6517
6518
16b0d7e6 6519# can be extended in the future to verify the signature and parse the header, and to return the algorithm used if it's not HS256
6520def jwt_decode_hs256(jwt):
6521 header_b64, payload_b64, signature_b64 = jwt.split('.')
6522 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
6523 return payload_data
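# Illustrative round trip (sketch): jwt_encode_hs256() returns bytes while
# jwt_decode_hs256() expects a str token, and the signature is not verified:
#   token = jwt_encode_hs256({'id': 123}, 'secret')
#   jwt_decode_hs256(token.decode('utf-8')) == {'id': 123}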
6524
6525
819e0531 6526def supports_terminal_sequences(stream):
6527 if compat_os_name == 'nt':
d1d5c08f 6528 if get_windows_version() < (10, 0, 10586):
819e0531 6529 return False
6530 elif not os.getenv('TERM'):
6531 return False
6532 try:
6533 return stream.isatty()
6534 except BaseException:
6535 return False
6536
6537
ec11a9f4 6538_terminal_sequences_re = re.compile('\033\\[[^m]+m')
6539
6540
6541def remove_terminal_sequences(string):
6542 return _terminal_sequences_re.sub('', string)
6543
6544
6545def number_of_digits(number):
6546 return len('%d' % number)