]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
[vidme] Remove extractor (#1095)
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
49fa4d9a
N
19import hashlib
20import hmac
f74980cb 21import imp
03f9daab 22import io
79a2e94e 23import itertools
f4bfd65f 24import json
d77c3dfd 25import locale
02dbf93f 26import math
347de493 27import operator
d77c3dfd 28import os
c496ca96 29import platform
773f291d 30import random
d77c3dfd 31import re
c496ca96 32import socket
79a2e94e 33import ssl
1c088fa8 34import subprocess
d77c3dfd 35import sys
181c8655 36import tempfile
c380cc28 37import time
01951dda 38import traceback
bcf89ce6 39import xml.etree.ElementTree
d77c3dfd 40import zlib
d77c3dfd 41
8c25f81b 42from .compat import (
b4a3d461 43 compat_HTMLParseError,
8bb56eee 44 compat_HTMLParser,
201c1459 45 compat_HTTPError,
8f9312c3 46 compat_basestring,
8c25f81b 47 compat_chr,
1bab3437 48 compat_cookiejar,
d7cd9a9e 49 compat_ctypes_WINFUNCTYPE,
36e6f62c 50 compat_etree_fromstring,
51098426 51 compat_expanduser,
8c25f81b 52 compat_html_entities,
55b2f099 53 compat_html_entities_html5,
be4a824d 54 compat_http_client,
42db58ec 55 compat_integer_types,
e29663c6 56 compat_numeric_types,
c86b6142 57 compat_kwargs,
efa97bdc 58 compat_os_name,
8c25f81b 59 compat_parse_qs,
702ccf2d 60 compat_shlex_quote,
8c25f81b 61 compat_str,
edaa23f8 62 compat_struct_pack,
d3f8e038 63 compat_struct_unpack,
8c25f81b
PH
64 compat_urllib_error,
65 compat_urllib_parse,
15707c7e 66 compat_urllib_parse_urlencode,
8c25f81b 67 compat_urllib_parse_urlparse,
732044af 68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
7581bfc9 71 compat_urllib_parse_unquote_plus,
8c25f81b
PH
72 compat_urllib_request,
73 compat_urlparse,
810c10ba 74 compat_xpath,
8c25f81b 75)
4644ac55 76
71aff188
YCH
77from .socks import (
78 ProxyType,
79 sockssocket,
80)
81
4644ac55 82
51fb4995
YCH
def register_socks_protocols():
    """Make sure the SOCKS URL schemes are listed in compat_urlparse.uses_netloc.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs with protocols not in
    urlparse.uses_netloc are not handled correctly.

    Schemes that are already present are left alone, so calling this more
    than once does not add duplicate entries.
    """
    # Same list object as the module attribute; appending mutates it in place.
    netloc_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in netloc_schemes:
            continue
        netloc_schemes.append(socks_scheme)
90
91
468e2e92
FV
# The class of a compiled regular expression is not clearly defined
# otherwise, so take it from an actual compiled pattern object.
compiled_regex_type = re.compile('').__class__
94
f7a147e3
S
95
96def random_user_agent():
97 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
98 _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
1675 )
1676 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
# Default HTTP request headers; the User-Agent is picked once, when this
# module is imported.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1686
5f6a1245 1687
fb37eb25
S
# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel meaning "no default value was supplied"
# (lets callers pass None as a real default)
NO_DEFAULT = object()

# Month names in calendar order, January first
ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]

# Month names per language code, each list in calendar order
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier',
        'février',
        'mars',
        'avril',
        'mai',
        'juin',
        'juillet',
        'août',
        'septembre',
        'octobre',
        'novembre',
        'décembre',
    ],
}
a942d6cb 1705
a7aaa398
S
# File extensions recognised as media or manifest formats
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v',
    'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm',
    'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3', 'flac', 'ape', 'wav',
    'f4f', 'f4m', 'm3u8', 'smil',
)
1720
# Maps accented characters to their ASCII replacement (one or two characters);
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
    itertools.chain(
        # upper-case replacements
        'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
        # lower-case replacements
        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y',
    ),
))
c587cbb7 1725
46f59e89
S
# strptime-style patterns shared by the day-first and month-first lists below
DATE_FORMATS = (
    # textual month, optional ordinal suffix on the day
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    # numeric, year first
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    # "T"-separated date and time
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    # textual month followed by "at <time>"
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Common formats plus ambiguous numeric dates read as day/month/year
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# Common formats plus ambiguous numeric dates read as month/day/year
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]

# Captures the four arguments at the tail of a "}('...',N,N,'...'.split('|')" call
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of a <script type="application/ld+json"> element as "json_ld"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1791
7105440c 1792
def preferredencoding():
    """Get preferred encoding.

    Returns what locale.getpreferredencoding() reports, provided that
    encoding can actually be used to encode text; otherwise 'UTF-8'.
    """
    fallback = 'UTF-8'
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or broken name raises here
        'TEST'.encode(encoding)
    except Exception:
        return fallback
    return encoding
d77c3dfd 1806
f4bfd65f 1807
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    is_py2 = sys.version_info < (3, 0)

    if is_py2 and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(fs_encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(fs_encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # The temporary file is created next to the target so that the final
    # os.rename stays within one directory
    tmp_options = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if is_py2:
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Read the current umask (os.umask can only set-and-return, so
            # restore it right away) and give the file regular permissions
            current_umask = os.umask(0)
            os.umask(current_umask)
            os.chmod(tf.name, 0o666 & ~current_umask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file, then re-raise
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1866
1867
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (or xpath[@key] if val is None) """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1882
d7e66d39
JMF
1883# On python2.6 the xml.etree.ElementTree.Element methods don't support
1884# the namespace parameter
5f6a1245
JW
1885
1886
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand every "prefix:tag" step of path to "{uri}tag" using ns_map.

    Steps without a colon are kept unchanged. Raises KeyError for a prefix
    missing from ns_map and ValueError for a step with more than one colon.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([_expand(step) for step in path.split('/')])
1897
d77c3dfd 1898
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node matching xpath.

    xpath is either a single expression (string) or an iterable of
    expressions that are tried in order until one matches.

    When nothing matches: return default if one was given, otherwise raise
    ExtractorError if fatal is set (the message uses name, or xpath when
    name is None), otherwise return None.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable falls through to
        # the default/fatal handling instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1920
1921
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element is handled by xpath_element() itself. An element
    without text yields default if one was given, raises ExtractorError if
    fatal is set, and gives None otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        # Not found: pass None or the caller's default straight through
        return element

    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1935
1936
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    Without such an element: return default if one was given, raise
    ExtractorError if fatal is set, and return None otherwise.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1948
1949
def get_element_by_id(id, html):
    """Return the content of the first tag with the specified ID in the passed HTML document"""
    content = get_element_by_attribute('id', id, html)
    return content
43e8fafd 1953
12ea2f30 1954
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1959
1960
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1964
1965
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class_name as a whole word anywhere inside the class attribute;
    # the pattern is already a regex, hence escape_value=False
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1971
1972
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case
    it is used as a regular expression.
    """
    value_re = re.escape(value) if escape_value else value

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value_re)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')
        # Content that starts with a quote loses its first and last character
        if content[:1] in ('"', "'"):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
a921f407 1996
c5229f39 1997
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        """Start with an empty attribute dict, then initialise the base parser."""
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Store the attributes of the start tag just parsed.

        Each call replaces the previous value of self.attrs.
        """
        self.attrs = dict(attrs)
2007
c5229f39 2008
8bb56eee
BF
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the result is
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was collected before the error is still returned
        pass
    return attr_parser.attrs
9e6dd238 2033
c5229f39 2034
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Existing newlines become spaces; only markup produces line breaks
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> (with surrounding whitespace) -> newline
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        # </p><p ...> paragraph boundary -> newline
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip all remaining html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2050
2051
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The name '-' selects standard output (its binary buffer when there is
    one). Any other name is opened as is; if that fails for a reason other
    than EACCES, the open is retried once with the name run through
    sanitize_path(). An error from that second attempt is left to the
    caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != '-':
            return (open(encodeFilename(filename), open_mode), filename)

        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        stdout_stream = getattr(sys.stdout, 'buffer', sys.stdout)
        return (stdout_stream, filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be fixed by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise

        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2082
2083
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2091
5f6a1245 2092
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Accented characters get their ASCII replacement before any other rule
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or codepoint > 127):
            return '_'
        return char

    if s == '':
        return ''

    # Handle timestamps: keep "12:34:56" together as "12_34_56"
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2134
5f6a1245 2135
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms the path is returned unchanged unless force is set,
    in which case the same sanitization is applied and a leading path
    separator is kept. Within each path component (except '.' and '..') the
    characters /<>:"|\\?* and a trailing whitespace character or dot are
    replaced with '#'.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the first component of the split; the drive/UNC part is put
        # back in front below
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    # Slice instead of s[0] so that an empty path does not raise IndexError
    elif force and s[:1] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2159
2160
def sanitize_url(url):
    """Repair a few common URL defects and return the resulting URL.

    Scheme-relative URLs ("//host/...") get an "http:" scheme and known
    scheme typos are corrected; any other URL is returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, correction in COMMON_TYPOS:
        # Every pattern is anchored at the start, so a non-zero count means
        # exactly that the URL begins with this typo
        fixed_url, replaced = re.subn(typo_re, correction, url)
        if replaced:
            return fixed_url
    return url
17bcc626
S
2177
2178
5435dcf9
HH
def extract_basic_auth(url):
    """Split user:password credentials out of url.

    Returns (url, None) when the URL has no username, otherwise
    (url_without_credentials, 'Basic <base64 of user:password>').
    A missing password is treated as an empty string.
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None

    # Rebuild the network location from host (and port) only
    if parts.port is None:
        netloc = parts.hostname
    else:
        netloc = '%s:%d' % (parts.hostname, parts.port)
    stripped_url = compat_urlparse.urlunsplit(parts._replace(netloc=netloc))

    credentials = '%s:%s' % (parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode('utf-8'))
    return stripped_url, 'Basic ' + token.decode('utf-8')
2189
2190
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request from a sanitized, escaped URL.

    Credentials embedded in the URL are removed from it and sent as an
    Authorization header instead.
    """
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # Headers are taken from the second positional argument when there
        # is one, otherwise from (a possibly new) headers keyword argument
        if len(args) >= 2:
            headers = args[1]
        else:
            headers = kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
67dda517
S
2197
2198
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables second
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2202
2203
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element in its
    original position. Elements are compared with ==, so they do not need
    to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2211
912b38b4 2212
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity text without the leading '&' but
    including the trailing ';' (e.g. 'amp;' or '#x41;'). Entities that
    cannot be decoded are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() accepts the '0x' prefix together with base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # ValueError: code point outside the valid range.
            # OverflowError: number too large to convert to a C int
            # (e.g. '&#99999999999999999999;').
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2242
2243
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through; any other input must be a text string.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode_entity(match):
        # Group 1 is the entity without '&' but with the closing ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode_entity, s)
d77c3dfd 2251
8bf48f23 2252
def escapeHTML(text):
    """Escape the characters &, <, >, " and ' in text as HTML entities."""
    # '&' has to be handled first so that the ampersands introduced by the
    # other replacements are not escaped again
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    escaped = text
    for char, entity in replacements:
        escaped = escaped.replace(char, entity)
    return escaped
2262
2263
def process_communicate_or_kill(p, *args, **kwargs):
    """Run p.communicate(*args, **kwargs) and return its result.

    If communicate() raises anything at all, the process is killed and
    waited for before the exception is re-raised.
    """
    try:
        output = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return output
2271
2272
aa49acd1
S
def get_subprocess_encoding():
    """Return the name of the encoding to use for subprocess arguments."""
    on_modern_windows = (
        sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    # getfilesystemencoding() may give None, in which case fall back to UTF-8
    fs_encoding = sys.getfilesystemencoding()
    return 'utf-8' if fs_encoding is None else fs_encoding
2283
2284
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    passthrough = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32'
            and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as
        # Python 2.x compatible
        or sys.platform.startswith('java'))
    if passthrough:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2307
2308
def decodeFilename(b, for_subprocess=False):
    """Counterpart of encodeFilename().

    On Python 3, and for anything that is not a bytes object, the argument
    is returned unchanged; otherwise it is decoded with the subprocess
    encoding, ignoring undecodable bytes.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2318
f07b74fc
PH
2319
def encodeArgument(s):
    """Encode a command line argument the same way as a subprocess filename."""
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
2327
2328
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument; see decodeFilename()."""
    return decodeFilename(b, for_subprocess=True)
2331
2332
8271226a
PH
def decodeOption(optval):
    """Return an option value as text.

    None is passed through and bytes are decoded with the preferred
    encoding; anything else must already be a text string.
    """
    if optval is None:
        return optval

    decoded = optval
    if isinstance(decoded, bytes):
        decoded = decoded.decode(preferredencoding())

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 2341
5f6a1245 2342
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H:]MM:SS, M:SS or S.

    secs  -- duration in seconds (int or float)
    delim -- separator placed between the hour/minute/second fields
    msec  -- if set, append '.mmm' with the milliseconds of secs

    Exactly 60 seconds is rendered as '1:00' and exactly 3600 seconds as
    '1:00:00'.
    """
    whole = int(secs)
    if whole >= 3600:
        ret = '%d%s%02d%s%02d' % (whole // 3600, delim, (whole % 3600) // 60, delim, whole % 60)
    elif whole >= 60:
        ret = '%d%s%02d' % (whole // 60, delim, whole % 60)
    else:
        ret = '%d' % whole
    if not msec:
        return ret
    # Scale the fractional part to milliseconds; the cap keeps a fraction
    # that rounds up to a full second (e.g. 0.9996) within three digits
    millis = min(int(round(abs(secs - whole) * 1000)), 999)
    return '%s.%03d' % (ret, millis)
4539dd30 2351
a0ddb8a2 2352
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler configured from params.

    Certificate verification is switched off when params has a true
    'nocheckcertificate' entry.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2376
732ea2f0 2377
def bug_reports_message(before=';'):
    """Return the "please report this issue" text appended to error messages.

    before is the text that precedes the message. When it is empty or ends
    a sentence ('.', '!' or '?'), the message starts with a capital letter.
    """
    update_cmd = (
        'type yt-dlp -U to update' if ytdl_is_updateable()
        else 'see https://github.com/yt-dlp/yt-dlp on how to update')
    msg = ''.join((
        'please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))

    prefix = before.rstrip()
    if not prefix or prefix.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    if prefix:
        return prefix + ' ' + msg
    return msg
08f2a92c
JMF
2392
2393
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2397
2398
# Exception types treated as network failures
# (ExtractorError marks errors raised while handling one of them as expected)
network_exceptions = (
    compat_urllib_error.URLError,
    compat_http_client.HTTPException,
    socket.error,
)
if hasattr(ssl, 'CertificateError'):
    network_exceptions += (ssl.CertificateError,)
2403
2404
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        An error created while a network exception is being handled always counts as expected.
        """
        active_exc_info = sys.exc_info()
        if active_exc_info[0] in network_exceptions:
            expected = True

        self.msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = active_exc_info  # preserve original exception

        # Full message: "[ie] video_id: msg (caused by ...)", followed by the
        # bug report hint unless the error is expected
        message_parts = [
            format_field(ie, template='[%s] '),
            format_field(video_id, template='%s: '),
            self.msg,
            format_field(cause, template=' (caused by %r)'),
        ]
        if not expected:
            message_parts.append(bug_reports_message())
        super(ExtractorError, self).__init__(''.join(message_parts))

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
01951dda 2434
1c256f70 2435
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        # Keep the offending URL available to handlers
        self.url = url
2441
2442
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2446
2447
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        """Create an expected error; countries is stored unchanged."""
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # Replace the str()-converted message set by the base class with
        # the original object
        self.msg = msg
        self.countries = countries
2459
2460
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that handlers can inspect the underlying exception
        self.exc_info = exc_info
d77c3dfd
FV
2473
2474
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
2482
2483
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2491
2492
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        """Create the error; msg is also kept as the msg attribute."""
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd 2503
5f6a1245 2504
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): the previous docstring was a copy of the one on
    MaxDownloadsReached. Judging by the class name this is presumably the
    --break-on-existing condition; confirm against the code that raises it.
    """
    pass
2508
2509
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): the previous docstring was a copy of the one on
    MaxDownloadsReached. Judging by the class name this is presumably the
    --break-on-reject condition; confirm against the code that raises it.
    """
    pass
2513
2514
class ThrottledDownload(YoutubeDLError):
    """ Download speed below --throttled-rate. """
2518
2519
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2523
2524
class UnavailableVideoError(YoutubeDLError):
    """Raised for an unavailable format.

    Thrown when a video is requested in a format
    that is not available for that video.
    """
d77c3dfd
FV
2532
2533
class ContentTooShortError(YoutubeDLError):
    """Raised when fewer bytes arrived than the server announced.

    FileDownloader objects may raise this exception when a downloaded file
    is smaller than what the server announced first, which indicates that
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """downloaded, expected: sizes, both in bytes"""
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2549
5f6a1245 2550
class XAttrMetadataError(YoutubeDLError):
    """Failure while handling xattr metadata, classified into a .reason.

    .reason is one of 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED' and is
    derived from the errno code and/or the text of the message.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Classify by errno first, then fall back to well-known message texts
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg
            or 'Disk quota exceeded' in msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2565
2566
class XAttrUnavailableError(YoutubeDLError):
    """Marker exception type; carries no state beyond YoutubeDLError."""
2569
2570
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, pin it to a source address.

    ydl_handler: handler whose _params dict is consulted for 'source_address'
    http_class:  connection class to instantiate with *args/**kwargs
    is_https:    whether the Python 2.6 fallback must wrap the socket in TLS

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the source_address parameter shadows the outer variable;
            # here it is indexed, i.e. expected to be a (host, port) tuple
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the half-open socket
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                # Every candidate failed: re-raise the last error seen
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        # Only override the connection factory where the object exposes one
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: replace connect() with our own
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2634
2635
def handle_youtubedl_headers(headers):
    """Apply the internal "Youtubedl-no-compression" marker header.

    If the marker is absent, the very same headers object is returned.
    Otherwise a new dict is returned that contains neither the marker nor
    any "Accept-Encoding" header (matched case-insensitively); the input
    is left untouched.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    return dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding' and name != 'Youtubedl-no-compression')
87f0e62d
YCH
2644
2645
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """params is stored as self._params; the rest goes to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, routing through a SOCKS proxy if the request asks for one."""
        conn_class = compat_http_client.HTTPConnection

        # "Ytdl-socks-proxy" is an internal marker header: consume it here
        # so that it is not part of the request headers any more
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data; empty input is returned unchanged."""
        if not data:
            return data
        try:
            # Negative wbits: raw deflate stream without zlib header
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Fall back to a zlib-wrapped stream
            return zlib.decompress(data)

    def http_request(self, req):
        """Pre-process an outgoing request: escape URL, add default headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Add every standard header the request does not already carry
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response: decompress the body, escape Location."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; give up with the
                # original error if none of the attempts decompresses
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same pre/post-processing
    https_request = http_request
    https_response = http_response
bf50b038 2769
5de90176 2770
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Build a connection class that tunnels base_class through a SOCKS proxy.

    base_class:  compat_http_client.HTTPConnection/HTTPSConnection (or a
                 subclass) that the returned class derives from
    socks_proxy: proxy URL; supported schemes are socks5, socks4, socks
                 (alias of socks4) and socks4a. Port defaults to 1080 and
                 the optional credentials are percent-decoded.

    Raises ValueError for any other scheme (previously this surfaced as an
    UnboundLocalError further down).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # username/password may be None or empty: pass those through as is
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the proxy socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2812
2813
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler with source address and SOCKS proxy support."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._params = params
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection

    def https_open(self, req):
        """Open req over HTTPS, via a SOCKS proxy if the request asks for one."""
        # Forward whichever TLS settings this Python version's handler has:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        open_kwargs = {}
        for option in ('context', 'check_hostname'):
            attribute = '_' + option
            if hasattr(self, attribute):
                open_kwargs[option] = getattr(self, attribute)

        connection_class = self._https_conn_class
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2837
2838
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Cookie jar reading and writing Netscape/Mozilla format cookie files
    as UTF-8, tolerant of "#HttpOnly_" prefixed and malformed entries.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks HttpOnly cookies in cookie files; stripped on load
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields of a valid cookie line
    _ENTRY_LEN = 7
    # Written at the top of every saved cookie file
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Named view of the fields of one cookie line, in file order
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Cookies marked discard / already expired are skipped unless
        ignore_discard / ignore_expires is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        # NOTE: this modifies the cookies held in the jar, not just the output
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Invalid entries are skipped with a warning on stderr.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line to hand over to the parser (with any HttpOnly
            # prefix removed); raises LoadError for entries that are invalid
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect the cleaned-up lines in memory and parse that buffer
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2955
2956
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also handles HTTPS requests and responses."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Currently a plain pass-through to HTTPCookieProcessor.http_response."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is disabled (commented out)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP handlers for HTTPS
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2979
2980
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler used by YoutubeDL

    Based on the HTTPRedirectHandler implementation from CPython [1].

    Compared to the stock handler it
     - makes sure the redirect URL is always unicode under python 2
     - supports the experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        The http_error_30x methods call this when a redirection response
        is received. Return a new Request if the redirection should take
        place so that http_error_30x can perform it. Raise HTTPError if
        no-one else should try to handle this url, or return None if this
        handler can't but another one might.
        """
        method = req.get_method()
        redirectable = (
            code in (301, 302, 303, 307, 308) and method in ("GET", "HEAD")
            or code in (301, 302, 303) and method == "POST")
        if not redirectable:
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request carries no body, so drop the headers
        # describing one and keep everything else
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {}
        for header_name, header_value in req.headers.items():
            if header_name.lower() in CONTENT_HEADERS:
                continue
            newheaders[header_name] = header_value
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
fca6dba8
S
3036
3037
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing timezone designator off a date string.

    Returns a tuple (timezone, date_str): timezone is the UTC offset as a
    datetime.timedelta (zero for "Z" or when nothing was recognized) and
    date_str is the input shortened by the length of the designator.
    """
    tz_match = re.search(
        r'''(?x)
            ^.{8,}? # >=8 char non-TZ prefix, if present
            (?P<tz>Z| # just the UTC Z, or
            (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
            (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
            [ ]? # optional space
            (?P<sign>\+|-) # +/-
            (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
            $)
        ''', date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_text = tz_match.group('sign')
    if not sign_text:
        # A bare "Z": UTC
        return datetime.timedelta(), remainder

    factor = 1 if sign_text == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(tz_match.group('hours')),
        minutes=factor * int(tz_match.group('minutes')))
    return offset, remainder
3062
3063
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter: separator between the date and the time part
    timezone:  UTC offset as datetime.timedelta; if None, it is extracted
               from date_str itself
    Returns None if date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Fractional seconds are not supported by the format below: drop them
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_time = datetime.datetime.strptime(without_fraction, layout) - timezone
        return calendar.timegm(utc_time.timetuple())
    except ValueError:
        return None
912b38b4
PH
3081
3082
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first or the month-first collection of date formats"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3085
3086
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None if date_str is None or matches none of the known formats.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    # Every format is tried; if several match, the last one wins
    upload_date = None
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 3113
5f6a1245 3114
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for a date string in one of many formats

    date_str:  the date string; None yields None
    day_first: whether ambiguous dates are read day-first (as opposed to
               month-first)
    Returns None if the string could not be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE(review): 12 hours are added whenever "PM" occurs anywhere in the
    # string, including for 12:xx PM times
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The UTC offset was already cut out of date_str by extract_timezone,
        # so it has to be applied here as well; otherwise the local time
        # would be taken for UTC. (Whole seconds, to keep the result an int.)
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
46f59e89
S
3146
3147
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL

    The text after the last dot of the part before any "?" is used if it is
    purely alphanumeric, or if it is a known extension once trailing slashes
    are stripped. Otherwise (also for None or dot-less URLs) default_ext
    is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3159
5f6a1245 3160
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by "<sub_lang>.<sub_format>" """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3163
5f6a1245 3164
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if relative is None:
        # Not a relative expression: an absolute date in the given format
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    base = datetime_from_str(relative.group('start'), precision, format)
    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')

    if unit in ('month', 'year'):
        # Calendar-aware arithmetic; such results are rounded to days
        months = amount * 12 if unit == 'year' else amount
        result = datetime_add_months(base, months)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        result = base + datetime.timedelta(**{unit + 's': amount})

    if auto_precision:
        return datetime_round(result, unit)
    return result
3205
3206
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: string date format used to return datetime object from
    """
    moment = datetime_from_str(date_str, precision='microsecond', format=format)
    return moment.date()
3215
3216
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month index so that divmod yields the year carry
    year_carry, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_carry
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, last_day))
3224
3225
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    precision: microsecond|second|minute|hour|day; with 'microsecond'
               dt is returned unchanged
    """
    if precision == 'microsecond':
        return dt

    step = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }[precision]
    seconds = calendar.timegm(dt.timetuple())
    # Round to the nearest multiple of step (halves are rounded up)
    rounded = ((seconds + step / 2) // step) * step
    return datetime.datetime.utcfromtimestamp(rounded)
5f6a1245
JW
3242
3243
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings in any other format are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3252
5f6a1245 3253
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing start/end leaves the range open on that side (the
        smallest/largest representable date is used).
        Raises ValueError if start is later than end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range

        date may be a datetime.date, a datetime.datetime (its date part is
        used) or a string in the format accepted by date_from_str.
        """
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date, but the two cannot be ordered
            # against each other (TypeError), so reduce it to its date
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3283
3284
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3293
3294
49fa4d9a
N
def get_windows_version():
    ''' Get Windows version. None if it's not running on Windows '''
    if compat_os_name != 'nt':
        return None
    return version_tuple(platform.win32_ver()[1])
3301
3302
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s:   the text to write
    out: the stream to write to; only streams whose fileno() is 1 or 2 and
         which are attached to a console are handled here
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors to the values handed to GetStdHandle
    # (-11 and -12 are STD_OUTPUT_HANDLE and STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Prototypes of the kernel32 functions used below
    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, for anything that is not a character
        # device (ignoring the "remote" flag) and if GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane,
        # or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, each chunk ending before
    # the next non-BMP character
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own as 2 units
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3376
3377
def write_string(s, out=None, encoding=None):
    """ Write the text s to out (sys.stderr by default) and flush it.

    On Windows, when no encoding is forced, the console API is tried first.
    Otherwise the text is encoded for binary streams, written through
    out.buffer when available, or written as-is. """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    try_console = sys.platform == 'win32' and encoding is None and hasattr(stream, 'fileno')
    if try_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        codec = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(codec, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
3398
3399
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte string (or sequence of characters) to a list of ints """
    if not bs:
        return []
    # Python 3 bytes already yield ints; anything else is mapped through ord()
    return list(bs) if isinstance(bs[0], int) else [ord(ch) for ch in bs]
3407
c257baff 3408
def intlist_to_bytes(xs):
    """ Pack a sequence of ints into a byte string, one unsigned byte each """
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
c38b1e77
PH
3413
3414
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current platform
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure passed as the last argument of LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to LockFileEx/UnlockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock f via LockFileEx; raises OSError on failure """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 for an exclusive lock and 0x0 otherwise
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Release the lock taken by _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """ Take an exclusive or shared flock on f """
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """ Release the flock held on f """
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Fallbacks keep the same signatures but always fail
        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3488
3489
class locked_file(object):
    """ Context manager wrapping a file that is locked while the block runs.

    The file is opened in __init__; mode 'r' takes a shared lock, 'a' and 'w'
    take an exclusive one. The file is closed on exit. """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Anything but plain reading needs an exclusive lock
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the open handle when locking is impossible
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3519
3520
4644ac55
S
def get_filesystem_encoding():
    """ Return the filesystem encoding, falling back to 'utf-8' if unknown """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3524
3525
def shell_quote(args):
    """ Quote every argument for the shell and join them with spaces """
    fs_encoding = get_filesystem_encoding()
    return ' '.join(
        # We may get a filename encoded with 'encodeFilename'
        compat_shlex_quote(arg.decode(fs_encoding) if isinstance(arg, bytes) else arg)
        for arg in args)
9d4660ca
PH
3535
3536
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into a '#__youtubedl_smuggle=...' fragment appended
    to url. If url already carries smuggled data, it is merged in and takes
    precedence over the keys of data. The caller's dict is not modified. """

    url, idata = unsmuggle_url(url, {})
    # Work on a copy so the caller's dict is not mutated as a side effect
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3545
3546
def unsmuggle_url(smug_url, default=None):
    """ Split a smuggled URL into (url, data).

    Returns (smug_url, default) unchanged when nothing was smuggled. """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
02dbf93f
PH
3554
3555
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count as a human-readable string such as '1.50KiB'.

    bytes may be a number, a numeric str, or None (which yields 'N/A').
    Values beyond the largest known unit are expressed in that unit, and
    values below one byte are expressed in 'B'. """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp: a negative exponent (fractional input) would index the list
        # from the end, and an exponent past YiB would raise IndexError
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 3568
1c088fa8 3569
fb47597b
S
def lookup_unit_table(unit_table, s):
    """ Parse a leading '<number><unit>' from s and scale it via unit_table.

    The number may use ',' or '.' as decimal separator. Returns the scaled
    value as an int, or None if s does not start with a known quantity. """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    found = re.match(pattern, s)
    if not found:
        return None
    number = float(found.group('num').replace(',', '.'))
    return int(number * unit_table[found.group('unit')])
3579
3580
be64b5b0
PH
def parse_filesize(s):
    """ Parse a file size such as '1.5 MiB' into a number of bytes.

    Returns None if s is None; otherwise returns whatever
    lookup_unit_table yields for the unit table below. """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    # Maps each accepted unit spelling to its multiplier in bytes
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3650
3651
def parse_count(s):
    """ Parse a count such as '1,234' or '1.5M' into an int.

    Returns None for None input or when nothing can be parsed. """
    if s is None:
        return None

    text = s.strip()

    # Digits with only thousands/decimal separators need no unit lookup
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    return lookup_unit_table({
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }, text)
be64b5b0 3671
2f7ae819 3672
b871d7e9
S
def parse_resolution(s):
    """ Extract width/height information from a resolution string.

    Recognizes 'WxH', 'NNNp'/'NNNi' and '4k'/'8k'. Returns a dict with
    'width' and/or 'height', or an empty dict if nothing is recognized. """
    if s is None:
        return {}

    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims:
        width, height = int(dims.group('w')), int(dims.group('h'))
        return {'width': width, 'height': height}

    lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if lines:
        return {'height': int(lines.group(1))}

    # 4k -> 2160 and 8k -> 4320
    kilo = re.search(r'\b([48])[kK]\b', s)
    if kilo:
        return {'height': int(kilo.group(1)) * 540}

    return {}
3693
3694
0dc41787
S
def parse_bitrate(s):
    """ Return the integer in front of 'kbps' in s, or None if there is none """
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3701
3702
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of a month by its full name in the given
    language, or None if the name is unknown. Unknown languages fall back
    to English names. """

    candidates = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = candidates.index(name)
    except ValueError:
        return None
    return position + 1
3712
3713
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month by its three-letter English
    abbreviation, or None if it is unknown """

    try:
        short_names = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
        position = short_names.index(abbrev)
    except ValueError:
        return None
    return position + 1
18258362
JMF
3722
3723
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity by '&amp;'"""
    # Leave the five named XML entities and short numeric references alone
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
3730
3731
def setproctitle(title):
    """ Best-effort attempt to set the process name via libc's prctl.

    Silently does nothing when running on Jython, when libc.so.6 cannot
    be loaded, or when the loaded libc has no prctl. """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initializing from the bytes allocates len + 1 bytes, so the buffer is
    # NUL-terminated; a buffer of exactly len bytes would let prctl read
    # past its end while looking for the terminator
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        # 15 is PR_SET_NAME
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3756
3757
def remove_start(s, start):
    """ Return s without the prefix start; s is returned as-is if it is None
    or does not begin with start """
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
29eb5174
PH
3760
3761
def remove_end(s, end):
    """ Return s without the suffix end; s is returned as-is if it is None,
    does not finish with end, or end is empty """
    # The trailing `and end` guards the empty suffix: s[:-0] is s[:0], which
    # would wrongly turn the whole string into ''
    if s is not None and s.endswith(end) and end:
        return s[:-len(end)]
    return s
2b9faf55
PH
3764
3765
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes surrounding s """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    for quote in ('"', "'"):
        if first == quote and last == quote:
            return s[1:-1]
    return s
3773
3774
b6e0c7d2
U
def get_domain(url):
    """ Return the host part of url without scheme and leading 'www.',
    or None if url contains no dotted host """
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3778
3779
def url_basename(url):
    """ Return the last segment of the path component of url """
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
3783
3784
02dc0a36
S
def base_url(url):
    """ Return url up to and including the last '/' before any '?', '#' or '&'.

    Raises AttributeError if url is not an http(s) URL containing such a '/'. """
    found = re.match(r'https?://[^?#&]+/', url)
    return found.group()
3787
3788
def urljoin(base, path):
    """ Join path onto base, tolerating bad input.

    Returns path itself when it is already absolute or protocol-relative,
    and None when path is empty/not a string or base is not an http(s) or
    protocol-relative URL. bytes arguments are decoded as UTF-8. """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    path_is_absolute = re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path)
    if path_is_absolute:
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3802
3803
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """ Request subclass whose HTTP method is always HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
3807
3808
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """ Request subclass whose HTTP method is always PUT """

    def get_method(self):
        return 'PUT'
3812
3813
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to int, returning default when that is not possible.

    If get_attr is given, the attribute of that name is read from v first.
    The result is int(v) * invscale // scale. None and '' yield default. """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '' or v is None:
        return default
    try:
        number = int(v)
    except (ValueError, TypeError):
        return default
    try:
        return number * invscale // scale
    except (ValueError, TypeError):
        return default
9732d77e 3826
9572013d 3827
40a90862
JMF
def str_or_none(v, default=None):
    """ Convert v to compat_str, or return default when v is None """
    if v is None:
        return default
    return compat_str(v)
3830
9732d77e
PH
3831
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: integers are passed through,
    and ',', '.' and '+' are dropped from strings before conversion.
    Any other type yields None. """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
608d11f5
PH
3839
3840
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to float(v) * invscale / scale, returning default when v is
    None or cannot be converted """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3848
3849
c7e327c4
S
def bool_or_none(v, default=None):
    """ Return v if it is a bool, otherwise default """
    if isinstance(v, bool):
        return v
    return default
3852
3853
53cd37ba
S
def strip_or_none(v, default=None):
    """ Return v stripped of surrounding whitespace if it is a compat_str,
    otherwise default """
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3856
3857
af03000a
S
def url_or_none(url):
    """ Return the stripped url if it starts with a supported scheme (or is
    protocol-relative), otherwise None """
    if not (url and isinstance(url, compat_str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
af03000a
S
3863
3864
def strftime_or_none(timestamp, date_format, default=None):
    """ Format timestamp with date_format, returning default on any failure.

    timestamp is either a number (unix timestamp, interpreted as UTC) or a
    string in YYYYMMDD form. Any other type, an unparsable string or an
    out-of-range timestamp yields default. """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # datetime_object is still None for unsupported types, which raises
        # AttributeError here and is mapped to default below
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # OverflowError/OSError: utcfromtimestamp rejects timestamps outside
        # the range supported by the platform
        return default
3875
3876
def parse_duration(s):
    """ Parse a duration string into a number of seconds.

    Three formats are tried in order: colon-separated ([[[D:]H:]M:]S[.ms]),
    unit-suffixed (e.g. '1h 2m 3s', also with an ISO-8601-like 'P...T' prefix),
    and a single fractional amount of hours or minutes (e.g. '1.5 hours').
    Returns None if s is not a string or matches none of them. """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1) Colon-separated form
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2) Unit-suffixed form; years, months and weeks are matched
        #    but not captured, so they do not contribute to the result
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3) A single, possibly fractional, number of hours or minutes
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up whichever components were captured
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms includes its leading '.', so it is already a fraction of a second
        duration += float(ms)
    return duration
91d7d0b3
JMF
3933
3934
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the real extension of filename.

    If expected_real_ext is given and does not match the real extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(name, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
d70ad093
PH
3941
3942
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the real extension of filename by ext.

    If expected_real_ext is given and does not match the real extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        stem = name
    else:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3948
3949
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary could not be started. """
    command = [exe] + args
    try:
        process_communicate_or_kill(subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
    except OSError:
        return False
    return exe
b7ab0590
PH
3959
3960
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3978
3979
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract the version from a program's output.

    Returns the first group of version_re (by default the token following
    'version'), or unrecognized if the pattern does not match. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3989
3990
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList

    Items are pulled from the iterable only as far as needed and are kept in
    an internal cache. Negative indices, open-ended slices, len(), repr(),
    str() and reversed iteration consume the whole iterable. '''

    class IndexError(IndexError):
        # Raised by __getitem__ in place of the builtin IndexError
        pass

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        # Items already pulled from the iterable, in original order
        self.__cache = []
        # When True, the list is presented in reverse order
        self.__reversed = False

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # Replay what is cached, then keep pulling (and caching) new items
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Pull everything that is left into the cache; returns the cache
        # itself (always in original order)
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map an index of the reversed view onto the cache (0 -> -1, -1 -> 0)
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Number of additional items needed to reach the highest requested index
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Only the first item of the underlying iterable is needed
        try:
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self.__exhaust()
        return len(self.__cache)

    def reverse(self):
        # Flips the presentation order in place and returns self
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4075
483336e7 4076
class PagedList:
    """ Base class for lists whose items are fetched page by page.

    pagefunc(pagenum) must return an iterable with the items of that page;
    pagesize is the number of items on a full page. When use_cache is true,
    every fetched page (including an empty one) is kept and never refetched.
    Subclasses implement _getslice(start, end). """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        # pagenum -> list of items on that page
        self._cache = {}

    def getpage(self, pagenum):
        """ Return the items of page pagenum as a list """
        page_results = self._cache.get(pagenum)
        # Compare against None rather than truthiness so that a cached
        # empty page is not fetched again
        if page_results is None:
            page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        """ Return the items in [start, end) as a list """
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        return entries[0] if entries else None
4106
9c44d242
PH
4107
class OnDemandPagedList(PagedList):
    """ PagedList that fetches pages one after another until a page comes
    back short or the requested end is reached """

    def _getslice(self, start, end):
        # Start at the page containing `start` and walk forward
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offset of the first wanted item within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            # Offset just past the last wanted item, if the slice ends here
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
81c2f20b
PH
4141
4142
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ PagedList for which the number of pages (pagecount) is given up
    front; caching is always enabled """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Items to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Items still to be yielded; None means "until the last page"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    # This page completes the slice
                    yield from page_results[:only_more]
                    break
            yield from page_results
9c44d242
PH
4166
4167
def uppercase_escape(s):
    """ Replace every literal \\UXXXXXXXX escape in s by the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
0fe2ff78
YCH
4174
4175
def lowercase_escape(s):
    """ Replace every literal \\uXXXX escape in s by the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
b53466e1 4182
d05cfe06
S
4183
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 text strings are quoted as UTF-8 bytes
    on_python2_text = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if on_python2_text:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4189
4190
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component is percent-escaped
    escaped = {
        'netloc': parts.netloc.encode('idna').decode('ascii'),
        'path': escape_rfc3986(parts.path),
        'params': escape_rfc3986(parts.params),
        'query': escape_rfc3986(parts.query),
        'fragment': escape_rfc3986(parts.fragment),
    }
    return parts._replace(**escaped).geturl()
4201
62e609ab 4202
def parse_qs(url):
    """ Return the parsed query string of url as produced by compat_parse_qs """
    query = compat_urllib_parse_urlparse(url).query
    return compat_parse_qs(query)
4205
4206
62e609ab
PH
def read_batch_urls(batch_fd):
    """ Read the URLs of a batch file, one per line, and close batch_fd.

    Blank lines and lines starting with '#', ';' or ']' are skipped, a
    leading BOM is removed and trailing ' #...' comments are cut off. """
    def clean_line(line):
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if line.startswith(bom):
                line = line[len(bom):]
        line = line.lstrip()
        if not line or line.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', line, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return list(filter(None, map(clean_line, fd)))
b74fa8cd
JMF
4224
4225
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments with compat_urllib_parse_urlencode and
    return the result as ASCII bytes """
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4228
4229
def update_url_query(url, query):
    """ Return url with its query string updated by the mapping query.

    Existing parameters are kept unless overridden; url is returned
    unchanged when query is empty. """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 4238
8e60dc75 4239
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """ Build a new request based on req with the given overrides.

    headers are merged over the original ones, data and url replace the
    original values when given, and query is merged into the URL. The HTTP
    method (HEAD/PUT/other) and the timeout of req are preserved. """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    body = data or req.data
    target = update_url_query(url or req.get_full_url(), query)
    # Pick the request class that reproduces the original method
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = request_class(
        target, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4258
4259
def _multipart_encode_impl(data, boundary):
    """ Encode data as multipart/form-data using the given boundary.

    Returns (body_bytes, content_type). Raises ValueError if the boundary
    occurs inside any of the encoded fields. """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    pieces = []
    for name, value in data.items():
        delimiter = boundary.encode('ascii')
        pieces.append(b'--' + delimiter + b'\r\n')
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        field = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if delimiter in field:
            raise ValueError('Boundary overlaps with data')
        pieces.append(field)

    pieces.append(b'--' + boundary.encode('ascii') + b'--\r\n')

    return b''.join(pieces), content_type
4280
4281
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body, content_type) tuple. With an explicit boundary that
    collides with the data, ValueError is raised.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        return _multipart_encode_impl(data, boundary)

    # Keep drawing random boundaries until one does not collide with the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
4310
4311
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key out of several, in `d`.

    With a single key this is plain `d.get(key, default)`. With a list or
    tuple of keys, return the value of the first key that is present and not
    None (and, when skip_false_values is set, not falsy); `default` if none
    qualifies.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)

    for candidate in key_or_keys:
        if candidate not in d:
            continue
        value = d[candidate]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4320
4321
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to `src` and return the first acceptable result.

    `getter` is passed through variadic(), so it may be one callable or
    several. A getter that raises AttributeError, KeyError, TypeError or
    IndexError is skipped, as is a result that is not an instance of
    `expected_type` (when given). Returns None if nothing qualifies.
    """
    for accessor in variadic(getter):
        try:
            result = accessor(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is not None and not isinstance(result, expected_type):
            continue
        return result
    return None
329ca3be
S
4331
4332
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts left to right, earlier dicts taking precedence.

    None values are ignored. An existing value is only replaced when it is
    an empty string and the new value is a non-empty string.
    """
    merged = {}
    for source in dicts:
        for key, value in source.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_empty_string = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str) and not current)
                if not fills_empty_string:
                    continue
            merged[key] = value
    return merged
4345
4346
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as compat_str, decoding it with `encoding` if needed.

    The default encoding is evaluated once, when the function is defined.
    """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4349
16392824 4350
a1a530b0
PH
# Age limit returned by parse_age_limit() for each US rating label.
# parse_age_limit() upper-cases its input before looking it up here.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4358
4359
# Age limit returned by parse_age_limit() for each TV rating label.
# Every key must start with 'TV-': parse_age_limit() builds its matching
# regex from the part of each key after that 3-character prefix.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4368
4369
def parse_age_limit(s):
    """Turn an age rating into an integer age limit, or None if unrecognised.

    Accepts a plain int in the range 0..21 (bools are rejected because the
    exact type is checked), a string such as '18' or '18+', a key of
    US_RATINGS, or a TV rating such as 'TV-14', 'TV_14' or 'TV14'.
    String matching is case-insensitive.
    """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    rating = s.upper()
    if rating in US_RATINGS:
        return US_RATINGS[rating]

    # Keys all start with 'TV-'; match on the remainder with an optional separator
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, rating)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4385
4386
def strip_jsonp(code):
    """Strip a JSONP wrapper and return the payload passed to the callback.

    Handles an optional 'window.' prefix, the 'name && name(...)' guard
    form, an optional trailing semicolon and trailing '//' comments.
    Input that does not look like JSONP is returned unchanged.
    """
    jsonp_rex = re.compile(r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$''')
    return jsonp_rex.sub(r'\g<callback_data>', code)
478c2c61
PH
4395
4396
def js_to_json(code, vars={}):
    """Rewrite a JavaScript object/array literal so that it is closer to JSON.

    The code is tokenised by one big regex and every matched token is
    rewritten by fix_kv(); everything the regex does not match is left as is.

    code: JavaScript source text
    vars: mapping of identifier -> replacement text, substituted verbatim
          for bare identifiers. The default dict is only read, never
          modified, by this function.
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for hexadecimal and octal integer literals,
    # optionally followed by ':' when the literal is used as an object key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Replacement callback: map one matched token to its JSON spelling
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, runs of '!' and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: strip the quotes and normalise escapes so the
            # content is valid inside a double-quoted JSON string
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Keys must be strings in JSON, plain values stay numeric
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        # Anything else (string content, identifier, decimal key) gets quoted
        return '"%s"' % v

    # Alternatives, in order: double-quoted string, single-quoted string,
    # comment, trailing comma before ']' or '}', 'void 0', identifier,
    # hex/octal integer (optionally a key), decimal integer key, run of '!'
    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4443
4444
478c2c61
PH
def qualities(quality_ids):
    """Build a ranking function from a sequence of quality ids.

    The returned callable maps a quality id to its index in `quality_ids`,
    or to -1 when the id is not listed.
    """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
4453
acd69589 4454
# %-style output templates keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types.
# NOTE(review): the string values look like the fixed filename suffix used
# for that type, with None meaning "no fixed suffix" - confirm against the
# code that consumes this table.
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
0a871f68 4470
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
#
# This is a template, not a finished regex: placeholder {0} is the pattern
# for the mapping key and {1} the pattern for the conversion type.
# The 'prefix' group captures any run of escaped '%%' before the specifier.
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
'''


# Conversion type characters of %-formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4489
7d1eb38a 4490
a020a0dc
PH
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Returns None for None. Strings of at most `length` characters are
    returned unchanged; longer ones are cut and terminated with '...'.

    When `length` is shorter than the ellipsis itself, nothing of the
    original string is kept and the result is just '...'. Previously the
    slice index went negative, which kept almost the whole string and made
    the result longer than the input.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) > length:
        # Clamp so that a tiny limit cannot turn into a negative slice index
        keep = max(length - len(ELLIPSES), 0)
        return s[:keep] + ELLIPSES
    return s
48844745
PH
4499
4500
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints.

    Raises ValueError if any component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
4503
4504
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether `version` is strictly older than `limit`.

    When `version` is empty, or either string cannot be parsed by
    version_tuple(), the answer is `not assume_new`.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current, minimum = version_tuple(version), version_tuple(limit)
    except ValueError:
        return fallback
    return current < minimum
732ea2f0
PH
4512
4513
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Self-update detection is switched off: this always reports False.
    # The check that used to follow the return statement was unreachable
    # and has been removed. It reported True when the module was loaded by
    # a zipimport.zipimporter or when `sys.frozen` was set; restore that
    # test here if the check is ever re-enabled.
    return False
7d4111ed
PH
4521
4522
def args_to_str(args):
    """Return a short, shell-quoted, space-separated rendering of a command."""
    quoted = map(compat_shlex_quote, args)
    return ' '.join(quoted)
2ccd1b10
PH
4526
4527
def error_to_compat_str(err):
    """Return the message of an exception as text."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4535
4536
def mimetype2ext(mt):
    """Map a MIME type (as found in a Content-Type header) to a file extension.

    Returns None for None. Parameters after ';', surrounding whitespace and
    letter case are ignored. Types without a dedicated mapping yield their
    subtype unchanged (e.g. 'video/mp4' -> 'mp4').

    The type is normalised before either lookup. Previously the full-type
    table was consulted with the raw string, so 'audio/mp4; codecs=...'
    or 'Audio/MPEG' missed it, and a '/' inside a parameter value
    corrupted the extracted subtype.
    """
    if mt is None:
        return None

    # Keep only "type/subtype": drop parameters, whitespace and case
    mt = mt.partition(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mt)
    if ext is not None:
        return ext

    # A bare subtype without '/' is accepted as well
    res = mt.rpartition('/')[2]

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4574
4575
def parse_codecs(codecs_str):
    """Split an RFC 6381 'codecs' string into video and audio codec.

    Returns {} for empty input. Otherwise returns a dict with 'vcodec' and
    'acodec': the first recognised codec of each kind, with 'none' for a
    kind that is absent. If nothing is recognised but exactly two codecs
    were given, they are taken as (video, audio) in that order; else {}.
    A warning is written to stderr for every unrecognised codec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    raw_items = codecs_str.strip().strip(',').split(',')
    split_codecs = list(filter(None, map(str.strip, raw_items)))

    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4605
4606
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of an opened URL handle.

    A quoted filename in an 'attachment' Content-Disposition header wins;
    otherwise the Content-Type header is mapped through mimetype2ext().
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4619
4620
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI carrying the bytes `data` as `mime_type`."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4623
4624
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    content_limit: age limit attached to the content, None if unrestricted
    age_limit:     age limit chosen by the user, None if no limit is set
    """
    # Nothing to enforce unless both the user and the content carry a limit
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4633
4634
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading BOM (UTF-8 when there is
    none), and the result is a regex match object if the first
    non-whitespace character is '<', else None.
    """

    # Order matters: the UTF-32 LE BOM starts with the UTF-16 LE BOM
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]

    encoding, payload = 'utf-8', first_bytes
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = enc, first_bytes[len(bom):]
            break

    text = payload.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
a055469f
PH
4653
4654
def determine_protocol(info_dict):
    """Return the download protocol for a format/info dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from info_dict['url']: a known streaming URL prefix, then a manifest
    extension ('m3u8' or 'f4m'), and finally the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4675
4676
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    header_row: list of column titles
    data:       list of rows (lists of values)
    delim:      insert a row of dashes below the header
    extraGap:   extra padding added to every column but the last
    hideEmpty:  drop columns whose values in `data` all render as ''
    """

    def column_widths(rows):
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_flagged(row, flags):
        return [cell for (flag, cell) in zip(flags, row) if flag]

    if hideEmpty:
        # A zero width marks a column that is empty in every data row
        data_widths = column_widths(data)
        header_row = keep_flagged(header_row, data_widths)
        data = [keep_flagged(row, data_widths) for row in data]

    widths = column_widths([header_row] + data)
    if delim:
        separator = ['-' * width for width in widths]
        table = [header_row] + [separator] + data
    else:
        table = [header_row] + data

    # Left-align and pad every column except the last one
    padded_specs = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    format_str = ' '.join(padded_specs) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4697
4698
def _match_one(filter_part, dct, incomplete):
    """Evaluate a single filter condition against `dct`.

    filter_part: one condition, either "<key> [!]<op>[?] <value>" or a
                 unary test "<key>" / "!<key>"
    dct:         dict holding the field values
    incomplete:  when true, a condition on a field that is missing (None)
                 passes

    Returns a truthy/falsy result. Raises ValueError if the condition
    cannot be parsed, if a string-only operator is used with a numeric
    value, or if a numeric value is not a valid integer or file size.
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    # Insertion order is significant: the keys are joined into a regex
    # alternation below, so longer operators must precede their prefixes
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and the matching comparison
                # value is a number, we should respect the type of the original
                # field and process the comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Un-escape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try a file size, with and without
                # an explicit trailing 'B'
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: passes only in incomplete mode or with '?'
            return incomplete or m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Presence tests: booleans must be exactly True/False, any other
    # field is tested for (not) being None
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4780
4781
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.

    `filter_str` consists of conditions joined by '&'; a literal ampersand
    is written as '\\&'. Returns True (= passes the filter) only if every
    condition passes, else False.
    When incomplete, all conditions pass on missing fields.
    """
    filter_parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(part.replace(r'\&', '&'), dct, incomplete)
        for part in filter_parts)
347de493
PH
4789
4790
def match_filter_func(filter_str):
    """Wrap `filter_str` into a match-filter callable.

    The callable takes an info dict (extra arguments are forwarded to
    match_str) and returns None when the dict passes the filter, or a
    message explaining why it is skipped.
    """
    def _match_func(info_dict, *args, **kwargs):
        if match_str(filter_str, info_dict, *args, **kwargs):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
4799
4800
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Understands an offset such as '12', '12.5' or '12.5s', and a clock time
    'H:MM:SS' whose fraction may follow either '.' or ':'.
    Returns None for empty or unrecognised input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock is None:
        return None
    hours, minutes, seconds = clock.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4812
4813
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode 'HH:MM:SS,mmm'.

    The time is rounded to the nearest millisecond before it is split into
    fields. Formatting the float parts directly with %d truncated them, so
    binary rounding noise lost a millisecond (4.35 came out as ',349' and
    1.001 as ',000').
    """
    total_msec = int(round(seconds * 1000))
    total_secs, msec = divmod(total_msec, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
bf6427d2
YCH
4816
4817
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT, keeping basic text styling.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> element

    Paragraphs without a usable begin time, or with neither an end time nor
    a duration, are left out (their index number is skipped as well).
    '''
    # Old namespace URIs are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> {property: value}, parents already merged in
    default_style = {}  # style inherited from <body>/<div>

    class TTMLPElementParser(object):
        # XMLParser target that renders one <p> element as SRT text.
        # NOTE(review): the two lists below are class attributes that are
        # only ever mutated in place, so they are shared by every parser
        # instance created during one dfxp2srt() call. Existing output may
        # depend on that carry-over; confirm before making them per-instance.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence: default style < referenced style < inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect with the same value
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close, innermost first, the tags opened for this element
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialise the element and feed it through the styling parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # A style may reference a parent that is declared later in the document,
    # so make repeated passes until every parent has been resolved.
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Stop when nothing is pending. Also stop when a whole pass resolved
        # no new style: the remaining parents are missing or carry no
        # supported property, and further passes would never terminate.
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4980
4981
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argv fragment for an option that takes a value.

    params:         dict of parameters
    command_option: option name to emit, e.g. '--proxy'
    param:          key to look up in `params`

    Returns [] when the parameter is missing or None, otherwise
    [command_option, value] with the value converted to text.

    Every non-None value is converted. Previously falsy values such as 0
    were passed through unconverted, which put a non-string into the
    argument list.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, str(value)]
4987
4988
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argv fragment for a boolean option with an explicit value.

    Returns [] when the parameter is missing or None. Otherwise the value
    must be a bool (asserted) and is rendered as `true_value` or
    `false_value`: as a separate argument, or joined to the option with
    `separator` when one is given.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4997
4998
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals `expected_value`, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
5002
5003
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick extra command-line arguments out of a keyed argument dict.

    argdict:    dict mapping lower-case keys to argument lists; a plain
                list/tuple is the legacy form and is returned as is when
                use_compat is set, else treated as missing
    keys:       list/tuple of candidates in priority order; each candidate
                is one key or a group of keys (expanded by variadic())
    default:    returned, without copying, when nothing matches

    For the first candidate with at least one key present in `argdict`,
    the argument lists of all its present keys are concatenated.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        return default
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_list in keys:
        looked_up = [argdict.get(key.lower()) for key in variadic(key_list)]
        present = [args for args in looked_up if args is not None]
        if present:
            return [arg for args in present for arg in args]
    return default
66e289ba 5022
6251555f 5023
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve extra arguments for executable `exe` used by `main_key`.

    Builds the lookup keys for cli_configuration_args(): the root key is
    `exe` alone when it equals `main_key`, else 'main_key+exe', each
    combined with the suffixes in `keys`. When the bare root key is among
    them, the (main_key, exe) group and 'default' are added as fallbacks;
    otherwise the legacy list form of `argdict` is not honoured.
    """
    main_key = main_key.lower()
    exe = exe.lower()
    if main_key == exe:
        root_key = exe
    else:
        root_key = f'{main_key}+{exe}'

    suffixes = keys or ['']
    keys = [f'{root_key}{suffix}' for suffix in suffixes]
    if root_key not in keys:
        use_compat = False
    else:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
5035
66e289ba 5036
39672624
YCH
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # Two-letter code -> three-letter code. Withdrawn two-letter codes are
    # listed after their replacement, so long2short(), which returns the
    # first match in this order, yields the current code.
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of `code` are used for the lookup.
        Returns None for an unknown code.
        """
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code in _lang_map that maps to `code`,
        or None (implicitly) when there is none.
        """
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5240
5241
4eb10f66
YCH
5242class ISO3166Utils(object):
5243 # From http://data.okfn.org/data/core/country-list
5244 _country_map = {
5245 'AF': 'Afghanistan',
5246 'AX': 'Åland Islands',
5247 'AL': 'Albania',
5248 'DZ': 'Algeria',
5249 'AS': 'American Samoa',
5250 'AD': 'Andorra',
5251 'AO': 'Angola',
5252 'AI': 'Anguilla',
5253 'AQ': 'Antarctica',
5254 'AG': 'Antigua and Barbuda',
5255 'AR': 'Argentina',
5256 'AM': 'Armenia',
5257 'AW': 'Aruba',
5258 'AU': 'Australia',
5259 'AT': 'Austria',
5260 'AZ': 'Azerbaijan',
5261 'BS': 'Bahamas',
5262 'BH': 'Bahrain',
5263 'BD': 'Bangladesh',
5264 'BB': 'Barbados',
5265 'BY': 'Belarus',
5266 'BE': 'Belgium',
5267 'BZ': 'Belize',
5268 'BJ': 'Benin',
5269 'BM': 'Bermuda',
5270 'BT': 'Bhutan',
5271 'BO': 'Bolivia, Plurinational State of',
5272 'BQ': 'Bonaire, Sint Eustatius and Saba',
5273 'BA': 'Bosnia and Herzegovina',
5274 'BW': 'Botswana',
5275 'BV': 'Bouvet Island',
5276 'BR': 'Brazil',
5277 'IO': 'British Indian Ocean Territory',
5278 'BN': 'Brunei Darussalam',
5279 'BG': 'Bulgaria',
5280 'BF': 'Burkina Faso',
5281 'BI': 'Burundi',
5282 'KH': 'Cambodia',
5283 'CM': 'Cameroon',
5284 'CA': 'Canada',
5285 'CV': 'Cape Verde',
5286 'KY': 'Cayman Islands',
5287 'CF': 'Central African Republic',
5288 'TD': 'Chad',
5289 'CL': 'Chile',
5290 'CN': 'China',
5291 'CX': 'Christmas Island',
5292 'CC': 'Cocos (Keeling) Islands',
5293 'CO': 'Colombia',
5294 'KM': 'Comoros',
5295 'CG': 'Congo',
5296 'CD': 'Congo, the Democratic Republic of the',
5297 'CK': 'Cook Islands',
5298 'CR': 'Costa Rica',
5299 'CI': 'Côte d\'Ivoire',
5300 'HR': 'Croatia',
5301 'CU': 'Cuba',
5302 'CW': 'Curaçao',
5303 'CY': 'Cyprus',
5304 'CZ': 'Czech Republic',
5305 'DK': 'Denmark',
5306 'DJ': 'Djibouti',
5307 'DM': 'Dominica',
5308 'DO': 'Dominican Republic',
5309 'EC': 'Ecuador',
5310 'EG': 'Egypt',
5311 'SV': 'El Salvador',
5312 'GQ': 'Equatorial Guinea',
5313 'ER': 'Eritrea',
5314 'EE': 'Estonia',
5315 'ET': 'Ethiopia',
5316 'FK': 'Falkland Islands (Malvinas)',
5317 'FO': 'Faroe Islands',
5318 'FJ': 'Fiji',
5319 'FI': 'Finland',
5320 'FR': 'France',
5321 'GF': 'French Guiana',
5322 'PF': 'French Polynesia',
5323 'TF': 'French Southern Territories',
5324 'GA': 'Gabon',
5325 'GM': 'Gambia',
5326 'GE': 'Georgia',
5327 'DE': 'Germany',
5328 'GH': 'Ghana',
5329 'GI': 'Gibraltar',
5330 'GR': 'Greece',
5331 'GL': 'Greenland',
5332 'GD': 'Grenada',
5333 'GP': 'Guadeloupe',
5334 'GU': 'Guam',
5335 'GT': 'Guatemala',
5336 'GG': 'Guernsey',
5337 'GN': 'Guinea',
5338 'GW': 'Guinea-Bissau',
5339 'GY': 'Guyana',
5340 'HT': 'Haiti',
5341 'HM': 'Heard Island and McDonald Islands',
5342 'VA': 'Holy See (Vatican City State)',
5343 'HN': 'Honduras',
5344 'HK': 'Hong Kong',
5345 'HU': 'Hungary',
5346 'IS': 'Iceland',
5347 'IN': 'India',
5348 'ID': 'Indonesia',
5349 'IR': 'Iran, Islamic Republic of',
5350 'IQ': 'Iraq',
5351 'IE': 'Ireland',
5352 'IM': 'Isle of Man',
5353 'IL': 'Israel',
5354 'IT': 'Italy',
5355 'JM': 'Jamaica',
5356 'JP': 'Japan',
5357 'JE': 'Jersey',
5358 'JO': 'Jordan',
5359 'KZ': 'Kazakhstan',
5360 'KE': 'Kenya',
5361 'KI': 'Kiribati',
5362 'KP': 'Korea, Democratic People\'s Republic of',
5363 'KR': 'Korea, Republic of',
5364 'KW': 'Kuwait',
5365 'KG': 'Kyrgyzstan',
5366 'LA': 'Lao People\'s Democratic Republic',
5367 'LV': 'Latvia',
5368 'LB': 'Lebanon',
5369 'LS': 'Lesotho',
5370 'LR': 'Liberia',
5371 'LY': 'Libya',
5372 'LI': 'Liechtenstein',
5373 'LT': 'Lithuania',
5374 'LU': 'Luxembourg',
5375 'MO': 'Macao',
5376 'MK': 'Macedonia, the Former Yugoslav Republic of',
5377 'MG': 'Madagascar',
5378 'MW': 'Malawi',
5379 'MY': 'Malaysia',
5380 'MV': 'Maldives',
5381 'ML': 'Mali',
5382 'MT': 'Malta',
5383 'MH': 'Marshall Islands',
5384 'MQ': 'Martinique',
5385 'MR': 'Mauritania',
5386 'MU': 'Mauritius',
5387 'YT': 'Mayotte',
5388 'MX': 'Mexico',
5389 'FM': 'Micronesia, Federated States of',
5390 'MD': 'Moldova, Republic of',
5391 'MC': 'Monaco',
5392 'MN': 'Mongolia',
5393 'ME': 'Montenegro',
5394 'MS': 'Montserrat',
5395 'MA': 'Morocco',
5396 'MZ': 'Mozambique',
5397 'MM': 'Myanmar',
5398 'NA': 'Namibia',
5399 'NR': 'Nauru',
5400 'NP': 'Nepal',
5401 'NL': 'Netherlands',
5402 'NC': 'New Caledonia',
5403 'NZ': 'New Zealand',
5404 'NI': 'Nicaragua',
5405 'NE': 'Niger',
5406 'NG': 'Nigeria',
5407 'NU': 'Niue',
5408 'NF': 'Norfolk Island',
5409 'MP': 'Northern Mariana Islands',
5410 'NO': 'Norway',
5411 'OM': 'Oman',
5412 'PK': 'Pakistan',
5413 'PW': 'Palau',
5414 'PS': 'Palestine, State of',
5415 'PA': 'Panama',
5416 'PG': 'Papua New Guinea',
5417 'PY': 'Paraguay',
5418 'PE': 'Peru',
5419 'PH': 'Philippines',
5420 'PN': 'Pitcairn',
5421 'PL': 'Poland',
5422 'PT': 'Portugal',
5423 'PR': 'Puerto Rico',
5424 'QA': 'Qatar',
5425 'RE': 'Réunion',
5426 'RO': 'Romania',
5427 'RU': 'Russian Federation',
5428 'RW': 'Rwanda',
5429 'BL': 'Saint Barthélemy',
5430 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5431 'KN': 'Saint Kitts and Nevis',
5432 'LC': 'Saint Lucia',
5433 'MF': 'Saint Martin (French part)',
5434 'PM': 'Saint Pierre and Miquelon',
5435 'VC': 'Saint Vincent and the Grenadines',
5436 'WS': 'Samoa',
5437 'SM': 'San Marino',
5438 'ST': 'Sao Tome and Principe',
5439 'SA': 'Saudi Arabia',
5440 'SN': 'Senegal',
5441 'RS': 'Serbia',
5442 'SC': 'Seychelles',
5443 'SL': 'Sierra Leone',
5444 'SG': 'Singapore',
5445 'SX': 'Sint Maarten (Dutch part)',
5446 'SK': 'Slovakia',
5447 'SI': 'Slovenia',
5448 'SB': 'Solomon Islands',
5449 'SO': 'Somalia',
5450 'ZA': 'South Africa',
5451 'GS': 'South Georgia and the South Sandwich Islands',
5452 'SS': 'South Sudan',
5453 'ES': 'Spain',
5454 'LK': 'Sri Lanka',
5455 'SD': 'Sudan',
5456 'SR': 'Suriname',
5457 'SJ': 'Svalbard and Jan Mayen',
5458 'SZ': 'Swaziland',
5459 'SE': 'Sweden',
5460 'CH': 'Switzerland',
5461 'SY': 'Syrian Arab Republic',
5462 'TW': 'Taiwan, Province of China',
5463 'TJ': 'Tajikistan',
5464 'TZ': 'Tanzania, United Republic of',
5465 'TH': 'Thailand',
5466 'TL': 'Timor-Leste',
5467 'TG': 'Togo',
5468 'TK': 'Tokelau',
5469 'TO': 'Tonga',
5470 'TT': 'Trinidad and Tobago',
5471 'TN': 'Tunisia',
5472 'TR': 'Turkey',
5473 'TM': 'Turkmenistan',
5474 'TC': 'Turks and Caicos Islands',
5475 'TV': 'Tuvalu',
5476 'UG': 'Uganda',
5477 'UA': 'Ukraine',
5478 'AE': 'United Arab Emirates',
5479 'GB': 'United Kingdom',
5480 'US': 'United States',
5481 'UM': 'United States Minor Outlying Islands',
5482 'UY': 'Uruguay',
5483 'UZ': 'Uzbekistan',
5484 'VU': 'Vanuatu',
5485 'VE': 'Venezuela, Bolivarian Republic of',
5486 'VN': 'Viet Nam',
5487 'VG': 'Virgin Islands, British',
5488 'VI': 'Virgin Islands, U.S.',
5489 'WF': 'Wallis and Futuna',
5490 'EH': 'Western Sahara',
5491 'YE': 'Yemen',
5492 'ZM': 'Zambia',
5493 'ZW': 'Zimbabwe',
5494 }
5495
@classmethod
def short2full(cls, code):
    """Convert a two-letter country code to the corresponding full name

    The code is upper-cased before the lookup in cls._country_map;
    None is returned when the code is not present in the map.
    """
    normalized_code = code.upper()
    return cls._country_map.get(normalized_code)
5500
5501
773f291d
S
5502class GeoUtils(object):
5503 # Major IPv4 address blocks per country
5504 _country_ip_map = {
53896ca5 5505 'AD': '46.172.224.0/19',
773f291d
S
5506 'AE': '94.200.0.0/13',
5507 'AF': '149.54.0.0/17',
5508 'AG': '209.59.64.0/18',
5509 'AI': '204.14.248.0/21',
5510 'AL': '46.99.0.0/16',
5511 'AM': '46.70.0.0/15',
5512 'AO': '105.168.0.0/13',
53896ca5
S
5513 'AP': '182.50.184.0/21',
5514 'AQ': '23.154.160.0/24',
773f291d
S
5515 'AR': '181.0.0.0/12',
5516 'AS': '202.70.112.0/20',
53896ca5 5517 'AT': '77.116.0.0/14',
773f291d
S
5518 'AU': '1.128.0.0/11',
5519 'AW': '181.41.0.0/18',
53896ca5
S
5520 'AX': '185.217.4.0/22',
5521 'AZ': '5.197.0.0/16',
773f291d
S
5522 'BA': '31.176.128.0/17',
5523 'BB': '65.48.128.0/17',
5524 'BD': '114.130.0.0/16',
5525 'BE': '57.0.0.0/8',
53896ca5 5526 'BF': '102.178.0.0/15',
773f291d
S
5527 'BG': '95.42.0.0/15',
5528 'BH': '37.131.0.0/17',
5529 'BI': '154.117.192.0/18',
5530 'BJ': '137.255.0.0/16',
53896ca5 5531 'BL': '185.212.72.0/23',
773f291d
S
5532 'BM': '196.12.64.0/18',
5533 'BN': '156.31.0.0/16',
5534 'BO': '161.56.0.0/16',
5535 'BQ': '161.0.80.0/20',
53896ca5 5536 'BR': '191.128.0.0/12',
773f291d
S
5537 'BS': '24.51.64.0/18',
5538 'BT': '119.2.96.0/19',
5539 'BW': '168.167.0.0/16',
5540 'BY': '178.120.0.0/13',
5541 'BZ': '179.42.192.0/18',
5542 'CA': '99.224.0.0/11',
5543 'CD': '41.243.0.0/16',
53896ca5
S
5544 'CF': '197.242.176.0/21',
5545 'CG': '160.113.0.0/16',
773f291d 5546 'CH': '85.0.0.0/13',
53896ca5 5547 'CI': '102.136.0.0/14',
773f291d
S
5548 'CK': '202.65.32.0/19',
5549 'CL': '152.172.0.0/14',
53896ca5 5550 'CM': '102.244.0.0/14',
773f291d
S
5551 'CN': '36.128.0.0/10',
5552 'CO': '181.240.0.0/12',
5553 'CR': '201.192.0.0/12',
5554 'CU': '152.206.0.0/15',
5555 'CV': '165.90.96.0/19',
5556 'CW': '190.88.128.0/17',
53896ca5 5557 'CY': '31.153.0.0/16',
773f291d
S
5558 'CZ': '88.100.0.0/14',
5559 'DE': '53.0.0.0/8',
5560 'DJ': '197.241.0.0/17',
5561 'DK': '87.48.0.0/12',
5562 'DM': '192.243.48.0/20',
5563 'DO': '152.166.0.0/15',
5564 'DZ': '41.96.0.0/12',
5565 'EC': '186.68.0.0/15',
5566 'EE': '90.190.0.0/15',
5567 'EG': '156.160.0.0/11',
5568 'ER': '196.200.96.0/20',
5569 'ES': '88.0.0.0/11',
5570 'ET': '196.188.0.0/14',
5571 'EU': '2.16.0.0/13',
5572 'FI': '91.152.0.0/13',
5573 'FJ': '144.120.0.0/16',
53896ca5 5574 'FK': '80.73.208.0/21',
773f291d
S
5575 'FM': '119.252.112.0/20',
5576 'FO': '88.85.32.0/19',
5577 'FR': '90.0.0.0/9',
5578 'GA': '41.158.0.0/15',
5579 'GB': '25.0.0.0/8',
5580 'GD': '74.122.88.0/21',
5581 'GE': '31.146.0.0/16',
5582 'GF': '161.22.64.0/18',
5583 'GG': '62.68.160.0/19',
53896ca5
S
5584 'GH': '154.160.0.0/12',
5585 'GI': '95.164.0.0/16',
773f291d
S
5586 'GL': '88.83.0.0/19',
5587 'GM': '160.182.0.0/15',
5588 'GN': '197.149.192.0/18',
5589 'GP': '104.250.0.0/19',
5590 'GQ': '105.235.224.0/20',
5591 'GR': '94.64.0.0/13',
5592 'GT': '168.234.0.0/16',
5593 'GU': '168.123.0.0/16',
5594 'GW': '197.214.80.0/20',
5595 'GY': '181.41.64.0/18',
5596 'HK': '113.252.0.0/14',
5597 'HN': '181.210.0.0/16',
5598 'HR': '93.136.0.0/13',
5599 'HT': '148.102.128.0/17',
5600 'HU': '84.0.0.0/14',
5601 'ID': '39.192.0.0/10',
5602 'IE': '87.32.0.0/12',
5603 'IL': '79.176.0.0/13',
5604 'IM': '5.62.80.0/20',
5605 'IN': '117.192.0.0/10',
5606 'IO': '203.83.48.0/21',
5607 'IQ': '37.236.0.0/14',
5608 'IR': '2.176.0.0/12',
5609 'IS': '82.221.0.0/16',
5610 'IT': '79.0.0.0/10',
5611 'JE': '87.244.64.0/18',
5612 'JM': '72.27.0.0/17',
5613 'JO': '176.29.0.0/16',
53896ca5 5614 'JP': '133.0.0.0/8',
773f291d
S
5615 'KE': '105.48.0.0/12',
5616 'KG': '158.181.128.0/17',
5617 'KH': '36.37.128.0/17',
5618 'KI': '103.25.140.0/22',
5619 'KM': '197.255.224.0/20',
53896ca5 5620 'KN': '198.167.192.0/19',
773f291d
S
5621 'KP': '175.45.176.0/22',
5622 'KR': '175.192.0.0/10',
5623 'KW': '37.36.0.0/14',
5624 'KY': '64.96.0.0/15',
5625 'KZ': '2.72.0.0/13',
5626 'LA': '115.84.64.0/18',
5627 'LB': '178.135.0.0/16',
53896ca5 5628 'LC': '24.92.144.0/20',
773f291d
S
5629 'LI': '82.117.0.0/19',
5630 'LK': '112.134.0.0/15',
53896ca5 5631 'LR': '102.183.0.0/16',
773f291d
S
5632 'LS': '129.232.0.0/17',
5633 'LT': '78.56.0.0/13',
5634 'LU': '188.42.0.0/16',
5635 'LV': '46.109.0.0/16',
5636 'LY': '41.252.0.0/14',
5637 'MA': '105.128.0.0/11',
5638 'MC': '88.209.64.0/18',
5639 'MD': '37.246.0.0/16',
5640 'ME': '178.175.0.0/17',
5641 'MF': '74.112.232.0/21',
5642 'MG': '154.126.0.0/17',
5643 'MH': '117.103.88.0/21',
5644 'MK': '77.28.0.0/15',
5645 'ML': '154.118.128.0/18',
5646 'MM': '37.111.0.0/17',
5647 'MN': '49.0.128.0/17',
5648 'MO': '60.246.0.0/16',
5649 'MP': '202.88.64.0/20',
5650 'MQ': '109.203.224.0/19',
5651 'MR': '41.188.64.0/18',
5652 'MS': '208.90.112.0/22',
5653 'MT': '46.11.0.0/16',
5654 'MU': '105.16.0.0/12',
5655 'MV': '27.114.128.0/18',
53896ca5 5656 'MW': '102.70.0.0/15',
773f291d
S
5657 'MX': '187.192.0.0/11',
5658 'MY': '175.136.0.0/13',
5659 'MZ': '197.218.0.0/15',
5660 'NA': '41.182.0.0/16',
5661 'NC': '101.101.0.0/18',
5662 'NE': '197.214.0.0/18',
5663 'NF': '203.17.240.0/22',
5664 'NG': '105.112.0.0/12',
5665 'NI': '186.76.0.0/15',
5666 'NL': '145.96.0.0/11',
5667 'NO': '84.208.0.0/13',
5668 'NP': '36.252.0.0/15',
5669 'NR': '203.98.224.0/19',
5670 'NU': '49.156.48.0/22',
5671 'NZ': '49.224.0.0/14',
5672 'OM': '5.36.0.0/15',
5673 'PA': '186.72.0.0/15',
5674 'PE': '186.160.0.0/14',
5675 'PF': '123.50.64.0/18',
5676 'PG': '124.240.192.0/19',
5677 'PH': '49.144.0.0/13',
5678 'PK': '39.32.0.0/11',
5679 'PL': '83.0.0.0/11',
5680 'PM': '70.36.0.0/20',
5681 'PR': '66.50.0.0/16',
5682 'PS': '188.161.0.0/16',
5683 'PT': '85.240.0.0/13',
5684 'PW': '202.124.224.0/20',
5685 'PY': '181.120.0.0/14',
5686 'QA': '37.210.0.0/15',
53896ca5 5687 'RE': '102.35.0.0/16',
773f291d 5688 'RO': '79.112.0.0/13',
53896ca5 5689 'RS': '93.86.0.0/15',
773f291d 5690 'RU': '5.136.0.0/13',
53896ca5 5691 'RW': '41.186.0.0/16',
773f291d
S
5692 'SA': '188.48.0.0/13',
5693 'SB': '202.1.160.0/19',
5694 'SC': '154.192.0.0/11',
53896ca5 5695 'SD': '102.120.0.0/13',
773f291d 5696 'SE': '78.64.0.0/12',
53896ca5 5697 'SG': '8.128.0.0/10',
773f291d
S
5698 'SI': '188.196.0.0/14',
5699 'SK': '78.98.0.0/15',
53896ca5 5700 'SL': '102.143.0.0/17',
773f291d
S
5701 'SM': '89.186.32.0/19',
5702 'SN': '41.82.0.0/15',
53896ca5 5703 'SO': '154.115.192.0/18',
773f291d
S
5704 'SR': '186.179.128.0/17',
5705 'SS': '105.235.208.0/21',
5706 'ST': '197.159.160.0/19',
5707 'SV': '168.243.0.0/16',
5708 'SX': '190.102.0.0/20',
5709 'SY': '5.0.0.0/16',
5710 'SZ': '41.84.224.0/19',
5711 'TC': '65.255.48.0/20',
5712 'TD': '154.68.128.0/19',
5713 'TG': '196.168.0.0/14',
5714 'TH': '171.96.0.0/13',
5715 'TJ': '85.9.128.0/18',
5716 'TK': '27.96.24.0/21',
5717 'TL': '180.189.160.0/20',
5718 'TM': '95.85.96.0/19',
5719 'TN': '197.0.0.0/11',
5720 'TO': '175.176.144.0/21',
5721 'TR': '78.160.0.0/11',
5722 'TT': '186.44.0.0/15',
5723 'TV': '202.2.96.0/19',
5724 'TW': '120.96.0.0/11',
5725 'TZ': '156.156.0.0/14',
53896ca5
S
5726 'UA': '37.52.0.0/14',
5727 'UG': '102.80.0.0/13',
5728 'US': '6.0.0.0/8',
773f291d 5729 'UY': '167.56.0.0/13',
53896ca5 5730 'UZ': '84.54.64.0/18',
773f291d 5731 'VA': '212.77.0.0/19',
53896ca5 5732 'VC': '207.191.240.0/21',
773f291d 5733 'VE': '186.88.0.0/13',
53896ca5 5734 'VG': '66.81.192.0/20',
773f291d
S
5735 'VI': '146.226.0.0/16',
5736 'VN': '14.160.0.0/11',
5737 'VU': '202.80.32.0/20',
5738 'WF': '117.20.32.0/21',
5739 'WS': '202.4.32.0/19',
5740 'YE': '134.35.0.0/16',
5741 'YT': '41.242.116.0/22',
5742 'ZA': '41.0.0.0/11',
53896ca5
S
5743 'ZM': '102.144.0.0/13',
5744 'ZW': '102.177.192.0/18',
773f291d
S
5745 }
5746
@classmethod
def random_ipv4(cls, code_or_block):
    """Pick a random IPv4 address from an address block

    @param code_or_block  Either a two-character country code, which is
                          upper-cased and looked up in cls._country_ip_map,
                          or an address block in "address/prefix-length"
                          notation
    @returns              The address as a dotted-quad string, or None when
                          the country code has no entry in the map
    """
    block = code_or_block
    if len(code_or_block) == 2:
        block = cls._country_ip_map.get(code_or_block.upper())
        if not block:
            return None

    network, prefix_length = block.split('/')
    lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
    # Setting all the host bits yields the last address of the block
    highest = lowest | (0xffffffff >> int(prefix_length))
    chosen = random.randint(lowest, highest)
    return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
773f291d
S
5760
5761
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that allows the proxy to be chosen per request

    A request can override the proxy through its 'Ytdl-request-proxy'
    header; the header is removed from the request once it has been read.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            # Defaults bind the current scheme and the bound method at
            # definition time, so each handler keeps its own values
            handler = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, handler)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Resolve the proxy for `req`, honouring a per-request override

        Returns None when no proxy is to be used or when a SOCKS proxy was
        requested (in which case the 'Ytdl-socks-proxy' header is added to
        the request); otherwise defers to ProxyHandler.proxy_open.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers take care of wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5785
5786
0a5445dd
YCH
5787# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5788# released into Public Domain
5789# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5790
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    The result is the shortest big-endian representation of n. Zero is
    encoded as a single zero byte and, as before, so is any negative value.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n > 0:
        # bit_length() rounded up to whole bytes gives the minimal width,
        # so no leading zero bytes need to be stripped afterwards
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        s = b'\000'
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5819
5820
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    The bytes are interpreted as an unsigned big-endian number; an empty
    string yields 0.

    This is (essentially) the inverse of long_to_bytes().
    """
    return int.from_bytes(s, 'big')
5836
5837
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The data is read as a little-endian number: reversing it puts the
    # most significant byte first for the hexadecimal conversion
    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    return '%x' % pow(plain_number, exponent, modulus)
81bdc8fd
YCH
5853
5854
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data, where the
    padding consists of random non-zero bytes. The padding must not contain
    a zero because the first zero after the two leading bytes marks the
    start of the data.

    @param {int[]}  data        input data
    @param {int}    length      target length
    @returns {int[]}            padded data
    @raises ValueError          if data leaves room for less than 8 padding bytes
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    padding = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + padding + [0] + list(data)
5868
5869
def encode_base_n(num, n, table=None):
    """Convert the integer num to its representation in base n

    @param num    Integer to convert. Negative numbers are encoded as a
                  leading "-" followed by the digits of the absolute value
    @param n      The base
    @param table  Digits to use, lowest first. Defaults to the first n
                  characters of 0-9, a-z, A-Z
    @raises ValueError  if n exceeds the table length, or if n is smaller
                        than 2 and num is non-zero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    if n < 2:
        # The division loop below would never terminate (or divide by zero)
        raise ValueError('base %d is too small, it must be at least 2' % n)

    sign = ''
    if num < 0:
        # Floor division never reaches zero for negative numbers,
        # so the sign is handled separately
        sign, num = '-', -num

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return sign + ''.join(reversed(digits))
f52354a8
YCH
5886
5887
def decode_packed_codes(code):
    """Expand code that matches PACKED_CODES_RE

    The match provides the obfuscated code, a base, a symbol count and a
    "|"-separated symbol list. Every word of the obfuscated code is replaced
    by the symbol whose index, written in the given base, equals that word;
    an empty symbol stands for the word itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base = int(base)
    remaining = int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    while remaining:
        remaining -= 1
        encoded_index = encode_base_n(remaining, base)
        symbol_table[encoded_index] = symbols[remaining] or encoded_index

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
e154c651 5904
5905
1ced2221
S
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions

    The shift wraps around the end of the alphabet. Characters that are not
    part of the alphabet are kept as they are. With a shift of 0, s itself
    is returned.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
5913
5914
def rot47(s):
    """Apply caesar() with a shift of 47 over the 94 characters from "!" to "~" """
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
5917
5918
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict

    Values may be quoted with double quotes, in which case they may contain
    commas; the surrounding quotes are removed. All values are strings.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: (val[1:-1] if val.startswith('"') else val)
        for key, val in pairs
    }
1143535d
YCH
5926
5927
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as an unsigned 32-bit number"""
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5930
5931
5932# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5933# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode the pixel data of a PNG image

    Rows are read with a stride of 3 bytes per pixel; the bit depth and
    colour type declared in IHDR are not checked.

    @param png_data  The content of the PNG file as bytes
    @returns         (width, height, pixels) where pixels is a list with one
                     list per row, holding width * 3 integer values each
    @raises IOError  if the signature or the leading IHDR chunk is missing,
                     or if the file has no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    body = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or body[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    def unpack_integer(raw):
        return int.from_bytes(raw, 'big')

    # Each chunk is: length (4 bytes), type (4 bytes), data, CRC (4 bytes).
    # Anything shorter than a length + type pair cannot be a chunk, so
    # stray trailing bytes are ignored instead of being misparsed.
    chunks = []
    pos = 0
    while len(body) - pos >= 8:
        length = unpack_integer(body[pos:pos + 4])
        chunk_type = body[pos + 4:pos + 8]
        chunk_data = body[pos + 8:pos + 8 + length]
        chunks.append((chunk_type, chunk_data))
        pos += 8 + length + 4  # Skip CRC

    ihdr = chunks[0][1]
    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''.join(data for chunk_type, data in chunks if chunk_type == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every row is preceded by one byte holding its filter type
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]

        previous_row = pixels[y - 1] if y > 0 else None
        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + row_start + x]
            # Neighbours are the already decoded values of the same channel:
            # one pixel to the left, above, and above-left
            left = current_row[x - 3] if x > 2 else 0
            up = previous_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = previous_row[x - 3] if x > 2 and y > 0 else 0

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use the neighbour closest to p, preferring a, then b, then c
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
6037
6038
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file `path` to `value`

    The first available method is used: the python xattr/pyxattr module,
    NTFS Alternate Data Streams when compat_os_name is 'nt', or the
    `setfattr`/`xattr` command line tools. `value` is decoded as UTF-8
    before being passed to a command line tool.

    Raises XAttrMetadataError if writing fails and XAttrUnavailableError
    if no usable method is found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        user_has_setfattr = check_executable('setfattr', ['--version'])
        user_has_xattr = check_executable('xattr', ['-h'])

        if not (user_has_setfattr or user_has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are available
        if user_has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = ([encodeFilename(executable, True)]
               + [encodeArgument(o) for o in opts]
               + [encodeFilename(path, True)])

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        stdout, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
0c265486
YCH
6121
6122
def random_birthday(year_field, month_field, day_field):
    """Generate a random date between 1950-01-01 and 1995-12-31 (inclusive)

    Returns a dict that maps the three given field names to the year, month
    and day of that date, each as a string without zero padding.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    day_span = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, day_span))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
732044af 6133
c76eb41b 6134
# Templates for internet shortcut files, which are plain text files.
# Each template is filled in with the "%" operator and a dict; the leading
# newline of the literals is only there for readability and is stripped.

# Takes the key "url"
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# Takes the key "url"
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Takes the keys "filename" and "url"
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6160
6161
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept in the result, except for port 80 on `http` URLs, where it is the default and therefore redundant.

    Raises ValueError for IRIs with a bracketed (IPv6) host.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.

    # Port 80 is only the default for http. Dropping it for any other scheme
    # (e.g. https://host:80/) would make the URI point to a different port.
    port = iri_parts.port
    is_default_port = port == 80 and iri_parts.scheme == 'http'
    if port is not None and not is_default_port:
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6204
6205
def to_high_limit_path(path):
    r"""Return path as an absolute path prefixed with "\\?\" on win32 and cygwin

    On every other platform the path is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    # A raw string cannot end with a backslash, hence the space + rstrip()
    long_path_prefix = r'\\?\ '.rstrip()
    return long_path_prefix + os.path.abspath(path)
76d321f6 6212
c76eb41b 6213
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format a value with a %-style template unless the value is to be ignored

    @param obj       The value itself if field is None, else an object whose
                     get() method is used to look up field
    @param field     Key to look up in obj
    @param template  Template the value is inserted into
    @param ignore    Values for which default is returned instead
    @param default   Fallback for missing or ignored values
    @param func      Function applied to the value before formatting; it is
                     skipped for ignored values
    """
    if field is not None:
        val = obj.get(field, default)
    elif obj is not None:
        val = obj
    else:
        val = default

    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
00dd0cd5 6222
6223
def clean_podcast_url(url):
    """Remove the URL prefixes matched by the pattern below from url

    Every occurrence is removed, so several chained prefixes are all
    stripped.
    """
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
ffcb8191
THD
6239
6240
6241_HEX_TABLE = '0123456789abcdef'
6242
6243
6244def random_uuidv4():
6245 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6246
6247
def make_dir(path, to_screen=None):
    """Create the parent directory of path if it does not exist yet

    @param path       Path of a file whose directory should exist
    @param to_screen  Optional callable that is given an error message if
                      the directory cannot be created
    @returns          True on success (including when there is nothing to
                      create), False if creating the directory failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Only report the error when a reporter was actually given
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
f74980cb 6258
6259
def get_executable_path():
    """Return the absolute path of the directory the program is run from

    This is the directory of sys.executable when sys has a `frozen`
    attribute, two levels above this file's directory when the module was
    loaded by a zipimporter, and one level above it otherwise.
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        location = os.path.dirname(sys.executable)
    else:
        module_dir = os.path.dirname(__file__)
        if isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
            location = os.path.join(module_dir, '../..')
        else:
            location = os.path.join(module_dir, '..')
    return os.path.abspath(location)
6269
6270
def load_plugins(name, suffix, namespace):
    """Load the module `name` from the "ytdlp_plugins" directory

    The directory is looked up inside get_executable_path(). Every attribute
    of the module whose name ends with suffix, and which is not yet in
    namespace, is added to namespace.

    @returns  The list of objects that were added; empty if the module
              could not be imported
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        plugin_info = imp.find_module(name, [plugin_dir])
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name in namespace or not attr_name.endswith(suffix):
                continue
            klass = getattr(plugins, attr_name)
            classes.append(klass)
            namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by find_module is an open file (or None)
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
06167fbb 6292
6293
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The value found by the first path that yields a
                            result, else default
    # TODO: Write tests
    '''
    if not casesense:
        def _lower(k):
            return k.lower() if isinstance(k, str) else k

        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` records the deepest level of branching ("..." or a tuple
        # of keys) seen for the current path, so that the nested result
        # lists can be flattened afterwards
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            remaining_path = path[i + 1:]
            if isinstance(key, (list, tuple)):
                # Collect the result of every alternative, then branch over them
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                if isinstance(obj, dict):
                    obj = obj.values()
                elif isinstance(obj, (list, tuple, LazyList)):
                    pass
                elif traverse_string:
                    obj = str(obj)
                else:
                    obj = []
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, remaining_path, _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                if casesense or (key in obj):
                    obj = obj.get(key)
                else:
                    obj = next((v for k, v in obj.items() if _lower(k) == key), None)
            else:
                if is_user_input:
                    if ':' not in key:
                        key = int_or_none(key)
                    else:
                        key = slice(*map(int_or_none, key.split(':')))
                    if key == slice(None):
                        # A bare ":" selects everything, just like "..."
                        return _traverse_obj(obj, (..., *remaining_path), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    if isinstance(expected_type, type):
        def type_test(val):
            return val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        def type_test(val):
            return val

    for path in path_list:
        depth = 0
        result = _traverse_obj(obj, path)
        if result is None:
            continue
        if not depth:
            result = type_test(result)
            if result is not None:
                return result
            continue
        # Every level of branching beyond the first adds one level of nesting
        for _ in range(depth - 1):
            result = itertools.chain.from_iterable(v for v in result if v is not None)
        result = [v for v in map(type_test, result) if v is not None]
        if result:
            return result if get_all else result[0]
    return default
324ad820 6376
6377
def traverse_dict(dictn, keys, casesense=True):
    '''
    Legacy wrapper kept for backward compatibility. Do not use in new code

    Forwards to traverse_obj with is_user_input and traverse_string both
    enabled; casesense is passed through unchanged
    '''
    legacy_options = {
        'casesense': casesense,
        'is_user_input': True,
        'traverse_string': True,
    }
    return traverse_obj(dictn, keys, **legacy_options)
6606817a 6382
6383
def variadic(x, allowed_types=(str, bytes)):
    '''
    Return x unchanged if it is an iterable, otherwise wrap it in a 1-tuple

    @param x                Any object
    @param allowed_types    Iterable types that are nevertheless treated as a
                            single value and hence wrapped in a tuple
    '''
    if not isinstance(x, collections.abc.Iterable):
        return (x,)
    if isinstance(x, allowed_types):
        # Iterable, but explicitly requested to be treated as one value
        return (x,)
    return x
bd50a52b
THD
6386
6387
49fa4d9a
N
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    '''
    Create an HMAC-SHA256 signed token of the form b'<header>.<payload>.<signature>'

    @param payload_data     JSON-serializable object placed in the payload segment
    @param key              Signing secret. str keys are encoded as UTF-8;
                            bytes/bytearray keys are used as-is
    @param headers          Optional dict of extra header fields. These are merged
                            over the default {'alg': 'HS256', 'typ': 'JWT'} header
    @returns                The token as bytes
    '''
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)

    # HMAC secrets are frequently raw bytes, so only encode textual keys
    key_bytes = key.encode('utf-8') if isinstance(key, str) else key

    # NOTE(review): RFC 7515 specifies unpadded base64url for every segment, whereas
    # the standard padded alphabet is used here. This is deliberately left as-is so
    # that the emitted tokens do not change for existing callers
    header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
    signing_input = header_b64 + b'.' + payload_b64
    signature_b64 = base64.b64encode(hmac.new(key_bytes, signing_input, hashlib.sha256).digest())
    return signing_input + b'.' + signature_b64