#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import imp
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'

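
# Illustrative sketch (added for clarity, not part of the original module): the
# DATE_FORMATS tables above are intended to be tried one by one against a date
# string with datetime.strptime. The helper name below is hypothetical.
def _example_parse_with_date_formats(date_str, formats=DATE_FORMATS):
    for fmt in formats:
        try:
            return datetime.datetime.strptime(date_str, fmt)
        except ValueError:
            pass
    return None
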

def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non-ASCII characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream.
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise

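# Illustrative usage (added, not part of the original module); the file name
# below is hypothetical.
def _example_write_json_file():
    write_json_file({'id': 'abc', 'title': 'Example'}, 'info.json')

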
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)

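# Illustrative usage (added, not original): a namespace prefix is expanded into
# Clark notation; the 'media' URI is only an example value.
def _example_xpath_with_ns():
    ns = {'media': 'http://search.yahoo.com/mrss/'}
    assert (xpath_with_ns('media:song/media:title', ns)
            == '{http://search.yahoo.com/mrss/}song/{http://search.yahoo.com/mrss/}title')

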
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]

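# Illustrative usage of the xpath helpers above (added, not original), using a
# tiny in-memory XML document.
def _example_xpath_helpers():
    doc = compat_etree_fromstring('<root><item id="42">hello</item></root>')
    assert xpath_text(doc, './item') == 'hello'
    assert xpath_attr(doc, './item', 'id') == '42'
    assert xpath_text(doc, './missing', default=None) is None

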
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist


class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs

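# Illustrative usage of the HTML helpers above (added, not part of the original
# module); the markup is a made-up example.
def _example_html_helpers():
    assert get_element_by_class('title', '<div class="title main">Foo</div>') == 'Foo'
    attrs = extract_attributes('<a href="/watch?v=x" class="link">')
    assert attrs['href'] == '/watch?v=x'

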
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()

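# Illustrative usage (added, not original): <br> tags become newlines and the
# remaining markup is stripped.
def _example_clean_html():
    assert clean_html('<p>first line<br/>second line</p>') == 'first line\nsecond line'

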
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp

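# Illustrative usage (added, not original): valid RFC 2822 dates yield a Unix
# timestamp, anything else yields None.
def _example_timeconvert():
    assert timeconvert('Wed, 14 Feb 2018 06:36:40 +0000') is not None
    assert timeconvert('not a date') is None

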
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result

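# Illustrative usage (added, not original): restricted mode maps accented and
# unsafe characters to a conservative ASCII subset.
def _example_sanitize_filename():
    assert sanitize_filename('Beyoncé: Halo?', restricted=True) == 'Beyonce_-_Halo'

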
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)

def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url

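# Illustrative usage (added, not original): scheme-less URLs get 'http:' and a
# couple of common scheme typos are repaired.
def _example_sanitize_url():
    assert sanitize_url('//example.com/video') == 'http://example.com/video'
    assert sanitize_url('httpss://example.com/video') == 'https://example.com/video'
    assert sanitize_url('rmtpe://example.com/live') == 'rtmpe://example.com/live'

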
def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))

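# Illustrative usage (added, not original): credentials embedded in the URL are
# moved into a Basic auth header value ('dXNlcjpwYXNz' is base64 of 'user:pass').
def _example_extract_basic_auth():
    url, auth = extract_basic_auth('http://user:pass@example.com/feed')
    assert url == 'http://example.com/feed'
    assert auth == 'Basic dXNlcjpwYXNz'
    assert extract_basic_auth('http://example.com/feed') == ('http://example.com/feed', None)

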
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

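# Illustrative usage (added, not original): duplicates are dropped while the
# first-seen order is preserved.
def _example_orderedSet():
    assert orderedSet([2, 1, 2, 3, 1]) == [2, 1, 3]

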
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)

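# Illustrative usage (added, not original): named, decimal and hexadecimal
# entities are all decoded; unknown entities are left alone.
def _example_unescapeHTML():
    assert unescapeHTML('&amp; &#38; &#x26;') == '& & &'
    assert unescapeHTML('&unknownentity;') == '&unknownentity;'

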
def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise

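# Illustrative usage (added, not original): the helper wraps Popen.communicate()
# so the child is killed if communicate() is interrupted.
def _example_process_communicate_or_kill():
    p = subprocess.Popen(
        [sys.executable, '-c', 'print("hi")'],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process_communicate_or_kill(p)
    assert stdout.strip() == b'hi'

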
def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval

cdb19aa4 2340def formatSeconds(secs, delim=':', msec=False):
4539dd30 2341 if secs > 3600:
cdb19aa4 2342 ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
4539dd30 2343 elif secs > 60:
cdb19aa4 2344 ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
4539dd30 2345 else:
cdb19aa4 2346 ret = '%d' % secs
2347 return '%s.%03d' % (ret, secs % 1) if msec else ret
4539dd30 2348
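# Usage sketch (illustrative values, not part of the original source): formatSeconds()
# renders a duration with a configurable delimiter and optional milliseconds.
#   >>> formatSeconds(3725)
#   '1:02:05'
#   >>> formatSeconds(61, delim='_')
#   '1_01'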
a0ddb8a2 2349
be4a824d
PH
2350def make_HTTPS_handler(params, **kwargs):
2351 opts_no_check_certificate = params.get('nocheckcertificate', False)
0db261ba 2352 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
be5f2c19 2353 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
0db261ba 2354 if opts_no_check_certificate:
be5f2c19 2355 context.check_hostname = False
0db261ba 2356 context.verify_mode = ssl.CERT_NONE
a2366922 2357 try:
be4a824d 2358 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
a2366922
PH
2359 except TypeError:
2360 # Python 2.7.8
2361 # (create_default_context present but HTTPSHandler has no context=)
2362 pass
2363
2364 if sys.version_info < (3, 2):
d7932313 2365 return YoutubeDLHTTPSHandler(params, **kwargs)
aa37e3d4 2366 else: # Python < 3.4
d7932313 2367 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
ea6d901e 2368 context.verify_mode = (ssl.CERT_NONE
dca08720 2369 if opts_no_check_certificate
ea6d901e 2370 else ssl.CERT_REQUIRED)
303b479e 2371 context.set_default_verify_paths()
be4a824d 2372 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2373
732ea2f0 2374
5873d4cc 2375def bug_reports_message(before=';'):
08f2a92c 2376 if ytdl_is_updateable():
7a5c1cfe 2377 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2378 else:
7a5c1cfe 2379 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2380 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2381 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2382 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2383
2384 before = before.rstrip()
2385 if not before or before.endswith(('.', '!', '?')):
2386 msg = msg[0].title() + msg[1:]
2387
2388 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2389
2390
bf5b9d85
PM
2391class YoutubeDLError(Exception):
2392 """Base exception for YoutubeDL errors."""
2393 pass
2394
2395
3158150c 2396network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2397if hasattr(ssl, 'CertificateError'):
2398 network_exceptions.append(ssl.CertificateError)
2399network_exceptions = tuple(network_exceptions)
2400
2401
bf5b9d85 2402class ExtractorError(YoutubeDLError):
1c256f70 2403 """Error during info extraction."""
5f6a1245 2404
1151c407 2405 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2406 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2407 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2408 """
3158150c 2409 if sys.exc_info()[0] in network_exceptions:
9a82b238 2410 expected = True
d5979c5d 2411
526d74ec 2412 self.msg = str(msg)
1c256f70 2413 self.traceback = tb
1151c407 2414 self.expected = expected
2eabb802 2415 self.cause = cause
d11271dd 2416 self.video_id = video_id
1151c407 2417 self.ie = ie
2418 self.exc_info = sys.exc_info() # preserve original exception
2419
2420 super(ExtractorError, self).__init__(''.join((
2421 format_field(ie, template='[%s] '),
2422 format_field(video_id, template='%s: '),
526d74ec 2423 self.msg,
1151c407 2424 format_field(cause, template=' (caused by %r)'),
2425 '' if expected else bug_reports_message())))
1c256f70 2426
01951dda
PH
2427 def format_traceback(self):
2428 if self.traceback is None:
2429 return None
28e614de 2430 return ''.join(traceback.format_tb(self.traceback))
01951dda 2431
1c256f70 2432
416c7fcb
PH
2433class UnsupportedError(ExtractorError):
2434 def __init__(self, url):
2435 super(UnsupportedError, self).__init__(
2436 'Unsupported URL: %s' % url, expected=True)
2437 self.url = url
2438
2439
55b3e45b
JMF
2440class RegexNotFoundError(ExtractorError):
2441 """Error when a regex didn't match"""
2442 pass
2443
2444
773f291d
S
2445class GeoRestrictedError(ExtractorError):
2446 """Geographic restriction Error exception.
2447
2448 This exception may be thrown when a video is not available from your
2449 geographic location due to geographic restrictions imposed by a website.
2450 """
b6e0c7d2 2451
773f291d
S
2452 def __init__(self, msg, countries=None):
2453 super(GeoRestrictedError, self).__init__(msg, expected=True)
2454 self.msg = msg
2455 self.countries = countries
2456
2457
bf5b9d85 2458class DownloadError(YoutubeDLError):
59ae15a5 2459 """Download Error exception.
d77c3dfd 2460
59ae15a5
PH
2461 This exception may be thrown by FileDownloader objects if they are not
2462 configured to continue on errors. They will contain the appropriate
2463 error message.
2464 """
5f6a1245 2465
8cc83b8d
FV
2466 def __init__(self, msg, exc_info=None):
2467 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2468 super(DownloadError, self).__init__(msg)
2469 self.exc_info = exc_info
d77c3dfd
FV
2470
2471
498f5606 2472class EntryNotInPlaylist(YoutubeDLError):
2473 """Entry not in playlist exception.
2474
2475 This exception will be thrown by YoutubeDL when a requested entry
2476 is not found in the playlist info_dict
2477 """
2478 pass
2479
2480
bf5b9d85 2481class SameFileError(YoutubeDLError):
59ae15a5 2482 """Same File exception.
d77c3dfd 2483
59ae15a5
PH
2484 This exception will be thrown by FileDownloader objects if they detect
2485 multiple files would have to be downloaded to the same file on disk.
2486 """
2487 pass
d77c3dfd
FV
2488
2489
bf5b9d85 2490class PostProcessingError(YoutubeDLError):
59ae15a5 2491 """Post Processing exception.
d77c3dfd 2492
59ae15a5
PH
2493 This exception may be raised by PostProcessor's .run() method to
2494 indicate an error in the postprocessing task.
2495 """
5f6a1245 2496
7851b379 2497 def __init__(self, msg):
bf5b9d85 2498 super(PostProcessingError, self).__init__(msg)
7851b379 2499 self.msg = msg
d77c3dfd 2500
5f6a1245 2501
8b0d7497 2502class ExistingVideoReached(YoutubeDLError):
2503 """ --break-on-existing triggered. """
2504 pass
2505
2506
2507class RejectedVideoReached(YoutubeDLError):
2508 """ --break-on-reject triggered. """
2509 pass
2510
2511
51d9739f 2512class ThrottledDownload(YoutubeDLError):
2513 """ Download speed below --throttled-rate. """
2514 pass
2515
2516
bf5b9d85 2517class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2518 """ --max-downloads limit has been reached. """
2519 pass
d77c3dfd
FV
2520
2521
bf5b9d85 2522class UnavailableVideoError(YoutubeDLError):
59ae15a5 2523 """Unavailable Format exception.
d77c3dfd 2524
59ae15a5
PH
2525 This exception will be thrown when a video is requested
2526 in a format that is not available for that video.
2527 """
2528 pass
d77c3dfd
FV
2529
2530
bf5b9d85 2531class ContentTooShortError(YoutubeDLError):
59ae15a5 2532 """Content Too Short exception.
d77c3dfd 2533
59ae15a5
PH
2534 This exception may be raised by FileDownloader objects when a file they
2535 download is too small for what the server announced first, indicating
2536 the connection was probably interrupted.
2537 """
d77c3dfd 2538
59ae15a5 2539 def __init__(self, downloaded, expected):
bf5b9d85
PM
2540 super(ContentTooShortError, self).__init__(
2541 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2542 )
2c7ed247 2543 # Both in bytes
59ae15a5
PH
2544 self.downloaded = downloaded
2545 self.expected = expected
d77c3dfd 2546
5f6a1245 2547
bf5b9d85 2548class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2549 def __init__(self, code=None, msg='Unknown error'):
2550 super(XAttrMetadataError, self).__init__(msg)
2551 self.code = code
bd264412 2552 self.msg = msg
efa97bdc
YCH
2553
2554 # Parsing code and msg
3089bc74 2555 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2556 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2557 self.reason = 'NO_SPACE'
2558 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2559 self.reason = 'VALUE_TOO_LONG'
2560 else:
2561 self.reason = 'NOT_SUPPORTED'
2562
2563
bf5b9d85 2564class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2565 pass
2566
2567
c5a59d93 2568def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2569 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2570 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2571 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2572 if sys.version_info < (3, 0):
65220c3b
S
2573 kwargs['strict'] = True
2574 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2575 source_address = ydl_handler._params.get('source_address')
8959018a 2576
be4a824d 2577 if source_address is not None:
8959018a
AU
2578 # This is to work around _create_connection() from socket where it will try all
2579 # address data from getaddrinfo() including IPv6. This filters the result from
2580 # getaddrinfo() based on the source_address value.
2581 # This is based on the cpython socket.create_connection() function.
2582 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2583 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2584 host, port = address
2585 err = None
2586 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2587 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2588 ip_addrs = [addr for addr in addrs if addr[0] == af]
2589 if addrs and not ip_addrs:
2590 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2591 raise socket.error(
2592 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2593 % (ip_version, source_address[0]))
8959018a
AU
2594 for res in ip_addrs:
2595 af, socktype, proto, canonname, sa = res
2596 sock = None
2597 try:
2598 sock = socket.socket(af, socktype, proto)
2599 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2600 sock.settimeout(timeout)
2601 sock.bind(source_address)
2602 sock.connect(sa)
2603 err = None # Explicitly break reference cycle
2604 return sock
2605 except socket.error as _:
2606 err = _
2607 if sock is not None:
2608 sock.close()
2609 if err is not None:
2610 raise err
2611 else:
9e21e6d9
S
2612 raise socket.error('getaddrinfo returns an empty list')
2613 if hasattr(hc, '_create_connection'):
2614 hc._create_connection = _create_connection
be4a824d
PH
2615 sa = (source_address, 0)
2616 if hasattr(hc, 'source_address'): # Python 2.7+
2617 hc.source_address = sa
2618 else: # Python 2.6
2619 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2620 sock = _create_connection(
be4a824d
PH
2621 (self.host, self.port), self.timeout, sa)
2622 if is_https:
d7932313
PH
2623 self.sock = ssl.wrap_socket(
2624 sock, self.key_file, self.cert_file,
2625 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2626 else:
2627 self.sock = sock
2628 hc.connect = functools.partial(_hc_connect, hc)
2629
2630 return hc
2631
2632
87f0e62d 2633def handle_youtubedl_headers(headers):
992fc9d6
YCH
2634 filtered_headers = headers
2635
2636 if 'Youtubedl-no-compression' in filtered_headers:
2637 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2638 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2639
992fc9d6 2640 return filtered_headers
87f0e62d
YCH
2641
2642
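# Illustrative sketch (assumed header values, not from the original source): the internal
# 'Youtubedl-no-compression' marker strips Accept-Encoding and is then dropped itself.
#   >>> handle_youtubedl_headers({'Youtubedl-no-compression': '1', 'Accept-Encoding': 'gzip', 'User-Agent': 'UA'})
#   {'User-Agent': 'UA'}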
acebc9cd 2643class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2644 """Handler for HTTP requests and responses.
2645
2646 This class, when installed with an OpenerDirector, automatically adds
2647 the standard headers to every HTTP request and handles gzipped and
2648 deflated responses from web servers. If compression is to be avoided in
2649 a particular request, the original request in the program code only has
0424ec30 2650 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2651 removed before making the real request.
2652
2653 Part of this code was copied from:
2654
2655 http://techknack.net/python-urllib2-handlers/
2656
2657 Andrew Rowls, the author of that code, agreed to release it to the
2658 public domain.
2659 """
2660
be4a824d
PH
2661 def __init__(self, params, *args, **kwargs):
2662 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2663 self._params = params
2664
2665 def http_open(self, req):
71aff188
YCH
2666 conn_class = compat_http_client.HTTPConnection
2667
2668 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2669 if socks_proxy:
2670 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2671 del req.headers['Ytdl-socks-proxy']
2672
be4a824d 2673 return self.do_open(functools.partial(
71aff188 2674 _create_http_connection, self, conn_class, False),
be4a824d
PH
2675 req)
2676
59ae15a5
PH
2677 @staticmethod
2678 def deflate(data):
fc2119f2 2679 if not data:
2680 return data
59ae15a5
PH
2681 try:
2682 return zlib.decompress(data, -zlib.MAX_WBITS)
2683 except zlib.error:
2684 return zlib.decompress(data)
2685
acebc9cd 2686 def http_request(self, req):
51f267d9
S
2687 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2688 # always respected by websites, some tend to give out URLs with non percent-encoded
2689 # non-ASCII characters (see telemb.py, ard.py [#3412])
2690 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2691 # To work around aforementioned issue we will replace request's original URL with
2692 # percent-encoded one
2693 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2694 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2695 url = req.get_full_url()
2696 url_escaped = escape_url(url)
2697
2698 # Substitute the URL only if it changed after escaping
2699 if url != url_escaped:
15d260eb 2700 req = update_Request(req, url=url_escaped)
51f267d9 2701
33ac271b 2702 for h, v in std_headers.items():
3d5f7a39
JK
2703 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2704 # The dict keys are capitalized by urllib because of this bug
2705 if h.capitalize() not in req.headers:
33ac271b 2706 req.add_header(h, v)
87f0e62d
YCH
2707
2708 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2709
2710 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2711 # Python 2.6 is brain-dead when it comes to fragments
2712 req._Request__original = req._Request__original.partition('#')[0]
2713 req._Request__r_type = req._Request__r_type.partition('#')[0]
2714
59ae15a5
PH
2715 return req
2716
acebc9cd 2717 def http_response(self, req, resp):
59ae15a5
PH
2718 old_resp = resp
2719 # gzip
2720 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2721 content = resp.read()
2722 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2723 try:
2724 uncompressed = io.BytesIO(gz.read())
2725 except IOError as original_ioerror:
2726 # There may be junk at the end of the file
2727 # See http://stackoverflow.com/q/4928560/35070 for details
2728 for i in range(1, 1024):
2729 try:
2730 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2731 uncompressed = io.BytesIO(gz.read())
2732 except IOError:
2733 continue
2734 break
2735 else:
2736 raise original_ioerror
b407d853 2737 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2738 resp.msg = old_resp.msg
c047270c 2739 del resp.headers['Content-encoding']
59ae15a5
PH
2740 # deflate
2741 if resp.headers.get('Content-encoding', '') == 'deflate':
2742 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2743 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2744 resp.msg = old_resp.msg
c047270c 2745 del resp.headers['Content-encoding']
ad729172 2746 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2747 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2748 if 300 <= resp.code < 400:
2749 location = resp.headers.get('Location')
2750 if location:
2751 # As per RFC 2616, the default charset is iso-8859-1, which is respected by Python 3
2752 if sys.version_info >= (3, 0):
2753 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2754 else:
2755 location = location.decode('utf-8')
5a4d9ddb
S
2756 location_escaped = escape_url(location)
2757 if location != location_escaped:
2758 del resp.headers['Location']
9a4aec8b
YCH
2759 if sys.version_info < (3, 0):
2760 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2761 resp.headers['Location'] = location_escaped
59ae15a5 2762 return resp
0f8d03f8 2763
acebc9cd
PH
2764 https_request = http_request
2765 https_response = http_response
bf50b038 2766
5de90176 2767
71aff188
YCH
2768def make_socks_conn_class(base_class, socks_proxy):
2769 assert issubclass(base_class, (
2770 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2771
2772 url_components = compat_urlparse.urlparse(socks_proxy)
2773 if url_components.scheme.lower() == 'socks5':
2774 socks_type = ProxyType.SOCKS5
2775 elif url_components.scheme.lower() in ('socks', 'socks4'):
2776 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2777 elif url_components.scheme.lower() == 'socks4a':
2778 socks_type = ProxyType.SOCKS4A
71aff188 2779
cdd94c2e
YCH
2780 def unquote_if_non_empty(s):
2781 if not s:
2782 return s
2783 return compat_urllib_parse_unquote_plus(s)
2784
71aff188
YCH
2785 proxy_args = (
2786 socks_type,
2787 url_components.hostname, url_components.port or 1080,
2788 True, # Remote DNS
cdd94c2e
YCH
2789 unquote_if_non_empty(url_components.username),
2790 unquote_if_non_empty(url_components.password),
71aff188
YCH
2791 )
2792
2793 class SocksConnection(base_class):
2794 def connect(self):
2795 self.sock = sockssocket()
2796 self.sock.setproxy(*proxy_args)
2797 if type(self.timeout) in (int, float):
2798 self.sock.settimeout(self.timeout)
2799 self.sock.connect((self.host, self.port))
2800
2801 if isinstance(self, compat_http_client.HTTPSConnection):
2802 if hasattr(self, '_context'): # Python > 2.6
2803 self.sock = self._context.wrap_socket(
2804 self.sock, server_hostname=self.host)
2805 else:
2806 self.sock = ssl.wrap_socket(self.sock)
2807
2808 return SocksConnection
2809
2810
be4a824d
PH
2811class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2812 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2813 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2814 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2815 self._params = params
2816
2817 def https_open(self, req):
4f264c02 2818 kwargs = {}
71aff188
YCH
2819 conn_class = self._https_conn_class
2820
4f264c02
JMF
2821 if hasattr(self, '_context'): # python > 2.6
2822 kwargs['context'] = self._context
2823 if hasattr(self, '_check_hostname'): # python 3.x
2824 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2825
2826 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2827 if socks_proxy:
2828 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2829 del req.headers['Ytdl-socks-proxy']
2830
be4a824d 2831 return self.do_open(functools.partial(
71aff188 2832 _create_http_connection, self, conn_class, True),
4f264c02 2833 req, **kwargs)
be4a824d
PH
2834
2835
1bab3437 2836class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2837 """
2838 See [1] for cookie file format.
2839
2840 1. https://curl.haxx.se/docs/http-cookies.html
2841 """
e7e62441 2842 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2843 _ENTRY_LEN = 7
2844 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2845# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2846
2847'''
2848 _CookieFileEntry = collections.namedtuple(
2849 'CookieFileEntry',
2850 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2851
1bab3437 2852 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2853 """
2854 Save cookies to a file.
2855
2856 Most of the code is taken from CPython 3.8 and slightly adapted
2857 to support cookie files with UTF-8 in both python 2 and 3.
2858 """
2859 if filename is None:
2860 if self.filename is not None:
2861 filename = self.filename
2862 else:
2863 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2864
1bab3437
S
2865 # Store session cookies with `expires` set to 0 instead of an empty
2866 # string
2867 for cookie in self:
2868 if cookie.expires is None:
2869 cookie.expires = 0
c380cc28
S
2870
2871 with io.open(filename, 'w', encoding='utf-8') as f:
2872 f.write(self._HEADER)
2873 now = time.time()
2874 for cookie in self:
2875 if not ignore_discard and cookie.discard:
2876 continue
2877 if not ignore_expires and cookie.is_expired(now):
2878 continue
2879 if cookie.secure:
2880 secure = 'TRUE'
2881 else:
2882 secure = 'FALSE'
2883 if cookie.domain.startswith('.'):
2884 initial_dot = 'TRUE'
2885 else:
2886 initial_dot = 'FALSE'
2887 if cookie.expires is not None:
2888 expires = compat_str(cookie.expires)
2889 else:
2890 expires = ''
2891 if cookie.value is None:
2892 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2893 # with no name, whereas http.cookiejar regards it as a
2894 # cookie with no value.
2895 name = ''
2896 value = cookie.name
2897 else:
2898 name = cookie.name
2899 value = cookie.value
2900 f.write(
2901 '\t'.join([cookie.domain, initial_dot, cookie.path,
2902 secure, expires, name, value]) + '\n')
1bab3437
S
2903
2904 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2905 """Load cookies from a file."""
2906 if filename is None:
2907 if self.filename is not None:
2908 filename = self.filename
2909 else:
2910 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2911
c380cc28
S
2912 def prepare_line(line):
2913 if line.startswith(self._HTTPONLY_PREFIX):
2914 line = line[len(self._HTTPONLY_PREFIX):]
2915 # comments and empty lines are fine
2916 if line.startswith('#') or not line.strip():
2917 return line
2918 cookie_list = line.split('\t')
2919 if len(cookie_list) != self._ENTRY_LEN:
2920 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2921 cookie = self._CookieFileEntry(*cookie_list)
2922 if cookie.expires_at and not cookie.expires_at.isdigit():
2923 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2924 return line
2925
e7e62441 2926 cf = io.StringIO()
c380cc28 2927 with io.open(filename, encoding='utf-8') as f:
e7e62441 2928 for line in f:
c380cc28
S
2929 try:
2930 cf.write(prepare_line(line))
2931 except compat_cookiejar.LoadError as e:
2932 write_string(
2933 'WARNING: skipping cookie file entry due to %s: %r\n'
2934 % (e, line), sys.stderr)
2935 continue
e7e62441 2936 cf.seek(0)
2937 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2938 # Session cookies are denoted by either `expires` field set to
2939 # an empty string or 0. MozillaCookieJar only recognizes the former
2940 # (see [1]). So we need to force the latter to be recognized as session
2941 # cookies on our own.
2942 # Session cookies may be important for cookies-based authentication,
2943 # e.g. usually, when user does not check 'Remember me' check box while
2944 # logging in on a site, some important cookies are stored as session
2945 # cookies so that not recognizing them will result in failed login.
2946 # 1. https://bugs.python.org/issue17164
2947 for cookie in self:
2948 # Treat `expires=0` cookies as session cookies
2949 if cookie.expires == 0:
2950 cookie.expires = None
2951 cookie.discard = True
2952
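# Rough usage sketch (hypothetical file name; not part of the original source): the jar reads
# and writes Netscape-format cookie files, skipping malformed lines with a warning and
# normalizing session cookies as described above.
#   >>> jar = YoutubeDLCookieJar('cookies.txt')
#   >>> jar.load(ignore_discard=True, ignore_expires=True)   # session cookies get expires=None, discard=True
#   >>> jar.save(ignore_discard=True, ignore_expires=True)   # session cookies are written with expires=0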
2953
a6420bf5
S
2954class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2955 def __init__(self, cookiejar=None):
2956 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2957
2958 def http_response(self, request, response):
2959 # Python 2 will choke on the next HTTP request if there are non-ASCII
2960 # characters in Set-Cookie HTTP header of last response (see
067aa17e 2961 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
2962 # In order to at least prevent crashing we will percent encode Set-Cookie
2963 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
2964 # if sys.version_info < (3, 0) and response.headers:
2965 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2966 # set_cookie = response.headers.get(set_cookie_header)
2967 # if set_cookie:
2968 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2969 # if set_cookie != set_cookie_escaped:
2970 # del response.headers[set_cookie_header]
2971 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
2972 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2973
f5fa042c 2974 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
2975 https_response = http_response
2976
2977
fca6dba8 2978class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 2979 """YoutubeDL redirect handler
2980
2981 The code is based on HTTPRedirectHandler implementation from CPython [1].
2982
2983 This redirect handler solves two issues:
2984 - ensures redirect URL is always unicode under python 2
2985 - introduces support for experimental HTTP response status code
2986 308 Permanent Redirect [2] used by some sites [3]
2987
2988 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2989 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2990 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2991 """
2992
2993 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2994
2995 def redirect_request(self, req, fp, code, msg, headers, newurl):
2996 """Return a Request or None in response to a redirect.
2997
2998 This is called by the http_error_30x methods when a
2999 redirection response is received. If a redirection should
3000 take place, return a new Request to allow http_error_30x to
3001 perform the redirect. Otherwise, raise HTTPError if no-one
3002 else should try to handle this url. Return None if you can't
3003 but another Handler might.
3004 """
3005 m = req.get_method()
3006 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3007 or code in (301, 302, 303) and m == "POST")):
3008 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3009 # Strictly (according to RFC 2616), 301 or 302 in response to
3010 # a POST MUST NOT cause a redirection without confirmation
3011 # from the user (of urllib.request, in this case). In practice,
3012 # essentially all clients do redirect in this case, so we do
3013 # the same.
3014
3015 # On python 2 urlh.geturl() may sometimes return redirect URL
3016 # as a byte string instead of unicode. This workaround allows us
3017 # to force it to always return unicode.
3018 if sys.version_info[0] < 3:
3019 newurl = compat_str(newurl)
3020
3021 # Be conciliant with URIs containing a space. This is mainly
3022 # redundant with the more complete encoding done in http_error_302(),
3023 # but it is kept for compatibility with other callers.
3024 newurl = newurl.replace(' ', '%20')
3025
3026 CONTENT_HEADERS = ("content-length", "content-type")
3027 # NB: don't use dict comprehension for python 2.6 compatibility
3028 newheaders = dict((k, v) for k, v in req.headers.items()
3029 if k.lower() not in CONTENT_HEADERS)
3030 return compat_urllib_request.Request(
3031 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3032 unverifiable=True)
fca6dba8
S
3033
3034
46f59e89
S
3035def extract_timezone(date_str):
3036 m = re.search(
f137e4c2 3037 r'''(?x)
3038 ^.{8,}? # >=8 char non-TZ prefix, if present
3039 (?P<tz>Z| # just the UTC Z, or
3040 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3041 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3042 [ ]? # optional space
3043 (?P<sign>\+|-) # +/-
3044 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3045 $)
3046 ''', date_str)
46f59e89
S
3047 if not m:
3048 timezone = datetime.timedelta()
3049 else:
3050 date_str = date_str[:-len(m.group('tz'))]
3051 if not m.group('sign'):
3052 timezone = datetime.timedelta()
3053 else:
3054 sign = 1 if m.group('sign') == '+' else -1
3055 timezone = datetime.timedelta(
3056 hours=sign * int(m.group('hours')),
3057 minutes=sign * int(m.group('minutes')))
3058 return timezone, date_str
3059
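# Illustrative sketch (assumed timestamps, not part of the original source): the UTC offset is
# returned as a timedelta and stripped from the remaining date string.
#   >>> tz, rest = extract_timezone('2021-05-02T07:18:22+02:00')
#   >>> tz == datetime.timedelta(hours=2), rest
#   (True, '2021-05-02T07:18:22')
#   >>> extract_timezone('May 02, 2021')[0] == datetime.timedelta(0)  # no recognizable offset
#   True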
3060
08b38d54 3061def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3062 """ Return a UNIX timestamp from the given date """
3063
3064 if date_str is None:
3065 return None
3066
52c3a6e4
S
3067 date_str = re.sub(r'\.[0-9]+', '', date_str)
3068
08b38d54 3069 if timezone is None:
46f59e89
S
3070 timezone, date_str = extract_timezone(date_str)
3071
52c3a6e4
S
3072 try:
3073 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3074 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3075 return calendar.timegm(dt.timetuple())
3076 except ValueError:
3077 pass
912b38b4
PH
3078
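# Illustrative sketch (sample dates assumed, not from the original source):
#   >>> parse_iso8601('2014-03-23T23:04:26+0100')
#   1395612266
#   >>> parse_iso8601('2014-03-23T22:04:26Z')
#   1395612266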
3079
46f59e89
S
3080def date_formats(day_first=True):
3081 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3082
3083
42bdd9d0 3084def unified_strdate(date_str, day_first=True):
bf50b038 3085 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3086
3087 if date_str is None:
3088 return None
bf50b038 3089 upload_date = None
5f6a1245 3090 # Replace commas
026fcc04 3091 date_str = date_str.replace(',', ' ')
42bdd9d0 3092 # Remove AM/PM + timezone
9bb8e0a3 3093 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3094 _, date_str = extract_timezone(date_str)
42bdd9d0 3095
46f59e89 3096 for expression in date_formats(day_first):
bf50b038
JMF
3097 try:
3098 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3099 except ValueError:
bf50b038 3100 pass
42393ce2
PH
3101 if upload_date is None:
3102 timetuple = email.utils.parsedate_tz(date_str)
3103 if timetuple:
c6b9cf05
S
3104 try:
3105 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3106 except ValueError:
3107 pass
6a750402
JMF
3108 if upload_date is not None:
3109 return compat_str(upload_date)
bf50b038 3110
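# Illustrative sketch (sample inputs assumed, not part of the original source): free-form
# date spellings normalize to YYYYMMDD; day_first controls ambiguous numeric dates.
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('8/7/2009')          # day-first by default
#   '20090708'
#   >>> unified_strdate('8/7/2009', day_first=False)
#   '20090807'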
5f6a1245 3111
46f59e89
S
3112def unified_timestamp(date_str, day_first=True):
3113 if date_str is None:
3114 return None
3115
2ae2ffda 3116 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3117
7dc2a74e 3118 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3119 timezone, date_str = extract_timezone(date_str)
3120
3121 # Remove AM/PM + timezone
3122 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3123
deef3195
S
3124 # Remove unrecognized timezones from ISO 8601 alike timestamps
3125 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3126 if m:
3127 date_str = date_str[:-len(m.group('tz'))]
3128
f226880c
PH
3129 # Python only supports microseconds, so remove nanoseconds
3130 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3131 if m:
3132 date_str = m.group(1)
3133
46f59e89
S
3134 for expression in date_formats(day_first):
3135 try:
7dc2a74e 3136 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3137 return calendar.timegm(dt.timetuple())
3138 except ValueError:
3139 pass
3140 timetuple = email.utils.parsedate_tz(date_str)
3141 if timetuple:
7dc2a74e 3142 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3143
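# Illustrative sketch (assumed input, not part of the original source): unified_timestamp()
# additionally honours AM/PM and a trailing timezone before falling back to RFC 2822 parsing.
#   >>> unified_timestamp('December 15, 2017 at 7:49 am')
#   1513324140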
3144
28e614de 3145def determine_ext(url, default_ext='unknown_video'):
85750f89 3146 if url is None or '.' not in url:
f4776371 3147 return default_ext
9cb9a5df 3148 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3149 if re.match(r'^[A-Za-z0-9]+$', guess):
3150 return guess
a7aaa398
S
3151 # Try to extract the extension from URLs like http://example.com/foo/bar.mp4/?download
3152 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3153 return guess.rstrip('/')
73e79f2a 3154 else:
cbdbb766 3155 return default_ext
73e79f2a 3156
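# Illustrative sketch (example URLs assumed, not part of the original source):
#   >>> determine_ext('http://example.com/foo/bar.mp4/?download')
#   'mp4'
#   >>> determine_ext('about')
#   'unknown_video'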
5f6a1245 3157
824fa511
S
3158def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3159 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3160
5f6a1245 3161
9e62f283 3162def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3163 """
3164 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3165 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3166
3167 format: string date format used to return datetime object from
3168 precision: round the time portion of a datetime object.
3169 auto|microsecond|second|minute|hour|day.
3170 auto: round to the unit provided in date_str (if applicable).
3171 """
3172 auto_precision = False
3173 if precision == 'auto':
3174 auto_precision = True
3175 precision = 'microsecond'
3176 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3177 if date_str in ('now', 'today'):
37254abc 3178 return today
f8795e10
PH
3179 if date_str == 'yesterday':
3180 return today - datetime.timedelta(days=1)
9e62f283 3181 match = re.match(
3182 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3183 date_str)
37254abc 3184 if match is not None:
9e62f283 3185 start_time = datetime_from_str(match.group('start'), precision, format)
3186 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3187 unit = match.group('unit')
9e62f283 3188 if unit == 'month' or unit == 'year':
3189 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3190 unit = 'day'
9e62f283 3191 else:
3192 if unit == 'week':
3193 unit = 'day'
3194 time *= 7
3195 delta = datetime.timedelta(**{unit + 's': time})
3196 new_date = start_time + delta
3197 if auto_precision:
3198 return datetime_round(new_date, unit)
3199 return new_date
3200
3201 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3202
3203
3204def date_from_str(date_str, format='%Y%m%d'):
3205 """
3206 Return a datetime object from a string in the format YYYYMMDD or
3207 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3208
3209 format: string date format used to return datetime object from
3210 """
3211 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3212
3213
3214def datetime_add_months(dt, months):
3215 """Increment/Decrement a datetime object by months."""
3216 month = dt.month + months - 1
3217 year = dt.year + month // 12
3218 month = month % 12 + 1
3219 day = min(dt.day, calendar.monthrange(year, month)[1])
3220 return dt.replace(year, month, day)
3221
3222
3223def datetime_round(dt, precision='day'):
3224 """
3225 Round a datetime object's time to a specific precision
3226 """
3227 if precision == 'microsecond':
3228 return dt
3229
3230 unit_seconds = {
3231 'day': 86400,
3232 'hour': 3600,
3233 'minute': 60,
3234 'second': 1,
3235 }
3236 roundto = lambda x, n: ((x + n / 2) // n) * n
3237 timestamp = calendar.timegm(dt.timetuple())
3238 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3239
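# Illustrative sketch (assumed date strings, not from the original source): relative offsets
# are applied with calendar-aware month arithmetic and optional rounding.
#   >>> datetime_from_str('20210131+1month', precision='day')
#   datetime.datetime(2021, 2, 28, 0, 0)
#   >>> date_from_str('now-1week') == datetime.date.today() - datetime.timedelta(days=7)
#   True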
3240
e63fc1be 3241def hyphenate_date(date_str):
3242 """
3243 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3244 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3245 if match is not None:
3246 return '-'.join(match.groups())
3247 else:
3248 return date_str
3249
5f6a1245 3250
bd558525
JMF
3251class DateRange(object):
3252 """Represents a time interval between two dates"""
5f6a1245 3253
bd558525
JMF
3254 def __init__(self, start=None, end=None):
3255 """start and end must be strings in the format accepted by date"""
3256 if start is not None:
3257 self.start = date_from_str(start)
3258 else:
3259 self.start = datetime.datetime.min.date()
3260 if end is not None:
3261 self.end = date_from_str(end)
3262 else:
3263 self.end = datetime.datetime.max.date()
37254abc 3264 if self.start > self.end:
bd558525 3265 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3266
bd558525
JMF
3267 @classmethod
3268 def day(cls, day):
3269 """Returns a range that only contains the given day"""
5f6a1245
JW
3270 return cls(day, day)
3271
bd558525
JMF
3272 def __contains__(self, date):
3273 """Check if the date is in the range"""
37254abc
JMF
3274 if not isinstance(date, datetime.date):
3275 date = date_from_str(date)
3276 return self.start <= date <= self.end
5f6a1245 3277
bd558525 3278 def __str__(self):
5f6a1245 3279 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3280
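# Illustrative sketch (dates are assumed, not part of the original source):
#   >>> '20210515' in DateRange('20210101', '20211231')
#   True
#   >>> str(DateRange.day('20200229'))
#   '2020-02-29 - 2020-02-29'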
3281
3282def platform_name():
3283 """ Returns the platform name as a compat_str """
3284 res = platform.platform()
3285 if isinstance(res, bytes):
3286 res = res.decode(preferredencoding())
3287
3288 assert isinstance(res, compat_str)
3289 return res
c257baff
PH
3290
3291
b58ddb32
PH
3292def _windows_write_string(s, out):
3293 """ Returns True if the string was written using special methods,
3294 False if it has yet to be written out."""
3295 # Adapted from http://stackoverflow.com/a/3259271/35070
3296
3297 import ctypes
3298 import ctypes.wintypes
3299
3300 WIN_OUTPUT_IDS = {
3301 1: -11,
3302 2: -12,
3303 }
3304
a383a98a
PH
3305 try:
3306 fileno = out.fileno()
3307 except AttributeError:
3308 # If the output stream doesn't have a fileno, it's virtual
3309 return False
aa42e873
PH
3310 except io.UnsupportedOperation:
3311 # Some strange Windows pseudo files?
3312 return False
b58ddb32
PH
3313 if fileno not in WIN_OUTPUT_IDS:
3314 return False
3315
d7cd9a9e 3316 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3317 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3318 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3319 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3320
d7cd9a9e 3321 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3322 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3323 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3324 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3325 written = ctypes.wintypes.DWORD(0)
3326
d7cd9a9e 3327 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3328 FILE_TYPE_CHAR = 0x0002
3329 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3330 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3331 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3332 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3333 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3334 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3335
3336 def not_a_console(handle):
3337 if handle == INVALID_HANDLE_VALUE or handle is None:
3338 return True
3089bc74
S
3339 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3340 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3341
3342 if not_a_console(h):
3343 return False
3344
d1b9c912
PH
3345 def next_nonbmp_pos(s):
3346 try:
3347 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3348 except StopIteration:
3349 return len(s)
3350
3351 while s:
3352 count = min(next_nonbmp_pos(s), 1024)
3353
b58ddb32 3354 ret = WriteConsoleW(
d1b9c912 3355 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3356 if ret == 0:
3357 raise OSError('Failed to write string')
d1b9c912
PH
3358 if not count: # We just wrote a non-BMP character
3359 assert written.value == 2
3360 s = s[1:]
3361 else:
3362 assert written.value > 0
3363 s = s[written.value:]
b58ddb32
PH
3364 return True
3365
3366
734f90bb 3367def write_string(s, out=None, encoding=None):
7459e3a2
PH
3368 if out is None:
3369 out = sys.stderr
8bf48f23 3370 assert type(s) == compat_str
7459e3a2 3371
b58ddb32
PH
3372 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3373 if _windows_write_string(s, out):
3374 return
3375
3089bc74
S
3376 if ('b' in getattr(out, 'mode', '')
3377 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3378 byt = s.encode(encoding or preferredencoding(), 'ignore')
3379 out.write(byt)
3380 elif hasattr(out, 'buffer'):
3381 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3382 byt = s.encode(enc, 'ignore')
3383 out.buffer.write(byt)
3384 else:
8bf48f23 3385 out.write(s)
7459e3a2
PH
3386 out.flush()
3387
3388
48ea9cea
PH
3389def bytes_to_intlist(bs):
3390 if not bs:
3391 return []
3392 if isinstance(bs[0], int): # Python 3
3393 return list(bs)
3394 else:
3395 return [ord(c) for c in bs]
3396
c257baff 3397
cba892fa 3398def intlist_to_bytes(xs):
3399 if not xs:
3400 return b''
edaa23f8 3401 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3402
3403
c1c9a79c
PH
3404# Cross-platform file locking
3405if sys.platform == 'win32':
3406 import ctypes.wintypes
3407 import msvcrt
3408
3409 class OVERLAPPED(ctypes.Structure):
3410 _fields_ = [
3411 ('Internal', ctypes.wintypes.LPVOID),
3412 ('InternalHigh', ctypes.wintypes.LPVOID),
3413 ('Offset', ctypes.wintypes.DWORD),
3414 ('OffsetHigh', ctypes.wintypes.DWORD),
3415 ('hEvent', ctypes.wintypes.HANDLE),
3416 ]
3417
3418 kernel32 = ctypes.windll.kernel32
3419 LockFileEx = kernel32.LockFileEx
3420 LockFileEx.argtypes = [
3421 ctypes.wintypes.HANDLE, # hFile
3422 ctypes.wintypes.DWORD, # dwFlags
3423 ctypes.wintypes.DWORD, # dwReserved
3424 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3425 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3426 ctypes.POINTER(OVERLAPPED) # Overlapped
3427 ]
3428 LockFileEx.restype = ctypes.wintypes.BOOL
3429 UnlockFileEx = kernel32.UnlockFileEx
3430 UnlockFileEx.argtypes = [
3431 ctypes.wintypes.HANDLE, # hFile
3432 ctypes.wintypes.DWORD, # dwReserved
3433 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3434 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3435 ctypes.POINTER(OVERLAPPED) # Overlapped
3436 ]
3437 UnlockFileEx.restype = ctypes.wintypes.BOOL
3438 whole_low = 0xffffffff
3439 whole_high = 0x7fffffff
3440
3441 def _lock_file(f, exclusive):
3442 overlapped = OVERLAPPED()
3443 overlapped.Offset = 0
3444 overlapped.OffsetHigh = 0
3445 overlapped.hEvent = 0
3446 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3447 handle = msvcrt.get_osfhandle(f.fileno())
3448 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3449 whole_low, whole_high, f._lock_file_overlapped_p):
3450 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3451
3452 def _unlock_file(f):
3453 assert f._lock_file_overlapped_p
3454 handle = msvcrt.get_osfhandle(f.fileno())
3455 if not UnlockFileEx(handle, 0,
3456 whole_low, whole_high, f._lock_file_overlapped_p):
3457 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3458
3459else:
399a76e6
YCH
3460 # Some platforms, such as Jython, are missing fcntl
3461 try:
3462 import fcntl
c1c9a79c 3463
399a76e6
YCH
3464 def _lock_file(f, exclusive):
3465 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3466
399a76e6
YCH
3467 def _unlock_file(f):
3468 fcntl.flock(f, fcntl.LOCK_UN)
3469 except ImportError:
3470 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3471
3472 def _lock_file(f, exclusive):
3473 raise IOError(UNSUPPORTED_MSG)
3474
3475 def _unlock_file(f):
3476 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3477
3478
3479class locked_file(object):
3480 def __init__(self, filename, mode, encoding=None):
3481 assert mode in ['r', 'a', 'w']
3482 self.f = io.open(filename, mode, encoding=encoding)
3483 self.mode = mode
3484
3485 def __enter__(self):
3486 exclusive = self.mode != 'r'
3487 try:
3488 _lock_file(self.f, exclusive)
3489 except IOError:
3490 self.f.close()
3491 raise
3492 return self
3493
3494 def __exit__(self, etype, value, traceback):
3495 try:
3496 _unlock_file(self.f)
3497 finally:
3498 self.f.close()
3499
3500 def __iter__(self):
3501 return iter(self.f)
3502
3503 def write(self, *args):
3504 return self.f.write(*args)
3505
3506 def read(self, *args):
3507 return self.f.read(*args)
4eb7f1d1
JMF
3508
3509
4644ac55
S
3510def get_filesystem_encoding():
3511 encoding = sys.getfilesystemencoding()
3512 return encoding if encoding is not None else 'utf-8'
3513
3514
4eb7f1d1 3515def shell_quote(args):
a6a173c2 3516 quoted_args = []
4644ac55 3517 encoding = get_filesystem_encoding()
a6a173c2
JMF
3518 for a in args:
3519 if isinstance(a, bytes):
3520 # We may get a filename encoded with 'encodeFilename'
3521 a = a.decode(encoding)
aefce8e6 3522 quoted_args.append(compat_shlex_quote(a))
28e614de 3523 return ' '.join(quoted_args)
9d4660ca
PH
3524
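# Illustrative sketch (assumed arguments; output shown for POSIX shells, where
# compat_shlex_quote defers to shlex.quote):
#   >>> shell_quote(['echo', 'a b'])
#   "echo 'a b'"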
3525
3526def smuggle_url(url, data):
3527 """ Pass additional data in a URL for internal use. """
3528
81953d1a
RA
3529 url, idata = unsmuggle_url(url, {})
3530 data.update(idata)
15707c7e 3531 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3532 {'__youtubedl_smuggle': json.dumps(data)})
3533 return url + '#' + sdata
9d4660ca
PH
3534
3535
79f82953 3536def unsmuggle_url(smug_url, default=None):
83e865a3 3537 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3538 return smug_url, default
28e614de
PH
3539 url, _, sdata = smug_url.rpartition('#')
3540 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3541 data = json.loads(jsond)
3542 return url, data
02dbf93f
PH
3543
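# Illustrative round-trip sketch (example URL and payload are assumed, not from the original source):
#   >>> url = smuggle_url('https://example.com/video', {'referer': 'https://example.com/'})
#   >>> unsmuggle_url(url)
#   ('https://example.com/video', {'referer': 'https://example.com/'})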
3544
02dbf93f
PH
3545def format_bytes(bytes):
3546 if bytes is None:
28e614de 3547 return 'N/A'
02dbf93f
PH
3548 if type(bytes) is str:
3549 bytes = float(bytes)
3550 if bytes == 0.0:
3551 exponent = 0
3552 else:
3553 exponent = int(math.log(bytes, 1024.0))
28e614de 3554 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3555 converted = float(bytes) / float(1024 ** exponent)
28e614de 3556 return '%.2f%s' % (converted, suffix)
f53c966a 3557
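# Illustrative sketch (assumed byte counts, not part of the original source):
#   >>> format_bytes(1536)
#   '1.50KiB'
#   >>> format_bytes(None)
#   'N/A'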
1c088fa8 3558
fb47597b
S
3559def lookup_unit_table(unit_table, s):
3560 units_re = '|'.join(re.escape(u) for u in unit_table)
3561 m = re.match(
782b1b5b 3562 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3563 if not m:
3564 return None
3565 num_str = m.group('num').replace(',', '.')
3566 mult = unit_table[m.group('unit')]
3567 return int(float(num_str) * mult)
3568
3569
be64b5b0
PH
3570def parse_filesize(s):
3571 if s is None:
3572 return None
3573
dfb1b146 3574 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3575 # but we support those too
3576 _UNIT_TABLE = {
3577 'B': 1,
3578 'b': 1,
70852b47 3579 'bytes': 1,
be64b5b0
PH
3580 'KiB': 1024,
3581 'KB': 1000,
3582 'kB': 1024,
3583 'Kb': 1000,
13585d76 3584 'kb': 1000,
70852b47
YCH
3585 'kilobytes': 1000,
3586 'kibibytes': 1024,
be64b5b0
PH
3587 'MiB': 1024 ** 2,
3588 'MB': 1000 ** 2,
3589 'mB': 1024 ** 2,
3590 'Mb': 1000 ** 2,
13585d76 3591 'mb': 1000 ** 2,
70852b47
YCH
3592 'megabytes': 1000 ** 2,
3593 'mebibytes': 1024 ** 2,
be64b5b0
PH
3594 'GiB': 1024 ** 3,
3595 'GB': 1000 ** 3,
3596 'gB': 1024 ** 3,
3597 'Gb': 1000 ** 3,
13585d76 3598 'gb': 1000 ** 3,
70852b47
YCH
3599 'gigabytes': 1000 ** 3,
3600 'gibibytes': 1024 ** 3,
be64b5b0
PH
3601 'TiB': 1024 ** 4,
3602 'TB': 1000 ** 4,
3603 'tB': 1024 ** 4,
3604 'Tb': 1000 ** 4,
13585d76 3605 'tb': 1000 ** 4,
70852b47
YCH
3606 'terabytes': 1000 ** 4,
3607 'tebibytes': 1024 ** 4,
be64b5b0
PH
3608 'PiB': 1024 ** 5,
3609 'PB': 1000 ** 5,
3610 'pB': 1024 ** 5,
3611 'Pb': 1000 ** 5,
13585d76 3612 'pb': 1000 ** 5,
70852b47
YCH
3613 'petabytes': 1000 ** 5,
3614 'pebibytes': 1024 ** 5,
be64b5b0
PH
3615 'EiB': 1024 ** 6,
3616 'EB': 1000 ** 6,
3617 'eB': 1024 ** 6,
3618 'Eb': 1000 ** 6,
13585d76 3619 'eb': 1000 ** 6,
70852b47
YCH
3620 'exabytes': 1000 ** 6,
3621 'exbibytes': 1024 ** 6,
be64b5b0
PH
3622 'ZiB': 1024 ** 7,
3623 'ZB': 1000 ** 7,
3624 'zB': 1024 ** 7,
3625 'Zb': 1000 ** 7,
13585d76 3626 'zb': 1000 ** 7,
70852b47
YCH
3627 'zettabytes': 1000 ** 7,
3628 'zebibytes': 1024 ** 7,
be64b5b0
PH
3629 'YiB': 1024 ** 8,
3630 'YB': 1000 ** 8,
3631 'yB': 1024 ** 8,
3632 'Yb': 1000 ** 8,
13585d76 3633 'yb': 1000 ** 8,
70852b47
YCH
3634 'yottabytes': 1000 ** 8,
3635 'yobibytes': 1024 ** 8,
be64b5b0
PH
3636 }
3637
fb47597b
S
3638 return lookup_unit_table(_UNIT_TABLE, s)
3639
3640
3641def parse_count(s):
3642 if s is None:
be64b5b0
PH
3643 return None
3644
fb47597b
S
3645 s = s.strip()
3646
3647 if re.match(r'^[\d,.]+$', s):
3648 return str_to_int(s)
3649
3650 _UNIT_TABLE = {
3651 'k': 1000,
3652 'K': 1000,
3653 'm': 1000 ** 2,
3654 'M': 1000 ** 2,
3655 'kk': 1000 ** 2,
3656 'KK': 1000 ** 2,
3657 }
be64b5b0 3658
fb47597b 3659 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3660
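# Illustrative sketch (assumed strings, not from the original source): binary and decimal
# units resolve through lookup_unit_table(); parse_count() handles K/M style suffixes.
#   >>> parse_filesize('500 KiB')
#   512000
#   >>> parse_filesize('5 MB')
#   5000000
#   >>> parse_count('1.3M')
#   1300000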
2f7ae819 3661
b871d7e9
S
3662def parse_resolution(s):
3663 if s is None:
3664 return {}
3665
3666 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3667 if mobj:
3668 return {
3669 'width': int(mobj.group('w')),
3670 'height': int(mobj.group('h')),
3671 }
3672
3673 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3674 if mobj:
3675 return {'height': int(mobj.group(1))}
3676
3677 mobj = re.search(r'\b([48])[kK]\b', s)
3678 if mobj:
3679 return {'height': int(mobj.group(1)) * 540}
3680
3681 return {}
3682
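# Illustrative sketch (assumed labels, not part of the original source):
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}
#   >>> parse_resolution('4k')
#   {'height': 2160}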
3683
0dc41787
S
3684def parse_bitrate(s):
3685 if not isinstance(s, compat_str):
3686 return
3687 mobj = re.search(r'\b(\d+)\s*kbps', s)
3688 if mobj:
3689 return int(mobj.group(1))
3690
3691
a942d6cb 3692def month_by_name(name, lang='en'):
caefb1de
PH
3693 """ Return the number of a month by (locale-independently) English name """
3694
f6717dec 3695 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3696
caefb1de 3697 try:
f6717dec 3698 return month_names.index(name) + 1
7105440c
YCH
3699 except ValueError:
3700 return None
3701
3702
3703def month_by_abbreviation(abbrev):
3704 """ Return the number of a month by (locale-independently) English
3705 abbreviations """
3706
3707 try:
3708 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3709 except ValueError:
3710 return None
18258362
JMF
3711
3712
5aafe895 3713def fix_xml_ampersands(xml_str):
18258362 3714 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3715 return re.sub(
3716 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3717 '&amp;',
5aafe895 3718 xml_str)
e3946f98
PH
3719
3720
3721def setproctitle(title):
8bf48f23 3722 assert isinstance(title, compat_str)
c1c05c67
YCH
3723
3724 # ctypes in Jython is not complete
3725 # http://bugs.jython.org/issue2148
3726 if sys.platform.startswith('java'):
3727 return
3728
e3946f98 3729 try:
611c1dd9 3730 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3731 except OSError:
3732 return
2f49bcd6
RC
3733 except TypeError:
3734 # LoadLibrary in Windows Python 2.7.13 only expects
3735 # a bytestring, but since unicode_literals turns
3736 # every string into a unicode string, it fails.
3737 return
6eefe533
PH
3738 title_bytes = title.encode('utf-8')
3739 buf = ctypes.create_string_buffer(len(title_bytes))
3740 buf.value = title_bytes
e3946f98 3741 try:
6eefe533 3742 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3743 except AttributeError:
3744 return # Strange libc, just skip this
d7dda168
PH
3745
3746
3747def remove_start(s, start):
46bc9b7d 3748 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3749
3750
2b9faf55 3751def remove_end(s, end):
46bc9b7d 3752 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3753
3754
31b2051e
S
3755def remove_quotes(s):
3756 if s is None or len(s) < 2:
3757 return s
3758 for quote in ('"', "'", ):
3759 if s[0] == quote and s[-1] == quote:
3760 return s[1:-1]
3761 return s
3762
3763
b6e0c7d2
U
3764def get_domain(url):
3765 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3766 return domain.group('domain') if domain else None
3767
3768
29eb5174 3769def url_basename(url):
9b8aaeed 3770 path = compat_urlparse.urlparse(url).path
28e614de 3771 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3772
3773
02dc0a36
S
3774def base_url(url):
3775 return re.match(r'https?://[^?#&]+/', url).group()
3776
3777
e34c3361 3778def urljoin(base, path):
4b5de77b
S
3779 if isinstance(path, bytes):
3780 path = path.decode('utf-8')
e34c3361
S
3781 if not isinstance(path, compat_str) or not path:
3782 return None
fad4ceb5 3783 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3784 return path
4b5de77b
S
3785 if isinstance(base, bytes):
3786 base = base.decode('utf-8')
3787 if not isinstance(base, compat_str) or not re.match(
3788 r'^(?:https?:)?//', base):
e34c3361
S
3789 return None
3790 return compat_urlparse.urljoin(base, path)
3791
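# Illustrative sketch (example URLs assumed, not part of the original source):
#   >>> urljoin('https://example.com/a/', 'b/c.mp4')
#   'https://example.com/a/b/c.mp4'
#   >>> url_basename('https://example.com/a/b/c.mp4?x=1')
#   'c.mp4'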
3792
aa94a6d3
PH
3793class HEADRequest(compat_urllib_request.Request):
3794 def get_method(self):
611c1dd9 3795 return 'HEAD'
7217e148
PH
3796
3797
95cf60e8
S
3798class PUTRequest(compat_urllib_request.Request):
3799 def get_method(self):
3800 return 'PUT'
3801
3802
9732d77e 3803def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3804 if get_attr:
3805 if v is not None:
3806 v = getattr(v, get_attr, None)
9572013d
PH
3807 if v == '':
3808 v = None
1812afb7
S
3809 if v is None:
3810 return default
3811 try:
3812 return int(v) * invscale // scale
5e1271c5 3813 except (ValueError, TypeError):
af98f8ff 3814 return default
9732d77e 3815
9572013d 3816
40a90862
JMF
3817def str_or_none(v, default=None):
3818 return default if v is None else compat_str(v)
3819
9732d77e
PH
3820
3821def str_to_int(int_str):
48d4681e 3822 """ A more relaxed version of int_or_none """
42db58ec 3823 if isinstance(int_str, compat_integer_types):
348c6bf1 3824 return int_str
42db58ec
S
3825 elif isinstance(int_str, compat_str):
3826 int_str = re.sub(r'[,\.\+]', '', int_str)
3827 return int_or_none(int_str)
608d11f5
PH
3828
3829
9732d77e 3830def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3831 if v is None:
3832 return default
3833 try:
3834 return float(v) * invscale / scale
5e1271c5 3835 except (ValueError, TypeError):
caf80631 3836 return default
43f775e4
PH
3837
3838
c7e327c4
S
3839def bool_or_none(v, default=None):
3840 return v if isinstance(v, bool) else default
3841
3842
53cd37ba
S
3843def strip_or_none(v, default=None):
3844 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3845
3846
af03000a
S
3847def url_or_none(url):
3848 if not url or not isinstance(url, compat_str):
3849 return None
3850 url = url.strip()
29f7c58a 3851 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3852
3853
e29663c6 3854def strftime_or_none(timestamp, date_format, default=None):
3855 datetime_object = None
3856 try:
3857 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3858 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3859 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3860 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3861 return datetime_object.strftime(date_format)
3862 except (ValueError, TypeError, AttributeError):
3863 return default
3864
3865
608d11f5 3866def parse_duration(s):
8f9312c3 3867 if not isinstance(s, compat_basestring):
608d11f5
PH
3868 return None
3869
ca7b3246
S
3870 s = s.strip()
3871
acaff495 3872 days, hours, mins, secs, ms = [None] * 5
15846398 3873 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3874 if m:
3875 days, hours, mins, secs, ms = m.groups()
3876 else:
3877 m = re.match(
056653bb
S
3878 r'''(?ix)(?:P?
3879 (?:
3880 [0-9]+\s*y(?:ears?)?\s*
3881 )?
3882 (?:
3883 [0-9]+\s*m(?:onths?)?\s*
3884 )?
3885 (?:
3886 [0-9]+\s*w(?:eeks?)?\s*
3887 )?
8f4b58d7 3888 (?:
acaff495 3889 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3890 )?
056653bb 3891 T)?
acaff495 3892 (?:
3893 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3894 )?
3895 (?:
3896 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3897 )?
3898 (?:
3899 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3900 )?Z?$''', s)
acaff495 3901 if m:
3902 days, hours, mins, secs, ms = m.groups()
3903 else:
15846398 3904 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3905 if m:
3906 hours, mins = m.groups()
3907 else:
3908 return None
3909
3910 duration = 0
3911 if secs:
3912 duration += float(secs)
3913 if mins:
3914 duration += float(mins) * 60
3915 if hours:
3916 duration += float(hours) * 60 * 60
3917 if days:
3918 duration += float(days) * 24 * 60 * 60
3919 if ms:
3920 duration += float(ms)
3921 return duration
91d7d0b3
JMF
3922
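# Illustrative usage of parse_duration (a minimal sketch): clock-style strings,
# ISO 8601 durations and spelled-out forms are all returned as seconds (float).
#   >>> parse_duration('1:23:45')
#   5025.0
#   >>> parse_duration('PT1H30M')
#   5400.0
#   >>> parse_duration('3 min')
#   180.0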
3923
e65e4c88 3924def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3925 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3926 return (
3927 '{0}.{1}{2}'.format(name, ext, real_ext)
3928 if not expected_real_ext or real_ext[1:] == expected_real_ext
3929 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3930
3931
b3ed15b7
S
3932def replace_extension(filename, ext, expected_real_ext=None):
3933 name, real_ext = os.path.splitext(filename)
3934 return '{0}.{1}'.format(
3935 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3936 ext)
3937
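# Illustrative usage of prepend_extension and replace_extension (a minimal
# sketch): expected_real_ext guards against mangling unexpected filenames.
#   >>> prepend_extension('video.mp4', 'temp')
#   'video.temp.mp4'
#   >>> prepend_extension('video.unknown', 'temp', expected_real_ext='mp4')
#   'video.unknown.temp'
#   >>> replace_extension('video.mp4', 'mkv')
#   'video.mkv'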
3938
d70ad093
PH
3939def check_executable(exe, args=[]):
3940 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3941 args can be a list of arguments for a short output (like -version) """
3942 try:
f5b1bca9 3943 process_communicate_or_kill(subprocess.Popen(
3944 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
d70ad093
PH
3945 except OSError:
3946 return False
3947 return exe
b7ab0590
PH
3948
3949
95807118 3950def get_exe_version(exe, args=['--version'],
cae97f65 3951 version_re=None, unrecognized='present'):
95807118
PH
3952 """ Returns the version of the specified executable,
3953 or False if the executable is not present """
3954 try:
b64d04c1 3955 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 3956 # SIGTTOU if yt-dlp is run in the background.
067aa17e 3957 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
f5b1bca9 3958 out, _ = process_communicate_or_kill(subprocess.Popen(
54116803 3959 [encodeArgument(exe)] + args,
00ca7552 3960 stdin=subprocess.PIPE,
f5b1bca9 3961 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
95807118
PH
3962 except OSError:
3963 return False
cae97f65
PH
3964 if isinstance(out, bytes): # Python 2.x
3965 out = out.decode('ascii', 'ignore')
3966 return detect_exe_version(out, version_re, unrecognized)
3967
3968
3969def detect_exe_version(output, version_re=None, unrecognized='present'):
3970 assert isinstance(output, compat_str)
3971 if version_re is None:
3972 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3973 m = re.search(version_re, output)
95807118
PH
3974 if m:
3975 return m.group(1)
3976 else:
3977 return unrecognized
3978
3979
cb89cfc1 3980class LazyList(collections.abc.Sequence):
483336e7 3981 ''' Lazy immutable list from an iterable
3982 Note that slices of a LazyList are lists and not LazyLists'''
3983
8e5fecc8 3984 class IndexError(IndexError):
3985 pass
3986
483336e7 3987 def __init__(self, iterable):
3988 self.__iterable = iter(iterable)
3989 self.__cache = []
28419ca2 3990 self.__reversed = False
483336e7 3991
3992 def __iter__(self):
28419ca2 3993 if self.__reversed:
3994 # We need to consume the entire iterable to iterate in reverse
981052c9 3995 yield from self.exhaust()
28419ca2 3996 return
3997 yield from self.__cache
483336e7 3998 for item in self.__iterable:
3999 self.__cache.append(item)
4000 yield item
4001
981052c9 4002 def __exhaust(self):
483336e7 4003 self.__cache.extend(self.__iterable)
28419ca2 4004 return self.__cache
4005
981052c9 4006 def exhaust(self):
4007 ''' Evaluate the entire iterable '''
4008 return self.__exhaust()[::-1 if self.__reversed else 1]
4009
28419ca2 4010 @staticmethod
981052c9 4011 def __reverse_index(x):
e0f2b4b4 4012 return None if x is None else -(x + 1)
483336e7 4013
4014 def __getitem__(self, idx):
4015 if isinstance(idx, slice):
28419ca2 4016 if self.__reversed:
e0f2b4b4 4017 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4018 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4019 elif isinstance(idx, int):
28419ca2 4020 if self.__reversed:
981052c9 4021 idx = self.__reverse_index(idx)
e0f2b4b4 4022 start, stop, step = idx, idx, 0
483336e7 4023 else:
4024 raise TypeError('indices must be integers or slices')
e0f2b4b4 4025 if ((start or 0) < 0 or (stop or 0) < 0
4026 or (start is None and step < 0)
4027 or (stop is None and step > 0)):
483336e7 4028 # We need to consume the entire iterable to be able to slice from the end
4029 # Obviously, never use this with infinite iterables
8e5fecc8 4030 self.__exhaust()
4031 try:
4032 return self.__cache[idx]
4033 except IndexError as e:
4034 raise self.IndexError(e) from e
e0f2b4b4 4035 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4036 if n > 0:
4037 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4038 try:
4039 return self.__cache[idx]
4040 except IndexError as e:
4041 raise self.IndexError(e) from e
483336e7 4042
4043 def __bool__(self):
4044 try:
28419ca2 4045 self[-1] if self.__reversed else self[0]
8e5fecc8 4046 except self.IndexError:
483336e7 4047 return False
4048 return True
4049
4050 def __len__(self):
8e5fecc8 4051 self.__exhaust()
483336e7 4052 return len(self.__cache)
4053
981052c9 4054 def reverse(self):
28419ca2 4055 self.__reversed = not self.__reversed
4056 return self
4057
4058 def __repr__(self):
4059 # repr and str should mimic a list. So we exhaust the iterable
4060 return repr(self.exhaust())
4061
4062 def __str__(self):
4063 return repr(self.exhaust())
4064
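# Illustrative usage of LazyList (a minimal sketch): items are pulled from the
# iterable only as far as needed, and slicing returns a plain list.
#   >>> lazy = LazyList(itertools.count())
#   >>> lazy[:5]
#   [0, 1, 2, 3, 4]
#   >>> lazy[7]
#   7
#   >>> bool(LazyList([]))
#   False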
483336e7 4065
7be9ccff 4066class PagedList:
dd26ced1
PH
4067 def __len__(self):
4068 # This is only useful for tests
4069 return len(self.getslice())
4070
7be9ccff 4071 def __init__(self, pagefunc, pagesize, use_cache=True):
4072 self._pagefunc = pagefunc
4073 self._pagesize = pagesize
4074 self._use_cache = use_cache
4075 self._cache = {}
4076
4077 def getpage(self, pagenum):
4078 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4079 if self._use_cache:
4080 self._cache[pagenum] = page_results
4081 return page_results
4082
4083 def getslice(self, start=0, end=None):
4084 return list(self._getslice(start, end))
4085
4086 def _getslice(self, start, end):
55575225 4087 raise NotImplementedError('This method must be implemented by subclasses')
4088
4089 def __getitem__(self, idx):
7be9ccff 4090 # NOTE: cache must be enabled if this is used
55575225 4091 if not isinstance(idx, int) or idx < 0:
4092 raise TypeError('indices must be non-negative integers')
4093 entries = self.getslice(idx, idx + 1)
4094 return entries[0] if entries else None
4095
9c44d242
PH
4096
4097class OnDemandPagedList(PagedList):
7be9ccff 4098 def _getslice(self, start, end):
b7ab0590
PH
4099 for pagenum in itertools.count(start // self._pagesize):
4100 firstid = pagenum * self._pagesize
4101 nextfirstid = pagenum * self._pagesize + self._pagesize
4102 if start >= nextfirstid:
4103 continue
4104
b7ab0590
PH
4105 startv = (
4106 start % self._pagesize
4107 if firstid <= start < nextfirstid
4108 else 0)
b7ab0590
PH
4109 endv = (
4110 ((end - 1) % self._pagesize) + 1
4111 if (end is not None and firstid <= end <= nextfirstid)
4112 else None)
4113
7be9ccff 4114 page_results = self.getpage(pagenum)
b7ab0590
PH
4115 if startv != 0 or endv is not None:
4116 page_results = page_results[startv:endv]
7be9ccff 4117 yield from page_results
b7ab0590
PH
4118
4119 # A little optimization - if the current page is not "full", i.e. does
4120 # not contain page_size videos, then we can assume that this page
4121 # is the last one - there are no more ids on further pages -
4122 # i.e. no need to query again.
4123 if len(page_results) + startv < self._pagesize:
4124 break
4125
4126 # If we got the whole page, but the next page is not interesting,
4127 # break out early as well
4128 if end == nextfirstid:
4129 break
81c2f20b
PH
4130
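# Illustrative usage of OnDemandPagedList (a minimal sketch; _fetch_page below
# is a hypothetical page function returning at most `pagesize` items per page):
#   >>> def _fetch_page(pagenum):
#   ...     return list(range(pagenum * 3, min(pagenum * 3 + 3, 10)))
#   >>> OnDemandPagedList(_fetch_page, 3).getslice(2, 7)
#   [2, 3, 4, 5, 6]
# Only the pages overlapping the requested slice are fetched (and cached).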
4131
9c44d242
PH
4132class InAdvancePagedList(PagedList):
4133 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4134 self._pagecount = pagecount
7be9ccff 4135 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4136
7be9ccff 4137 def _getslice(self, start, end):
9c44d242
PH
4138 start_page = start // self._pagesize
4139 end_page = (
4140 self._pagecount if end is None else (end // self._pagesize + 1))
4141 skip_elems = start - start_page * self._pagesize
4142 only_more = None if end is None else end - start
4143 for pagenum in range(start_page, end_page):
7be9ccff 4144 page_results = self.getpage(pagenum)
9c44d242 4145 if skip_elems:
7be9ccff 4146 page_results = page_results[skip_elems:]
9c44d242
PH
4147 skip_elems = None
4148 if only_more is not None:
7be9ccff 4149 if len(page_results) < only_more:
4150 only_more -= len(page_results)
9c44d242 4151 else:
7be9ccff 4152 yield from page_results[:only_more]
9c44d242 4153 break
7be9ccff 4154 yield from page_results
9c44d242
PH
4155
4156
81c2f20b 4157def uppercase_escape(s):
676eb3f2 4158 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4159 return re.sub(
a612753d 4160 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4161 lambda m: unicode_escape(m.group(0))[0],
4162 s)
0fe2ff78
YCH
4163
4164
4165def lowercase_escape(s):
4166 unicode_escape = codecs.getdecoder('unicode_escape')
4167 return re.sub(
4168 r'\\u[0-9a-fA-F]{4}',
4169 lambda m: unicode_escape(m.group(0))[0],
4170 s)
b53466e1 4171
d05cfe06
S
4172
4173def escape_rfc3986(s):
4174 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4175 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4176 s = s.encode('utf-8')
ecc0c5ee 4177 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4178
4179
4180def escape_url(url):
4181 """Escape URL as suggested by RFC 3986"""
4182 url_parsed = compat_urllib_parse_urlparse(url)
4183 return url_parsed._replace(
efbed08d 4184 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4185 path=escape_rfc3986(url_parsed.path),
4186 params=escape_rfc3986(url_parsed.params),
4187 query=escape_rfc3986(url_parsed.query),
4188 fragment=escape_rfc3986(url_parsed.fragment)
4189 ).geturl()
4190
62e609ab 4191
4dfbf869 4192def parse_qs(url):
4193 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4194
4195
62e609ab
PH
4196def read_batch_urls(batch_fd):
4197 def fixup(url):
4198 if not isinstance(url, compat_str):
4199 url = url.decode('utf-8', 'replace')
8c04f0be 4200 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4201 for bom in BOM_UTF8:
4202 if url.startswith(bom):
4203 url = url[len(bom):]
4204 url = url.lstrip()
4205 if not url or url.startswith(('#', ';', ']')):
62e609ab 4206 return False
8c04f0be 4207 # "#" cannot be stripped out since it is part of the URI
4208 # However, it can be safely stripped out if it follows a whitespace
4209 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4210
4211 with contextlib.closing(batch_fd) as fd:
4212 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4213
4214
4215def urlencode_postdata(*args, **kargs):
15707c7e 4216 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4217
4218
38f9ef31 4219def update_url_query(url, query):
cacd9966
YCH
4220 if not query:
4221 return url
38f9ef31 4222 parsed_url = compat_urlparse.urlparse(url)
4223 qs = compat_parse_qs(parsed_url.query)
4224 qs.update(query)
4225 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4226 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4227
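# Illustrative usage of update_url_query (a minimal sketch): existing
# parameters are kept and the given ones are added or overwritten.
#   >>> update_url_query('http://example.com/path?a=1', {'b': '2'})
#   'http://example.com/path?a=1&b=2'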
8e60dc75 4228
ed0291d1
S
4229def update_Request(req, url=None, data=None, headers={}, query={}):
4230 req_headers = req.headers.copy()
4231 req_headers.update(headers)
4232 req_data = data or req.data
4233 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4234 req_get_method = req.get_method()
4235 if req_get_method == 'HEAD':
4236 req_type = HEADRequest
4237 elif req_get_method == 'PUT':
4238 req_type = PUTRequest
4239 else:
4240 req_type = compat_urllib_request.Request
ed0291d1
S
4241 new_req = req_type(
4242 req_url, data=req_data, headers=req_headers,
4243 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4244 if hasattr(req, 'timeout'):
4245 new_req.timeout = req.timeout
4246 return new_req
4247
4248
10c87c15 4249def _multipart_encode_impl(data, boundary):
0c265486
YCH
4250 content_type = 'multipart/form-data; boundary=%s' % boundary
4251
4252 out = b''
4253 for k, v in data.items():
4254 out += b'--' + boundary.encode('ascii') + b'\r\n'
4255 if isinstance(k, compat_str):
4256 k = k.encode('utf-8')
4257 if isinstance(v, compat_str):
4258 v = v.encode('utf-8')
4259 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4260 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4261 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4262 if boundary.encode('ascii') in content:
4263 raise ValueError('Boundary overlaps with data')
4264 out += content
4265
4266 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4267
4268 return out, content_type
4269
4270
4271def multipart_encode(data, boundary=None):
4272 '''
4273 Encode a dict to RFC 7578-compliant form-data
4274
4275 data:
4276 A dict where keys and values can be either Unicode or bytes-like
4277 objects.
4278 boundary:
4280 If specified, it must be a Unicode object, which is used as the boundary.
4281 Otherwise a random boundary is generated.
4281
4282 Reference: https://tools.ietf.org/html/rfc7578
4283 '''
4284 has_specified_boundary = boundary is not None
4285
4286 while True:
4287 if boundary is None:
4288 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4289
4290 try:
10c87c15 4291 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4292 break
4293 except ValueError:
4294 if has_specified_boundary:
4295 raise
4296 boundary = None
4297
4298 return out, content_type
4299
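# Illustrative usage of multipart_encode (a minimal sketch): the returned
# content type carries the boundary and belongs in the request headers.
#   >>> body, content_type = multipart_encode({'field': 'value'}, boundary='xxx')
#   >>> content_type
#   'multipart/form-data; boundary=xxx'
#   >>> body
#   b'--xxx\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--xxx--\r\n'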
4300
86296ad2 4301def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4302 if isinstance(key_or_keys, (list, tuple)):
4303 for key in key_or_keys:
86296ad2
S
4304 if key not in d or d[key] is None or skip_false_values and not d[key]:
4305 continue
4306 return d[key]
cbecc9b9
S
4307 return default
4308 return d.get(key_or_keys, default)
4309
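# Illustrative usage of dict_get (a minimal sketch): the first key with a
# usable (non-None and, by default, truthy) value wins.
#   >>> dict_get({'a': None, 'b': 0, 'c': 'x'}, ('a', 'b', 'c'))
#   'x'
#   >>> dict_get({'a': None, 'b': 0}, ('a', 'b'), skip_false_values=False)
#   0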
4310
329ca3be 4311def try_get(src, getter, expected_type=None):
6606817a 4312 for get in variadic(getter):
a32a9a7e
S
4313 try:
4314 v = get(src)
4315 except (AttributeError, KeyError, TypeError, IndexError):
4316 pass
4317 else:
4318 if expected_type is None or isinstance(v, expected_type):
4319 return v
329ca3be
S
4320
4321
6cc62232
S
4322def merge_dicts(*dicts):
4323 merged = {}
4324 for a_dict in dicts:
4325 for k, v in a_dict.items():
4326 if v is None:
4327 continue
3089bc74
S
4328 if (k not in merged
4329 or (isinstance(v, compat_str) and v
4330 and isinstance(merged[k], compat_str)
4331 and not merged[k])):
6cc62232
S
4332 merged[k] = v
4333 return merged
4334
4335
8e60dc75
S
4336def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4337 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4338
16392824 4339
a1a530b0
PH
4340US_RATINGS = {
4341 'G': 0,
4342 'PG': 10,
4343 'PG-13': 13,
4344 'R': 16,
4345 'NC': 18,
4346}
fac55558
PH
4347
4348
a8795327 4349TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4350 'TV-Y': 0,
4351 'TV-Y7': 7,
4352 'TV-G': 0,
4353 'TV-PG': 0,
4354 'TV-14': 14,
4355 'TV-MA': 17,
a8795327
S
4356}
4357
4358
146c80e2 4359def parse_age_limit(s):
a8795327
S
4360 if type(s) == int:
4361 return s if 0 <= s <= 21 else None
4362 if not isinstance(s, compat_basestring):
d838b1bd 4363 return None
146c80e2 4364 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4365 if m:
4366 return int(m.group('age'))
5c5fae6d 4367 s = s.upper()
a8795327
S
4368 if s in US_RATINGS:
4369 return US_RATINGS[s]
5a16c9d9 4370 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4371 if m:
5a16c9d9 4372 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4373 return None
146c80e2
S
4374
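# Illustrative usage of parse_age_limit (a minimal sketch): MPAA ratings,
# TV parental guidelines and plain "NN+" strings all map to an integer age.
#   >>> parse_age_limit('PG-13')
#   13
#   >>> parse_age_limit('TV-MA')
#   17
#   >>> parse_age_limit('18+')
#   18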
4375
fac55558 4376def strip_jsonp(code):
609a61e3 4377 return re.sub(
5552c9eb 4378 r'''(?sx)^
e9c671d5 4379 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4380 (?:\s*&&\s*(?P=func_name))?
4381 \s*\(\s*(?P<callback_data>.*)\);?
4382 \s*?(?://[^\n]*)*$''',
4383 r'\g<callback_data>', code)
478c2c61
PH
4384
4385
5c610515 4386def js_to_json(code, vars={}):
4387 # vars is a dict of var, val pairs to substitute
c843e685 4388 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4389 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4390 INTEGER_TABLE = (
4391 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4392 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4393 )
4394
e05f6939 4395 def fix_kv(m):
e7b6d122
PH
4396 v = m.group(0)
4397 if v in ('true', 'false', 'null'):
4398 return v
421ddcb8
C
4399 elif v in ('undefined', 'void 0'):
4400 return 'null'
8bdd16b4 4401 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4402 return ""
4403
4404 if v[0] in ("'", '"'):
4405 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4406 '"': '\\"',
bd1e4844 4407 "\\'": "'",
4408 '\\\n': '',
4409 '\\x': '\\u00',
4410 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4411 else:
4412 for regex, base in INTEGER_TABLE:
4413 im = re.match(regex, v)
4414 if im:
4415 i = int(im.group(1), base)
4416 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4417
5c610515 4418 if v in vars:
4419 return vars[v]
4420
e7b6d122 4421 return '"%s"' % v
e05f6939 4422
bd1e4844 4423 return re.sub(r'''(?sx)
4424 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4425 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4426 {comment}|,(?={skip}[\]}}])|
421ddcb8 4427 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4428 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4429 [0-9]+(?={skip}:)|
4430 !+
4195096e 4431 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4432
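# Illustrative usage of js_to_json (a minimal sketch): bare keys are quoted,
# single quotes become double quotes and hex/octal literals are converted, so
# the result can be fed to json.loads().
#   >>> js_to_json("{foo: 'bar', baz: 0x10}")
#   '{"foo": "bar", "baz": 16}'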
4433
478c2c61
PH
4434def qualities(quality_ids):
4435 """ Get a numeric quality value out of a list of possible values """
4436 def q(qid):
4437 try:
4438 return quality_ids.index(qid)
4439 except ValueError:
4440 return -1
4441 return q
4442
acd69589 4443
de6000d9 4444DEFAULT_OUTTMPL = {
4445 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4446 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4447}
4448OUTTMPL_TYPES = {
72755351 4449 'chapter': None,
de6000d9 4450 'subtitle': None,
4451 'thumbnail': None,
4452 'description': 'description',
4453 'annotation': 'annotations.xml',
4454 'infojson': 'info.json',
5112f26a 4455 'pl_thumbnail': None,
de6000d9 4456 'pl_description': 'description',
4457 'pl_infojson': 'info.json',
4458}
0a871f68 4459
143db31d 4460# As of [1] format syntax is:
4461# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4462# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4463STR_FORMAT_RE_TMPL = r'''(?x)
4464 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4465 %
752cda38 4466 (?P<has_key>\((?P<key>{0})\))? # mapping key
4467 (?P<format>
4468 (?:[#0\-+ ]+)? # conversion flags (optional)
4469 (?:\d+)? # minimum field width (optional)
4470 (?:\.\d+)? # precision (optional)
4471 [hlL]? # length modifier (optional)
901130bb 4472 {1} # conversion type
752cda38 4473 )
143db31d 4474'''
4475
7d1eb38a 4476
901130bb 4477STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4478
7d1eb38a 4479
a020a0dc
PH
4480def limit_length(s, length):
4481 """ Add ellipses to overly long strings """
4482 if s is None:
4483 return None
4484 ELLIPSES = '...'
4485 if len(s) > length:
4486 return s[:length - len(ELLIPSES)] + ELLIPSES
4487 return s
48844745
PH
4488
4489
4490def version_tuple(v):
5f9b8394 4491 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4492
4493
4494def is_outdated_version(version, limit, assume_new=True):
4495 if not version:
4496 return not assume_new
4497 try:
4498 return version_tuple(version) < version_tuple(limit)
4499 except ValueError:
4500 return not assume_new
732ea2f0
PH
4501
4502
4503def ytdl_is_updateable():
7a5c1cfe 4504 """ Returns if yt-dlp can be updated with -U """
735d865e 4505 return False
4506
732ea2f0
PH
4507 from zipimport import zipimporter
4508
4509 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
4510
4511
4512def args_to_str(args):
4513 # Get a short string representation for a subprocess command
702ccf2d 4514 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4515
4516
9b9c5355 4517def error_to_compat_str(err):
fdae2358
S
4518 err_str = str(err)
4519 # On Python 2, the error byte string must be decoded with the proper
4520 # encoding rather than ascii
4521 if sys.version_info[0] < 3:
4522 err_str = err_str.decode(preferredencoding())
4523 return err_str
4524
4525
c460bdd5 4526def mimetype2ext(mt):
eb9ee194
S
4527 if mt is None:
4528 return None
4529
765ac263
JMF
4530 ext = {
4531 'audio/mp4': 'm4a',
6c33d24b
YCH
4532 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4533 # it's the most popular one
4534 'audio/mpeg': 'mp3',
ba39289d 4535 'audio/x-wav': 'wav',
765ac263
JMF
4536 }.get(mt)
4537 if ext is not None:
4538 return ext
4539
c460bdd5 4540 _, _, res = mt.rpartition('/')
6562d34a 4541 res = res.split(';')[0].strip().lower()
c460bdd5
PH
4542
4543 return {
f6861ec9 4544 '3gpp': '3gp',
cafcf657 4545 'smptett+xml': 'tt',
cafcf657 4546 'ttaf+xml': 'dfxp',
a0d8d704 4547 'ttml+xml': 'ttml',
f6861ec9 4548 'x-flv': 'flv',
a0d8d704 4549 'x-mp4-fragmented': 'mp4',
d4f05d47 4550 'x-ms-sami': 'sami',
a0d8d704 4551 'x-ms-wmv': 'wmv',
b4173f15
RA
4552 'mpegurl': 'm3u8',
4553 'x-mpegurl': 'm3u8',
4554 'vnd.apple.mpegurl': 'm3u8',
4555 'dash+xml': 'mpd',
b4173f15 4556 'f4m+xml': 'f4m',
f164b971 4557 'hds+xml': 'f4m',
e910fe2f 4558 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4559 'quicktime': 'mov',
98ce1a3f 4560 'mp2t': 'ts',
39e7107d 4561 'x-wav': 'wav',
c460bdd5
PH
4562 }.get(res, res)
4563
4564
4f3c5e06 4565def parse_codecs(codecs_str):
4566 # http://tools.ietf.org/html/rfc6381
4567 if not codecs_str:
4568 return {}
a0566bbf 4569 split_codecs = list(filter(None, map(
dbf5416a 4570 str.strip, codecs_str.strip().strip(',').split(','))))
4f3c5e06 4571 vcodec, acodec = None, None
a0566bbf 4572 for full_codec in split_codecs:
4f3c5e06 4573 codec = full_codec.split('.')[0]
28cc2241 4574 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4575 if not vcodec:
4576 vcodec = full_codec
60f5c9fb 4577 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4578 if not acodec:
4579 acodec = full_codec
4580 else:
60f5c9fb 4581 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4582 if not vcodec and not acodec:
a0566bbf 4583 if len(split_codecs) == 2:
4f3c5e06 4584 return {
a0566bbf 4585 'vcodec': split_codecs[0],
4586 'acodec': split_codecs[1],
4f3c5e06 4587 }
4588 else:
4589 return {
4590 'vcodec': vcodec or 'none',
4591 'acodec': acodec or 'none',
4592 }
4593 return {}
4594
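# Illustrative usage of parse_codecs (a minimal sketch): an RFC 6381 codecs
# string is split into the video and audio codec of a format.
#   >>> parse_codecs('avc1.64001f, mp4a.40.2')
#   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
#   >>> parse_codecs('opus')
#   {'vcodec': 'none', 'acodec': 'opus'}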
4595
2ccd1b10 4596def urlhandle_detect_ext(url_handle):
79298173 4597 getheader = url_handle.headers.get
2ccd1b10 4598
b55ee18f
PH
4599 cd = getheader('Content-Disposition')
4600 if cd:
4601 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4602 if m:
4603 e = determine_ext(m.group('filename'), default_ext=None)
4604 if e:
4605 return e
4606
c460bdd5 4607 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4608
4609
1e399778
YCH
4610def encode_data_uri(data, mime_type):
4611 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4612
4613
05900629 4614def age_restricted(content_limit, age_limit):
6ec6cb4e 4615 """ Returns True iff the content should be blocked """
05900629
PH
4616
4617 if age_limit is None: # No limit set
4618 return False
4619 if content_limit is None:
4620 return False # Content available for everyone
4621 return age_limit < content_limit
61ca9a80
PH
4622
4623
4624def is_html(first_bytes):
4625 """ Detect whether a file contains HTML by examining its first bytes. """
4626
4627 BOMS = [
4628 (b'\xef\xbb\xbf', 'utf-8'),
4629 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4630 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4631 (b'\xff\xfe', 'utf-16-le'),
4632 (b'\xfe\xff', 'utf-16-be'),
4633 ]
4634 for bom, enc in BOMS:
4635 if first_bytes.startswith(bom):
4636 s = first_bytes[len(bom):].decode(enc, 'replace')
4637 break
4638 else:
4639 s = first_bytes.decode('utf-8', 'replace')
4640
4641 return re.match(r'^\s*<', s)
a055469f
PH
4642
4643
4644def determine_protocol(info_dict):
4645 protocol = info_dict.get('protocol')
4646 if protocol is not None:
4647 return protocol
4648
4649 url = info_dict['url']
4650 if url.startswith('rtmp'):
4651 return 'rtmp'
4652 elif url.startswith('mms'):
4653 return 'mms'
4654 elif url.startswith('rtsp'):
4655 return 'rtsp'
4656
4657 ext = determine_ext(url)
4658 if ext == 'm3u8':
4659 return 'm3u8'
4660 elif ext == 'f4m':
4661 return 'f4m'
4662
4663 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4664
4665
76d321f6 4666def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4667 """ Render a list of rows, each as a list of values """
76d321f6 4668
4669 def get_max_lens(table):
4670 return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4671
4672 def filter_using_list(row, filterArray):
4673 return [col for (take, col) in zip(filterArray, row) if take]
4674
4675 if hideEmpty:
4676 max_lens = get_max_lens(data)
4677 header_row = filter_using_list(header_row, max_lens)
4678 data = [filter_using_list(row, max_lens) for row in data]
4679
cfb56d1a 4680 table = [header_row] + data
76d321f6 4681 max_lens = get_max_lens(table)
4682 if delim:
4683 table = [header_row] + [['-' * ml for ml in max_lens]] + data
4684 format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
cfb56d1a 4685 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4686
4687
8f18aca8 4688def _match_one(filter_part, dct, incomplete):
77b87f05 4689 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4690 STRING_OPERATORS = {
4691 '*=': operator.contains,
4692 '^=': lambda attr, value: attr.startswith(value),
4693 '$=': lambda attr, value: attr.endswith(value),
4694 '~=': lambda attr, value: re.search(value, attr),
4695 }
347de493 4696 COMPARISON_OPERATORS = {
a047eeb6 4697 **STRING_OPERATORS,
4698 '<=': operator.le, # "<=" must be defined above "<"
347de493 4699 '<': operator.lt,
347de493 4700 '>=': operator.ge,
a047eeb6 4701 '>': operator.gt,
347de493 4702 '=': operator.eq,
347de493 4703 }
a047eeb6 4704
347de493
PH
4705 operator_rex = re.compile(r'''(?x)\s*
4706 (?P<key>[a-z_]+)
77b87f05 4707 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493
PH
4708 (?:
4709 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
a047eeb6 4710 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4711 (?P<strval>.+?)
347de493
PH
4712 )
4713 \s*$
4714 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4715 m = operator_rex.search(filter_part)
4716 if m:
77b87f05
MT
4717 unnegated_op = COMPARISON_OPERATORS[m.group('op')]
4718 if m.group('negation'):
4719 op = lambda attr, value: not unnegated_op(attr, value)
4720 else:
4721 op = unnegated_op
e5a088dc 4722 actual_value = dct.get(m.group('key'))
3089bc74
S
4723 if (m.group('quotedstrval') is not None
4724 or m.group('strval') is not None
e5a088dc
S
4725 # If the original field is a string and matching comparisonvalue is
4726 # a number we should respect the origin of the original field
4727 # and process comparison value as a string (see
067aa17e 4728 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4729 or actual_value is not None and m.group('intval') is not None
4730 and isinstance(actual_value, compat_str)):
db13c16e
S
4731 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4732 quote = m.group('quote')
4733 if quote is not None:
4734 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493 4735 else:
a047eeb6 4736 if m.group('op') in STRING_OPERATORS:
4737 raise ValueError('Operator %s only supports string values!' % m.group('op'))
347de493
PH
4738 try:
4739 comparison_value = int(m.group('intval'))
4740 except ValueError:
4741 comparison_value = parse_filesize(m.group('intval'))
4742 if comparison_value is None:
4743 comparison_value = parse_filesize(m.group('intval') + 'B')
4744 if comparison_value is None:
4745 raise ValueError(
4746 'Invalid integer value %r in filter part %r' % (
4747 m.group('intval'), filter_part))
347de493 4748 if actual_value is None:
8f18aca8 4749 return incomplete or m.group('none_inclusive')
347de493
PH
4750 return op(actual_value, comparison_value)
4751
4752 UNARY_OPERATORS = {
1cc47c66
S
4753 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4754 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4755 }
4756 operator_rex = re.compile(r'''(?x)\s*
4757 (?P<op>%s)\s*(?P<key>[a-z_]+)
4758 \s*$
4759 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4760 m = operator_rex.search(filter_part)
4761 if m:
4762 op = UNARY_OPERATORS[m.group('op')]
4763 actual_value = dct.get(m.group('key'))
8f18aca8 4764 if incomplete and actual_value is None:
4765 return True
347de493
PH
4766 return op(actual_value)
4767
4768 raise ValueError('Invalid filter part %r' % filter_part)
4769
4770
8f18aca8 4771def match_str(filter_str, dct, incomplete=False):
4772 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4773 When incomplete, all conditions passes on missing fields
4774 """
347de493 4775 return all(
8f18aca8 4776 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4777 for filter_part in re.split(r'(?<!\\)&', filter_str))
347de493
PH
4778
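# Illustrative usage of match_str (a minimal sketch of the filter syntax):
# numeric comparisons, "?" for fields that may be missing, and string operators
# such as *= (contains) are supported; conditions are joined with "&".
#   >>> match_str('like_count > 100 & description *= dog',
#   ...           {'like_count': 190, 'description': 'raining cats and dogs'})
#   True
#   >>> match_str('dislike_count <? 50', {})
#   True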
4779
4780def match_filter_func(filter_str):
8f18aca8 4781 def _match_func(info_dict, *args, **kwargs):
4782 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4783 return None
4784 else:
4785 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4786 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4787 return _match_func
91410c9b
PH
4788
4789
bf6427d2
YCH
4790def parse_dfxp_time_expr(time_expr):
4791 if not time_expr:
d631d5f9 4792 return
bf6427d2
YCH
4793
4794 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4795 if mobj:
4796 return float(mobj.group('time_offset'))
4797
db2fe38b 4798 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4799 if mobj:
db2fe38b 4800 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4801
4802
c1c924ab
YCH
4803def srt_subtitles_timecode(seconds):
4804 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
bf6427d2
YCH
4805
4806
4807def dfxp2srt(dfxp_data):
3869028f
YCH
4808 '''
4809 @param dfxp_data A bytes-like object containing DFXP data
4810 @returns A unicode object containing converted SRT data
4811 '''
5b995f71 4812 LEGACY_NAMESPACES = (
3869028f
YCH
4813 (b'http://www.w3.org/ns/ttml', [
4814 b'http://www.w3.org/2004/11/ttaf1',
4815 b'http://www.w3.org/2006/04/ttaf1',
4816 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4817 ]),
3869028f
YCH
4818 (b'http://www.w3.org/ns/ttml#styling', [
4819 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4820 ]),
4821 )
4822
4823 SUPPORTED_STYLING = [
4824 'color',
4825 'fontFamily',
4826 'fontSize',
4827 'fontStyle',
4828 'fontWeight',
4829 'textDecoration'
4830 ]
4831
4e335771 4832 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4833 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4834 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4835 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4836 })
bf6427d2 4837
5b995f71
RA
4838 styles = {}
4839 default_style = {}
4840
87de7069 4841 class TTMLPElementParser(object):
5b995f71
RA
4842 _out = ''
4843 _unclosed_elements = []
4844 _applied_styles = []
bf6427d2 4845
2b14cb56 4846 def start(self, tag, attrib):
5b995f71
RA
4847 if tag in (_x('ttml:br'), 'br'):
4848 self._out += '\n'
4849 else:
4850 unclosed_elements = []
4851 style = {}
4852 element_style_id = attrib.get('style')
4853 if default_style:
4854 style.update(default_style)
4855 if element_style_id:
4856 style.update(styles.get(element_style_id, {}))
4857 for prop in SUPPORTED_STYLING:
4858 prop_val = attrib.get(_x('tts:' + prop))
4859 if prop_val:
4860 style[prop] = prop_val
4861 if style:
4862 font = ''
4863 for k, v in sorted(style.items()):
4864 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4865 continue
4866 if k == 'color':
4867 font += ' color="%s"' % v
4868 elif k == 'fontSize':
4869 font += ' size="%s"' % v
4870 elif k == 'fontFamily':
4871 font += ' face="%s"' % v
4872 elif k == 'fontWeight' and v == 'bold':
4873 self._out += '<b>'
4874 unclosed_elements.append('b')
4875 elif k == 'fontStyle' and v == 'italic':
4876 self._out += '<i>'
4877 unclosed_elements.append('i')
4878 elif k == 'textDecoration' and v == 'underline':
4879 self._out += '<u>'
4880 unclosed_elements.append('u')
4881 if font:
4882 self._out += '<font' + font + '>'
4883 unclosed_elements.append('font')
4884 applied_style = {}
4885 if self._applied_styles:
4886 applied_style.update(self._applied_styles[-1])
4887 applied_style.update(style)
4888 self._applied_styles.append(applied_style)
4889 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4890
2b14cb56 4891 def end(self, tag):
5b995f71
RA
4892 if tag not in (_x('ttml:br'), 'br'):
4893 unclosed_elements = self._unclosed_elements.pop()
4894 for element in reversed(unclosed_elements):
4895 self._out += '</%s>' % element
4896 if unclosed_elements and self._applied_styles:
4897 self._applied_styles.pop()
bf6427d2 4898
2b14cb56 4899 def data(self, data):
5b995f71 4900 self._out += data
2b14cb56 4901
4902 def close(self):
5b995f71 4903 return self._out.strip()
2b14cb56 4904
4905 def parse_node(node):
4906 target = TTMLPElementParser()
4907 parser = xml.etree.ElementTree.XMLParser(target=target)
4908 parser.feed(xml.etree.ElementTree.tostring(node))
4909 return parser.close()
bf6427d2 4910
5b995f71
RA
4911 for k, v in LEGACY_NAMESPACES:
4912 for ns in v:
4913 dfxp_data = dfxp_data.replace(ns, k)
4914
3869028f 4915 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 4916 out = []
5b995f71 4917 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
4918
4919 if not paras:
4920 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 4921
5b995f71
RA
4922 repeat = False
4923 while True:
4924 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
4925 style_id = style.get('id') or style.get(_x('xml:id'))
4926 if not style_id:
4927 continue
5b995f71
RA
4928 parent_style_id = style.get('style')
4929 if parent_style_id:
4930 if parent_style_id not in styles:
4931 repeat = True
4932 continue
4933 styles[style_id] = styles[parent_style_id].copy()
4934 for prop in SUPPORTED_STYLING:
4935 prop_val = style.get(_x('tts:' + prop))
4936 if prop_val:
4937 styles.setdefault(style_id, {})[prop] = prop_val
4938 if repeat:
4939 repeat = False
4940 else:
4941 break
4942
4943 for p in ('body', 'div'):
4944 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4945 if ele is None:
4946 continue
4947 style = styles.get(ele.get('style'))
4948 if not style:
4949 continue
4950 default_style.update(style)
4951
bf6427d2 4952 for para, index in zip(paras, itertools.count(1)):
d631d5f9 4953 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 4954 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
4955 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4956 if begin_time is None:
4957 continue
7dff0363 4958 if not end_time:
d631d5f9
YCH
4959 if not dur:
4960 continue
4961 end_time = begin_time + dur
bf6427d2
YCH
4962 out.append('%d\n%s --> %s\n%s\n\n' % (
4963 index,
c1c924ab
YCH
4964 srt_subtitles_timecode(begin_time),
4965 srt_subtitles_timecode(end_time),
bf6427d2
YCH
4966 parse_node(para)))
4967
4968 return ''.join(out)
4969
4970
66e289ba
S
4971def cli_option(params, command_option, param):
4972 param = params.get(param)
98e698f1
RA
4973 if param:
4974 param = compat_str(param)
66e289ba
S
4975 return [command_option, param] if param is not None else []
4976
4977
4978def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4979 param = params.get(param)
5b232f46
S
4980 if param is None:
4981 return []
66e289ba
S
4982 assert isinstance(param, bool)
4983 if separator:
4984 return [command_option + separator + (true_value if param else false_value)]
4985 return [command_option, true_value if param else false_value]
4986
4987
4988def cli_valueless_option(params, command_option, param, expected_value=True):
4989 param = params.get(param)
4990 return [command_option] if param == expected_value else []
4991
4992
e92caff5 4993def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 4994 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 4995 if use_compat:
5b1ecbb3 4996 return argdict
4997 else:
4998 argdict = None
eab9b2bc 4999 if argdict is None:
5b1ecbb3 5000 return default
eab9b2bc 5001 assert isinstance(argdict, dict)
5002
e92caff5 5003 assert isinstance(keys, (list, tuple))
5004 for key_list in keys:
e92caff5 5005 arg_list = list(filter(
5006 lambda x: x is not None,
6606817a 5007 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5008 if arg_list:
5009 return [arg for args in arg_list for arg in args]
5010 return default
66e289ba 5011
6251555f 5012
330690a2 5013def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5014 main_key, exe = main_key.lower(), exe.lower()
5015 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5016 keys = [f'{root_key}{k}' for k in (keys or [''])]
5017 if root_key in keys:
5018 if main_key != exe:
5019 keys.append((main_key, exe))
5020 keys.append('default')
5021 else:
5022 use_compat = False
5023 return cli_configuration_args(argdict, keys, default, use_compat)
5024
66e289ba 5025
39672624
YCH
5026class ISO639Utils(object):
5027 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5028 _lang_map = {
5029 'aa': 'aar',
5030 'ab': 'abk',
5031 'ae': 'ave',
5032 'af': 'afr',
5033 'ak': 'aka',
5034 'am': 'amh',
5035 'an': 'arg',
5036 'ar': 'ara',
5037 'as': 'asm',
5038 'av': 'ava',
5039 'ay': 'aym',
5040 'az': 'aze',
5041 'ba': 'bak',
5042 'be': 'bel',
5043 'bg': 'bul',
5044 'bh': 'bih',
5045 'bi': 'bis',
5046 'bm': 'bam',
5047 'bn': 'ben',
5048 'bo': 'bod',
5049 'br': 'bre',
5050 'bs': 'bos',
5051 'ca': 'cat',
5052 'ce': 'che',
5053 'ch': 'cha',
5054 'co': 'cos',
5055 'cr': 'cre',
5056 'cs': 'ces',
5057 'cu': 'chu',
5058 'cv': 'chv',
5059 'cy': 'cym',
5060 'da': 'dan',
5061 'de': 'deu',
5062 'dv': 'div',
5063 'dz': 'dzo',
5064 'ee': 'ewe',
5065 'el': 'ell',
5066 'en': 'eng',
5067 'eo': 'epo',
5068 'es': 'spa',
5069 'et': 'est',
5070 'eu': 'eus',
5071 'fa': 'fas',
5072 'ff': 'ful',
5073 'fi': 'fin',
5074 'fj': 'fij',
5075 'fo': 'fao',
5076 'fr': 'fra',
5077 'fy': 'fry',
5078 'ga': 'gle',
5079 'gd': 'gla',
5080 'gl': 'glg',
5081 'gn': 'grn',
5082 'gu': 'guj',
5083 'gv': 'glv',
5084 'ha': 'hau',
5085 'he': 'heb',
b7acc835 5086 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5087 'hi': 'hin',
5088 'ho': 'hmo',
5089 'hr': 'hrv',
5090 'ht': 'hat',
5091 'hu': 'hun',
5092 'hy': 'hye',
5093 'hz': 'her',
5094 'ia': 'ina',
5095 'id': 'ind',
b7acc835 5096 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5097 'ie': 'ile',
5098 'ig': 'ibo',
5099 'ii': 'iii',
5100 'ik': 'ipk',
5101 'io': 'ido',
5102 'is': 'isl',
5103 'it': 'ita',
5104 'iu': 'iku',
5105 'ja': 'jpn',
5106 'jv': 'jav',
5107 'ka': 'kat',
5108 'kg': 'kon',
5109 'ki': 'kik',
5110 'kj': 'kua',
5111 'kk': 'kaz',
5112 'kl': 'kal',
5113 'km': 'khm',
5114 'kn': 'kan',
5115 'ko': 'kor',
5116 'kr': 'kau',
5117 'ks': 'kas',
5118 'ku': 'kur',
5119 'kv': 'kom',
5120 'kw': 'cor',
5121 'ky': 'kir',
5122 'la': 'lat',
5123 'lb': 'ltz',
5124 'lg': 'lug',
5125 'li': 'lim',
5126 'ln': 'lin',
5127 'lo': 'lao',
5128 'lt': 'lit',
5129 'lu': 'lub',
5130 'lv': 'lav',
5131 'mg': 'mlg',
5132 'mh': 'mah',
5133 'mi': 'mri',
5134 'mk': 'mkd',
5135 'ml': 'mal',
5136 'mn': 'mon',
5137 'mr': 'mar',
5138 'ms': 'msa',
5139 'mt': 'mlt',
5140 'my': 'mya',
5141 'na': 'nau',
5142 'nb': 'nob',
5143 'nd': 'nde',
5144 'ne': 'nep',
5145 'ng': 'ndo',
5146 'nl': 'nld',
5147 'nn': 'nno',
5148 'no': 'nor',
5149 'nr': 'nbl',
5150 'nv': 'nav',
5151 'ny': 'nya',
5152 'oc': 'oci',
5153 'oj': 'oji',
5154 'om': 'orm',
5155 'or': 'ori',
5156 'os': 'oss',
5157 'pa': 'pan',
5158 'pi': 'pli',
5159 'pl': 'pol',
5160 'ps': 'pus',
5161 'pt': 'por',
5162 'qu': 'que',
5163 'rm': 'roh',
5164 'rn': 'run',
5165 'ro': 'ron',
5166 'ru': 'rus',
5167 'rw': 'kin',
5168 'sa': 'san',
5169 'sc': 'srd',
5170 'sd': 'snd',
5171 'se': 'sme',
5172 'sg': 'sag',
5173 'si': 'sin',
5174 'sk': 'slk',
5175 'sl': 'slv',
5176 'sm': 'smo',
5177 'sn': 'sna',
5178 'so': 'som',
5179 'sq': 'sqi',
5180 'sr': 'srp',
5181 'ss': 'ssw',
5182 'st': 'sot',
5183 'su': 'sun',
5184 'sv': 'swe',
5185 'sw': 'swa',
5186 'ta': 'tam',
5187 'te': 'tel',
5188 'tg': 'tgk',
5189 'th': 'tha',
5190 'ti': 'tir',
5191 'tk': 'tuk',
5192 'tl': 'tgl',
5193 'tn': 'tsn',
5194 'to': 'ton',
5195 'tr': 'tur',
5196 'ts': 'tso',
5197 'tt': 'tat',
5198 'tw': 'twi',
5199 'ty': 'tah',
5200 'ug': 'uig',
5201 'uk': 'ukr',
5202 'ur': 'urd',
5203 'uz': 'uzb',
5204 've': 'ven',
5205 'vi': 'vie',
5206 'vo': 'vol',
5207 'wa': 'wln',
5208 'wo': 'wol',
5209 'xh': 'xho',
5210 'yi': 'yid',
e9a50fba 5211 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5212 'yo': 'yor',
5213 'za': 'zha',
5214 'zh': 'zho',
5215 'zu': 'zul',
5216 }
5217
5218 @classmethod
5219 def short2long(cls, code):
5220 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5221 return cls._lang_map.get(code[:2])
5222
5223 @classmethod
5224 def long2short(cls, code):
5225 """Convert language code from ISO 639-2/T to ISO 639-1"""
5226 for short_name, long_name in cls._lang_map.items():
5227 if long_name == code:
5228 return short_name
5229
5230
4eb10f66
YCH
5231class ISO3166Utils(object):
5232 # From http://data.okfn.org/data/core/country-list
5233 _country_map = {
5234 'AF': 'Afghanistan',
5235 'AX': 'Åland Islands',
5236 'AL': 'Albania',
5237 'DZ': 'Algeria',
5238 'AS': 'American Samoa',
5239 'AD': 'Andorra',
5240 'AO': 'Angola',
5241 'AI': 'Anguilla',
5242 'AQ': 'Antarctica',
5243 'AG': 'Antigua and Barbuda',
5244 'AR': 'Argentina',
5245 'AM': 'Armenia',
5246 'AW': 'Aruba',
5247 'AU': 'Australia',
5248 'AT': 'Austria',
5249 'AZ': 'Azerbaijan',
5250 'BS': 'Bahamas',
5251 'BH': 'Bahrain',
5252 'BD': 'Bangladesh',
5253 'BB': 'Barbados',
5254 'BY': 'Belarus',
5255 'BE': 'Belgium',
5256 'BZ': 'Belize',
5257 'BJ': 'Benin',
5258 'BM': 'Bermuda',
5259 'BT': 'Bhutan',
5260 'BO': 'Bolivia, Plurinational State of',
5261 'BQ': 'Bonaire, Sint Eustatius and Saba',
5262 'BA': 'Bosnia and Herzegovina',
5263 'BW': 'Botswana',
5264 'BV': 'Bouvet Island',
5265 'BR': 'Brazil',
5266 'IO': 'British Indian Ocean Territory',
5267 'BN': 'Brunei Darussalam',
5268 'BG': 'Bulgaria',
5269 'BF': 'Burkina Faso',
5270 'BI': 'Burundi',
5271 'KH': 'Cambodia',
5272 'CM': 'Cameroon',
5273 'CA': 'Canada',
5274 'CV': 'Cape Verde',
5275 'KY': 'Cayman Islands',
5276 'CF': 'Central African Republic',
5277 'TD': 'Chad',
5278 'CL': 'Chile',
5279 'CN': 'China',
5280 'CX': 'Christmas Island',
5281 'CC': 'Cocos (Keeling) Islands',
5282 'CO': 'Colombia',
5283 'KM': 'Comoros',
5284 'CG': 'Congo',
5285 'CD': 'Congo, the Democratic Republic of the',
5286 'CK': 'Cook Islands',
5287 'CR': 'Costa Rica',
5288 'CI': 'Côte d\'Ivoire',
5289 'HR': 'Croatia',
5290 'CU': 'Cuba',
5291 'CW': 'Curaçao',
5292 'CY': 'Cyprus',
5293 'CZ': 'Czech Republic',
5294 'DK': 'Denmark',
5295 'DJ': 'Djibouti',
5296 'DM': 'Dominica',
5297 'DO': 'Dominican Republic',
5298 'EC': 'Ecuador',
5299 'EG': 'Egypt',
5300 'SV': 'El Salvador',
5301 'GQ': 'Equatorial Guinea',
5302 'ER': 'Eritrea',
5303 'EE': 'Estonia',
5304 'ET': 'Ethiopia',
5305 'FK': 'Falkland Islands (Malvinas)',
5306 'FO': 'Faroe Islands',
5307 'FJ': 'Fiji',
5308 'FI': 'Finland',
5309 'FR': 'France',
5310 'GF': 'French Guiana',
5311 'PF': 'French Polynesia',
5312 'TF': 'French Southern Territories',
5313 'GA': 'Gabon',
5314 'GM': 'Gambia',
5315 'GE': 'Georgia',
5316 'DE': 'Germany',
5317 'GH': 'Ghana',
5318 'GI': 'Gibraltar',
5319 'GR': 'Greece',
5320 'GL': 'Greenland',
5321 'GD': 'Grenada',
5322 'GP': 'Guadeloupe',
5323 'GU': 'Guam',
5324 'GT': 'Guatemala',
5325 'GG': 'Guernsey',
5326 'GN': 'Guinea',
5327 'GW': 'Guinea-Bissau',
5328 'GY': 'Guyana',
5329 'HT': 'Haiti',
5330 'HM': 'Heard Island and McDonald Islands',
5331 'VA': 'Holy See (Vatican City State)',
5332 'HN': 'Honduras',
5333 'HK': 'Hong Kong',
5334 'HU': 'Hungary',
5335 'IS': 'Iceland',
5336 'IN': 'India',
5337 'ID': 'Indonesia',
5338 'IR': 'Iran, Islamic Republic of',
5339 'IQ': 'Iraq',
5340 'IE': 'Ireland',
5341 'IM': 'Isle of Man',
5342 'IL': 'Israel',
5343 'IT': 'Italy',
5344 'JM': 'Jamaica',
5345 'JP': 'Japan',
5346 'JE': 'Jersey',
5347 'JO': 'Jordan',
5348 'KZ': 'Kazakhstan',
5349 'KE': 'Kenya',
5350 'KI': 'Kiribati',
5351 'KP': 'Korea, Democratic People\'s Republic of',
5352 'KR': 'Korea, Republic of',
5353 'KW': 'Kuwait',
5354 'KG': 'Kyrgyzstan',
5355 'LA': 'Lao People\'s Democratic Republic',
5356 'LV': 'Latvia',
5357 'LB': 'Lebanon',
5358 'LS': 'Lesotho',
5359 'LR': 'Liberia',
5360 'LY': 'Libya',
5361 'LI': 'Liechtenstein',
5362 'LT': 'Lithuania',
5363 'LU': 'Luxembourg',
5364 'MO': 'Macao',
5365 'MK': 'Macedonia, the Former Yugoslav Republic of',
5366 'MG': 'Madagascar',
5367 'MW': 'Malawi',
5368 'MY': 'Malaysia',
5369 'MV': 'Maldives',
5370 'ML': 'Mali',
5371 'MT': 'Malta',
5372 'MH': 'Marshall Islands',
5373 'MQ': 'Martinique',
5374 'MR': 'Mauritania',
5375 'MU': 'Mauritius',
5376 'YT': 'Mayotte',
5377 'MX': 'Mexico',
5378 'FM': 'Micronesia, Federated States of',
5379 'MD': 'Moldova, Republic of',
5380 'MC': 'Monaco',
5381 'MN': 'Mongolia',
5382 'ME': 'Montenegro',
5383 'MS': 'Montserrat',
5384 'MA': 'Morocco',
5385 'MZ': 'Mozambique',
5386 'MM': 'Myanmar',
5387 'NA': 'Namibia',
5388 'NR': 'Nauru',
5389 'NP': 'Nepal',
5390 'NL': 'Netherlands',
5391 'NC': 'New Caledonia',
5392 'NZ': 'New Zealand',
5393 'NI': 'Nicaragua',
5394 'NE': 'Niger',
5395 'NG': 'Nigeria',
5396 'NU': 'Niue',
5397 'NF': 'Norfolk Island',
5398 'MP': 'Northern Mariana Islands',
5399 'NO': 'Norway',
5400 'OM': 'Oman',
5401 'PK': 'Pakistan',
5402 'PW': 'Palau',
5403 'PS': 'Palestine, State of',
5404 'PA': 'Panama',
5405 'PG': 'Papua New Guinea',
5406 'PY': 'Paraguay',
5407 'PE': 'Peru',
5408 'PH': 'Philippines',
5409 'PN': 'Pitcairn',
5410 'PL': 'Poland',
5411 'PT': 'Portugal',
5412 'PR': 'Puerto Rico',
5413 'QA': 'Qatar',
5414 'RE': 'Réunion',
5415 'RO': 'Romania',
5416 'RU': 'Russian Federation',
5417 'RW': 'Rwanda',
5418 'BL': 'Saint Barthélemy',
5419 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5420 'KN': 'Saint Kitts and Nevis',
5421 'LC': 'Saint Lucia',
5422 'MF': 'Saint Martin (French part)',
5423 'PM': 'Saint Pierre and Miquelon',
5424 'VC': 'Saint Vincent and the Grenadines',
5425 'WS': 'Samoa',
5426 'SM': 'San Marino',
5427 'ST': 'Sao Tome and Principe',
5428 'SA': 'Saudi Arabia',
5429 'SN': 'Senegal',
5430 'RS': 'Serbia',
5431 'SC': 'Seychelles',
5432 'SL': 'Sierra Leone',
5433 'SG': 'Singapore',
5434 'SX': 'Sint Maarten (Dutch part)',
5435 'SK': 'Slovakia',
5436 'SI': 'Slovenia',
5437 'SB': 'Solomon Islands',
5438 'SO': 'Somalia',
5439 'ZA': 'South Africa',
5440 'GS': 'South Georgia and the South Sandwich Islands',
5441 'SS': 'South Sudan',
5442 'ES': 'Spain',
5443 'LK': 'Sri Lanka',
5444 'SD': 'Sudan',
5445 'SR': 'Suriname',
5446 'SJ': 'Svalbard and Jan Mayen',
5447 'SZ': 'Swaziland',
5448 'SE': 'Sweden',
5449 'CH': 'Switzerland',
5450 'SY': 'Syrian Arab Republic',
5451 'TW': 'Taiwan, Province of China',
5452 'TJ': 'Tajikistan',
5453 'TZ': 'Tanzania, United Republic of',
5454 'TH': 'Thailand',
5455 'TL': 'Timor-Leste',
5456 'TG': 'Togo',
5457 'TK': 'Tokelau',
5458 'TO': 'Tonga',
5459 'TT': 'Trinidad and Tobago',
5460 'TN': 'Tunisia',
5461 'TR': 'Turkey',
5462 'TM': 'Turkmenistan',
5463 'TC': 'Turks and Caicos Islands',
5464 'TV': 'Tuvalu',
5465 'UG': 'Uganda',
5466 'UA': 'Ukraine',
5467 'AE': 'United Arab Emirates',
5468 'GB': 'United Kingdom',
5469 'US': 'United States',
5470 'UM': 'United States Minor Outlying Islands',
5471 'UY': 'Uruguay',
5472 'UZ': 'Uzbekistan',
5473 'VU': 'Vanuatu',
5474 'VE': 'Venezuela, Bolivarian Republic of',
5475 'VN': 'Viet Nam',
5476 'VG': 'Virgin Islands, British',
5477 'VI': 'Virgin Islands, U.S.',
5478 'WF': 'Wallis and Futuna',
5479 'EH': 'Western Sahara',
5480 'YE': 'Yemen',
5481 'ZM': 'Zambia',
5482 'ZW': 'Zimbabwe',
5483 }
5484
5485 @classmethod
5486 def short2full(cls, code):
5487 """Convert an ISO 3166-2 country code to the corresponding full name"""
5488 return cls._country_map.get(code.upper())
5489
5490
773f291d
S
5491class GeoUtils(object):
5492 # Major IPv4 address blocks per country
5493 _country_ip_map = {
53896ca5 5494 'AD': '46.172.224.0/19',
773f291d
S
5495 'AE': '94.200.0.0/13',
5496 'AF': '149.54.0.0/17',
5497 'AG': '209.59.64.0/18',
5498 'AI': '204.14.248.0/21',
5499 'AL': '46.99.0.0/16',
5500 'AM': '46.70.0.0/15',
5501 'AO': '105.168.0.0/13',
53896ca5
S
5502 'AP': '182.50.184.0/21',
5503 'AQ': '23.154.160.0/24',
773f291d
S
5504 'AR': '181.0.0.0/12',
5505 'AS': '202.70.112.0/20',
53896ca5 5506 'AT': '77.116.0.0/14',
773f291d
S
5507 'AU': '1.128.0.0/11',
5508 'AW': '181.41.0.0/18',
53896ca5
S
5509 'AX': '185.217.4.0/22',
5510 'AZ': '5.197.0.0/16',
773f291d
S
5511 'BA': '31.176.128.0/17',
5512 'BB': '65.48.128.0/17',
5513 'BD': '114.130.0.0/16',
5514 'BE': '57.0.0.0/8',
53896ca5 5515 'BF': '102.178.0.0/15',
773f291d
S
5516 'BG': '95.42.0.0/15',
5517 'BH': '37.131.0.0/17',
5518 'BI': '154.117.192.0/18',
5519 'BJ': '137.255.0.0/16',
53896ca5 5520 'BL': '185.212.72.0/23',
773f291d
S
5521 'BM': '196.12.64.0/18',
5522 'BN': '156.31.0.0/16',
5523 'BO': '161.56.0.0/16',
5524 'BQ': '161.0.80.0/20',
53896ca5 5525 'BR': '191.128.0.0/12',
773f291d
S
5526 'BS': '24.51.64.0/18',
5527 'BT': '119.2.96.0/19',
5528 'BW': '168.167.0.0/16',
5529 'BY': '178.120.0.0/13',
5530 'BZ': '179.42.192.0/18',
5531 'CA': '99.224.0.0/11',
5532 'CD': '41.243.0.0/16',
53896ca5
S
5533 'CF': '197.242.176.0/21',
5534 'CG': '160.113.0.0/16',
773f291d 5535 'CH': '85.0.0.0/13',
53896ca5 5536 'CI': '102.136.0.0/14',
773f291d
S
5537 'CK': '202.65.32.0/19',
5538 'CL': '152.172.0.0/14',
53896ca5 5539 'CM': '102.244.0.0/14',
773f291d
S
5540 'CN': '36.128.0.0/10',
5541 'CO': '181.240.0.0/12',
5542 'CR': '201.192.0.0/12',
5543 'CU': '152.206.0.0/15',
5544 'CV': '165.90.96.0/19',
5545 'CW': '190.88.128.0/17',
53896ca5 5546 'CY': '31.153.0.0/16',
773f291d
S
5547 'CZ': '88.100.0.0/14',
5548 'DE': '53.0.0.0/8',
5549 'DJ': '197.241.0.0/17',
5550 'DK': '87.48.0.0/12',
5551 'DM': '192.243.48.0/20',
5552 'DO': '152.166.0.0/15',
5553 'DZ': '41.96.0.0/12',
5554 'EC': '186.68.0.0/15',
5555 'EE': '90.190.0.0/15',
5556 'EG': '156.160.0.0/11',
5557 'ER': '196.200.96.0/20',
5558 'ES': '88.0.0.0/11',
5559 'ET': '196.188.0.0/14',
5560 'EU': '2.16.0.0/13',
5561 'FI': '91.152.0.0/13',
5562 'FJ': '144.120.0.0/16',
53896ca5 5563 'FK': '80.73.208.0/21',
773f291d
S
5564 'FM': '119.252.112.0/20',
5565 'FO': '88.85.32.0/19',
5566 'FR': '90.0.0.0/9',
5567 'GA': '41.158.0.0/15',
5568 'GB': '25.0.0.0/8',
5569 'GD': '74.122.88.0/21',
5570 'GE': '31.146.0.0/16',
5571 'GF': '161.22.64.0/18',
5572 'GG': '62.68.160.0/19',
53896ca5
S
5573 'GH': '154.160.0.0/12',
5574 'GI': '95.164.0.0/16',
773f291d
S
5575 'GL': '88.83.0.0/19',
5576 'GM': '160.182.0.0/15',
5577 'GN': '197.149.192.0/18',
5578 'GP': '104.250.0.0/19',
5579 'GQ': '105.235.224.0/20',
5580 'GR': '94.64.0.0/13',
5581 'GT': '168.234.0.0/16',
5582 'GU': '168.123.0.0/16',
5583 'GW': '197.214.80.0/20',
5584 'GY': '181.41.64.0/18',
5585 'HK': '113.252.0.0/14',
5586 'HN': '181.210.0.0/16',
5587 'HR': '93.136.0.0/13',
5588 'HT': '148.102.128.0/17',
5589 'HU': '84.0.0.0/14',
5590 'ID': '39.192.0.0/10',
5591 'IE': '87.32.0.0/12',
5592 'IL': '79.176.0.0/13',
5593 'IM': '5.62.80.0/20',
5594 'IN': '117.192.0.0/10',
5595 'IO': '203.83.48.0/21',
5596 'IQ': '37.236.0.0/14',
5597 'IR': '2.176.0.0/12',
5598 'IS': '82.221.0.0/16',
5599 'IT': '79.0.0.0/10',
5600 'JE': '87.244.64.0/18',
5601 'JM': '72.27.0.0/17',
5602 'JO': '176.29.0.0/16',
53896ca5 5603 'JP': '133.0.0.0/8',
773f291d
S
5604 'KE': '105.48.0.0/12',
5605 'KG': '158.181.128.0/17',
5606 'KH': '36.37.128.0/17',
5607 'KI': '103.25.140.0/22',
5608 'KM': '197.255.224.0/20',
53896ca5 5609 'KN': '198.167.192.0/19',
773f291d
S
5610 'KP': '175.45.176.0/22',
5611 'KR': '175.192.0.0/10',
5612 'KW': '37.36.0.0/14',
5613 'KY': '64.96.0.0/15',
5614 'KZ': '2.72.0.0/13',
5615 'LA': '115.84.64.0/18',
5616 'LB': '178.135.0.0/16',
53896ca5 5617 'LC': '24.92.144.0/20',
773f291d
S
5618 'LI': '82.117.0.0/19',
5619 'LK': '112.134.0.0/15',
53896ca5 5620 'LR': '102.183.0.0/16',
773f291d
S
5621 'LS': '129.232.0.0/17',
5622 'LT': '78.56.0.0/13',
5623 'LU': '188.42.0.0/16',
5624 'LV': '46.109.0.0/16',
5625 'LY': '41.252.0.0/14',
5626 'MA': '105.128.0.0/11',
5627 'MC': '88.209.64.0/18',
5628 'MD': '37.246.0.0/16',
5629 'ME': '178.175.0.0/17',
5630 'MF': '74.112.232.0/21',
5631 'MG': '154.126.0.0/17',
5632 'MH': '117.103.88.0/21',
5633 'MK': '77.28.0.0/15',
5634 'ML': '154.118.128.0/18',
5635 'MM': '37.111.0.0/17',
5636 'MN': '49.0.128.0/17',
5637 'MO': '60.246.0.0/16',
5638 'MP': '202.88.64.0/20',
5639 'MQ': '109.203.224.0/19',
5640 'MR': '41.188.64.0/18',
5641 'MS': '208.90.112.0/22',
5642 'MT': '46.11.0.0/16',
5643 'MU': '105.16.0.0/12',
5644 'MV': '27.114.128.0/18',
53896ca5 5645 'MW': '102.70.0.0/15',
773f291d
S
5646 'MX': '187.192.0.0/11',
5647 'MY': '175.136.0.0/13',
5648 'MZ': '197.218.0.0/15',
5649 'NA': '41.182.0.0/16',
5650 'NC': '101.101.0.0/18',
5651 'NE': '197.214.0.0/18',
5652 'NF': '203.17.240.0/22',
5653 'NG': '105.112.0.0/12',
5654 'NI': '186.76.0.0/15',
5655 'NL': '145.96.0.0/11',
5656 'NO': '84.208.0.0/13',
5657 'NP': '36.252.0.0/15',
5658 'NR': '203.98.224.0/19',
5659 'NU': '49.156.48.0/22',
5660 'NZ': '49.224.0.0/14',
5661 'OM': '5.36.0.0/15',
5662 'PA': '186.72.0.0/15',
5663 'PE': '186.160.0.0/14',
5664 'PF': '123.50.64.0/18',
5665 'PG': '124.240.192.0/19',
5666 'PH': '49.144.0.0/13',
5667 'PK': '39.32.0.0/11',
5668 'PL': '83.0.0.0/11',
5669 'PM': '70.36.0.0/20',
5670 'PR': '66.50.0.0/16',
5671 'PS': '188.161.0.0/16',
5672 'PT': '85.240.0.0/13',
5673 'PW': '202.124.224.0/20',
5674 'PY': '181.120.0.0/14',
5675 'QA': '37.210.0.0/15',
53896ca5 5676 'RE': '102.35.0.0/16',
773f291d 5677 'RO': '79.112.0.0/13',
53896ca5 5678 'RS': '93.86.0.0/15',
773f291d 5679 'RU': '5.136.0.0/13',
53896ca5 5680 'RW': '41.186.0.0/16',
773f291d
S
5681 'SA': '188.48.0.0/13',
5682 'SB': '202.1.160.0/19',
5683 'SC': '154.192.0.0/11',
53896ca5 5684 'SD': '102.120.0.0/13',
773f291d 5685 'SE': '78.64.0.0/12',
53896ca5 5686 'SG': '8.128.0.0/10',
773f291d
S
5687 'SI': '188.196.0.0/14',
5688 'SK': '78.98.0.0/15',
53896ca5 5689 'SL': '102.143.0.0/17',
773f291d
S
5690 'SM': '89.186.32.0/19',
5691 'SN': '41.82.0.0/15',
53896ca5 5692 'SO': '154.115.192.0/18',
773f291d
S
5693 'SR': '186.179.128.0/17',
5694 'SS': '105.235.208.0/21',
5695 'ST': '197.159.160.0/19',
5696 'SV': '168.243.0.0/16',
5697 'SX': '190.102.0.0/20',
5698 'SY': '5.0.0.0/16',
5699 'SZ': '41.84.224.0/19',
5700 'TC': '65.255.48.0/20',
5701 'TD': '154.68.128.0/19',
5702 'TG': '196.168.0.0/14',
5703 'TH': '171.96.0.0/13',
5704 'TJ': '85.9.128.0/18',
5705 'TK': '27.96.24.0/21',
5706 'TL': '180.189.160.0/20',
5707 'TM': '95.85.96.0/19',
5708 'TN': '197.0.0.0/11',
5709 'TO': '175.176.144.0/21',
5710 'TR': '78.160.0.0/11',
5711 'TT': '186.44.0.0/15',
5712 'TV': '202.2.96.0/19',
5713 'TW': '120.96.0.0/11',
5714 'TZ': '156.156.0.0/14',
53896ca5
S
5715 'UA': '37.52.0.0/14',
5716 'UG': '102.80.0.0/13',
5717 'US': '6.0.0.0/8',
773f291d 5718 'UY': '167.56.0.0/13',
53896ca5 5719 'UZ': '84.54.64.0/18',
773f291d 5720 'VA': '212.77.0.0/19',
53896ca5 5721 'VC': '207.191.240.0/21',
773f291d 5722 'VE': '186.88.0.0/13',
53896ca5 5723 'VG': '66.81.192.0/20',
773f291d
S
5724 'VI': '146.226.0.0/16',
5725 'VN': '14.160.0.0/11',
5726 'VU': '202.80.32.0/20',
5727 'WF': '117.20.32.0/21',
5728 'WS': '202.4.32.0/19',
5729 'YE': '134.35.0.0/16',
5730 'YT': '41.242.116.0/22',
5731 'ZA': '41.0.0.0/11',
53896ca5
S
5732 'ZM': '102.144.0.0/13',
5733 'ZW': '102.177.192.0/18',
773f291d
S
5734 }
5735
5736 @classmethod
5f95927a
S
5737 def random_ipv4(cls, code_or_block):
5738 if len(code_or_block) == 2:
5739 block = cls._country_ip_map.get(code_or_block.upper())
5740 if not block:
5741 return None
5742 else:
5743 block = code_or_block
773f291d
S
5744 addr, preflen = block.split('/')
5745 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5746 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5747 return compat_str(socket.inet_ntoa(
4248dad9 5748 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5749
5750
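# Illustrative usage sketch (random_ipv4 belongs to the enclosing GeoUtils
# class, whose header precedes the table above). A 2-letter code is looked up
# in _country_ip_map; anything else is treated as an explicit CIDR block:
#
#   >>> GeoUtils.random_ipv4('US')           # draws from 6.0.0.0/8 above
#   >>> GeoUtils.random_ipv4('5.36.0.0/15')  # draws from an explicit block
#
# The arithmetic: addr_min is the network address as a 32-bit integer, and
# OR-ing it with (0xffffffff >> prefix_len) sets every host bit, giving
# addr_max; a uniform integer in [addr_min, addr_max] is then formatted back
# into dotted-quad form.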
91410c9b 5751class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5752 def __init__(self, proxies=None):
5753 # Set default handlers
5754 for type in ('http', 'https'):
5755 setattr(self, '%s_open' % type,
5756 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5757 meth(r, proxy, type))
38e87f6c 5758 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5759
91410c9b 5760 def proxy_open(self, req, proxy, type):
2461f79d 5761 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5762 if req_proxy is not None:
5763 proxy = req_proxy
2461f79d
PH
5764 del req.headers['Ytdl-request-proxy']
5765
5766 if proxy == '__noproxy__':
5767 return None # No Proxy
51fb4995 5768 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5769 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5770 # The actual wrapping of the socket with SOCKS is done by yt-dlp's http/https handlers
71aff188 5771 return None
91410c9b
PH
5772 return compat_urllib_request.ProxyHandler.proxy_open(
5773 self, req, proxy, type)
5bc880b9
YCH
5774
5775
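# Illustrative sketch: the proxy for a single request is chosen via the
# 'Ytdl-request-proxy' header rather than globally. The bare opener below is a
# minimal example only; YoutubeDL wires this handler up together with its own
# http/https handlers, which perform the actual SOCKS socket wrapping:
#
#   >>> opener = compat_urllib_request.build_opener(
#   ...     PerRequestProxyHandler({'http': 'http://proxy.example:3128'}))
#   >>> req = compat_urllib_request.Request('http://example.com/')
#   >>> req.add_header('Ytdl-request-proxy', '__noproxy__')  # bypass the proxy for this request only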
0a5445dd
YCH
5776# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5777# released into Public Domain
5778# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5779
5780def long_to_bytes(n, blocksize=0):
5781 """long_to_bytes(n:long, blocksize:int) : string
5782 Convert a long integer to a byte string.
5783
5784 If optional blocksize is given and greater than zero, pad the front of the
5785 byte string with binary zeros so that the length is a multiple of
5786 blocksize.
5787 """
5788 # after much testing, this algorithm was deemed to be the fastest
5789 s = b''
5790 n = int(n)
5791 while n > 0:
5792 s = compat_struct_pack('>I', n & 0xffffffff) + s
5793 n = n >> 32
5794 # strip off leading zeros
5795 for i in range(len(s)):
5796 if s[i] != b'\000'[0]:
5797 break
5798 else:
5799 # only happens when n == 0
5800 s = b'\000'
5801 i = 0
5802 s = s[i:]
5803 # add back some pad bytes. this could be done more efficiently w.r.t. the
5804 # de-padding being done above, but sigh...
5805 if blocksize > 0 and len(s) % blocksize:
5806 s = (blocksize - len(s) % blocksize) * b'\000' + s
5807 return s
5808
5809
5810def bytes_to_long(s):
5811 """bytes_to_long(string) : long
5812 Convert a byte string to a long integer.
5813
5814 This is (essentially) the inverse of long_to_bytes().
5815 """
5816 acc = 0
5817 length = len(s)
5818 if length % 4:
5819 extra = (4 - length % 4)
5820 s = b'\000' * extra + s
5821 length = length + extra
5822 for i in range(0, length, 4):
5823 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5824 return acc
5825
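# Worked example of the big-endian conversions above (doctest-style):
#
#   >>> long_to_bytes(65537)
#   b'\x01\x00\x01'
#   >>> long_to_bytes(65537, blocksize=4)
#   b'\x00\x01\x00\x01'
#   >>> bytes_to_long(b'\x01\x00\x01')
#   65537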
5826
5bc880b9
YCH
5827def ohdave_rsa_encrypt(data, exponent, modulus):
5828 '''
5829 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5830
5831 Input:
5832 data: data to encrypt, bytes-like object
5833 exponent, modulus: parameter e and N of RSA algorithm, both integer
5834 Output: hex string of encrypted data
5835
5836 Limitation: supports one block encryption only
5837 '''
5838
5839 payload = int(binascii.hexlify(data[::-1]), 16)
5840 encrypted = pow(payload, exponent, modulus)
5841 return '%x' % encrypted
81bdc8fd
YCH
5842
5843
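# Tiny worked example with deliberately toy-sized parameters (not a real key):
# the data is reversed, hex-encoded and parsed as an integer (i.e. interpreted
# little-endian), raised to `exponent` mod `modulus`, and returned as hex.
#
#   >>> ohdave_rsa_encrypt(b'\x02', 3, 101)   # payload 2, 2**3 % 101 == 8
#   '8'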
f48409c7
YCH
5844def pkcs1pad(data, length):
5845 """
5846 Padding input data with PKCS#1 scheme
5847
5848 @param {int[]} data input data
5849 @param {int} length target length
5850 @returns {int[]} padded data
5851 """
5852 if len(data) > length - 11:
5853 raise ValueError('Input data too long for PKCS#1 padding')
5854
5855 # PKCS#1 v1.5 requires the padding string PS to consist of nonzero octets
5855 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
5856 return [0, 2] + pseudo_random + [0] + data
5857
5858
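# Shape of the padded block (EB = 00 || 02 || PS || 00 || D per PKCS#1 v1.5);
# only the fixed positions are checked here, since PS is random:
#
#   >>> padded = pkcs1pad([0x13, 0x37], 16)
#   >>> padded[:2], padded[-3:], len(padded)
#   ([0, 2], [0, 19, 55], 16)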
5eb6bdce 5859def encode_base_n(num, n, table=None):
59f898b7 5860 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
5861 if not table:
5862 table = FULL_TABLE[:n]
5863
5eb6bdce
YCH
5864 if n > len(table):
5865 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5866
5867 if num == 0:
5868 return table[0]
5869
81bdc8fd
YCH
5870 ret = ''
5871 while num:
5872 ret = table[num % n] + ret
5873 num = num // n
5874 return ret
f52354a8
YCH
5875
5876
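# Examples with the default table (digits, then lowercase, then uppercase):
#
#   >>> encode_base_n(0, 36)
#   '0'
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(123456, 36)
#   '2n9c'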
5877def decode_packed_codes(code):
06b3fe29 5878 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5879 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
5880 base = int(base)
5881 count = int(count)
5882 symbols = symbols.split('|')
5883 symbol_table = {}
5884
5885 while count:
5886 count -= 1
5eb6bdce 5887 base_n_count = encode_base_n(count, base)
f52354a8
YCH
5888 symbol_table[base_n_count] = symbols[count] or base_n_count
5889
5890 return re.sub(
5891 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5892 obfuscated_code)
e154c651 5893
5894
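# Minimal illustration of the "P.A.C.K.E.R."-style deobfuscation above,
# assuming PACKED_CODES_RE (defined earlier in this module) matches the usual
# }('<code>',<base>,<count>,'<symbols>'.split('|')  trailer: every base-N
# token in the packed code is replaced by the symbol at that index.
#
#   >>> decode_packed_codes("}('0 1',62,2,'hello|world'.split('|')")
#   'hello world'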
1ced2221
S
5895def caesar(s, alphabet, shift):
5896 if shift == 0:
5897 return s
5898 l = len(alphabet)
5899 return ''.join(
5900 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5901 for c in s)
5902
5903
5904def rot47(s):
5905 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5906
5907
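# caesar() rotates only characters that appear in `alphabet`; rot47() applies
# it to the printable ASCII range (33-126) with a shift of 47, which makes it
# its own inverse:
#
#   >>> caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 3)
#   'def'
#   >>> rot47('Hello')
#   'w6==@'
#   >>> rot47('w6==@')
#   'Hello'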
e154c651 5908def parse_m3u8_attributes(attrib):
5909 info = {}
5910 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5911 if val.startswith('"'):
5912 val = val[1:-1]
5913 info[key] = val
5914 return info
1143535d
YCH
5915
5916
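# Example of parsing an HLS attribute list (quoted values may contain commas,
# unquoted values may not):
#
#   >>> parse_m3u8_attributes('BANDWIDTH=800000,RESOLUTION=640x360,CODECS="avc1.4d401e,mp4a.40.2"')
#   {'BANDWIDTH': '800000', 'RESOLUTION': '640x360', 'CODECS': 'avc1.4d401e,mp4a.40.2'}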
5917def urshift(val, n):
5918 return val >> n if val >= 0 else (val + 0x100000000) >> n
d3f8e038
YCH
5919
5920
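# Logical (zero-fill) right shift on 32-bit values, i.e. JavaScript's `>>>`:
#
#   >>> urshift(16, 2)
#   4
#   >>> urshift(-1, 28)   # -1 is treated as 0xFFFFFFFF
#   15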
5921# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5922# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
5923def decode_png(png_data):
5924 # Reference: https://www.w3.org/TR/PNG/
5925 header = png_data[8:]
5926
5927 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5928 raise IOError('Not a valid PNG file.')
5929
5930 int_map = {1: '>B', 2: '>H', 4: '>I'}
5931 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5932
5933 chunks = []
5934
5935 while header:
5936 length = unpack_integer(header[:4])
5937 header = header[4:]
5938
5939 chunk_type = header[:4]
5940 header = header[4:]
5941
5942 chunk_data = header[:length]
5943 header = header[length:]
5944
5945 header = header[4:] # Skip CRC
5946
5947 chunks.append({
5948 'type': chunk_type,
5949 'length': length,
5950 'data': chunk_data
5951 })
5952
5953 ihdr = chunks[0]['data']
5954
5955 width = unpack_integer(ihdr[:4])
5956 height = unpack_integer(ihdr[4:8])
5957
5958 idat = b''
5959
5960 for chunk in chunks:
5961 if chunk['type'] == b'IDAT':
5962 idat += chunk['data']
5963
5964 if not idat:
5965 raise IOError('Unable to read PNG data.')
5966
5967 decompressed_data = bytearray(zlib.decompress(idat))
5968
5969 stride = width * 3
5970 pixels = []
5971
5972 def _get_pixel(idx):
5973 x = idx % stride
5974 y = idx // stride
5975 return pixels[y][x]
5976
5977 for y in range(height):
5978 basePos = y * (1 + stride)
5979 filter_type = decompressed_data[basePos]
5980
5981 current_row = []
5982
5983 pixels.append(current_row)
5984
5985 for x in range(stride):
5986 color = decompressed_data[1 + basePos + x]
5987 basex = y * stride + x
5988 left = 0
5989 up = 0
5990
5991 if x > 2:
5992 left = _get_pixel(basex - 3)
5993 if y > 0:
5994 up = _get_pixel(basex - stride)
5995
5996 if filter_type == 1: # Sub
5997 color = (color + left) & 0xff
5998 elif filter_type == 2: # Up
5999 color = (color + up) & 0xff
6000 elif filter_type == 3: # Average
6001 color = (color + ((left + up) >> 1)) & 0xff
6002 elif filter_type == 4: # Paeth
6003 a = left
6004 b = up
6005 c = 0
6006
6007 if x > 2 and y > 0:
6008 c = _get_pixel(basex - stride - 3)
6009
6010 p = a + b - c
6011
6012 pa = abs(p - a)
6013 pb = abs(p - b)
6014 pc = abs(p - c)
6015
6016 if pa <= pb and pa <= pc:
6017 color = (color + a) & 0xff
6018 elif pb <= pc:
6019 color = (color + b) & 0xff
6020 else:
6021 color = (color + c) & 0xff
6022
6023 current_row.append(color)
6024
6025 return width, height, pixels
efa97bdc
YCH
6026
6027
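# Usage sketch: decode_png() only handles the subset of PNG needed here
# (8-bit, 3-bytes-per-pixel RGB scanlines, no interlacing). `pixels` is a list
# of rows, each a flat byte sequence of stride width*3, so the red byte of
# pixel (x, y) is pixels[y][x * 3].
#
#   >>> # with open('frame.png', 'rb') as f:            # hypothetical file
#   >>> #     width, height, pixels = decode_png(f.read())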
6028def write_xattr(path, key, value):
6029 # This mess below finds the best xattr tool for the job
6030 try:
6031 # try the pyxattr module...
6032 import xattr
6033
53a7e3d2
YCH
6034 if hasattr(xattr, 'set'): # pyxattr
6035 # Unicode arguments are not supported in python-pyxattr until
6036 # version 0.5.0
067aa17e 6037 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6038 pyxattr_required_version = '0.5.0'
6039 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6040 # TODO: fallback to CLI tools
6041 raise XAttrUnavailableError(
6042 'python-pyxattr is detected but is too old. '
7a5c1cfe 6043 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6044 'Falling back to other xattr implementations' % (
6045 pyxattr_required_version, xattr.__version__))
6046
6047 setxattr = xattr.set
6048 else: # xattr
6049 setxattr = xattr.setxattr
efa97bdc
YCH
6050
6051 try:
53a7e3d2 6052 setxattr(path, key, value)
efa97bdc
YCH
6053 except EnvironmentError as e:
6054 raise XAttrMetadataError(e.errno, e.strerror)
6055
6056 except ImportError:
6057 if compat_os_name == 'nt':
6058 # Write xattrs to NTFS Alternate Data Streams:
6059 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6060 assert ':' not in key
6061 assert os.path.exists(path)
6062
6063 ads_fn = path + ':' + key
6064 try:
6065 with open(ads_fn, 'wb') as f:
6066 f.write(value)
6067 except EnvironmentError as e:
6068 raise XAttrMetadataError(e.errno, e.strerror)
6069 else:
6070 user_has_setfattr = check_executable('setfattr', ['--version'])
6071 user_has_xattr = check_executable('xattr', ['-h'])
6072
6073 if user_has_setfattr or user_has_xattr:
6074
6075 value = value.decode('utf-8')
6076 if user_has_setfattr:
6077 executable = 'setfattr'
6078 opts = ['-n', key, '-v', value]
6079 elif user_has_xattr:
6080 executable = 'xattr'
6081 opts = ['-w', key, value]
6082
3089bc74
S
6083 cmd = ([encodeFilename(executable, True)]
6084 + [encodeArgument(o) for o in opts]
6085 + [encodeFilename(path, True)])
efa97bdc
YCH
6086
6087 try:
6088 p = subprocess.Popen(
6089 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6090 except EnvironmentError as e:
6091 raise XAttrMetadataError(e.errno, e.strerror)
f5b1bca9 6092 stdout, stderr = process_communicate_or_kill(p)
efa97bdc
YCH
6093 stderr = stderr.decode('utf-8', 'replace')
6094 if p.returncode != 0:
6095 raise XAttrMetadataError(p.returncode, stderr)
6096
6097 else:
6098 # On Unix, but we can't find pyxattr, setfattr, or xattr.
6099 if sys.platform.startswith('linux'):
6100 raise XAttrUnavailableError(
6101 "Couldn't find a tool to set the xattrs. "
6102 "Install either the python 'pyxattr' or 'xattr' "
6103 "modules, or the GNU 'attr' package "
6104 "(which contains the 'setfattr' tool).")
6105 else:
6106 raise XAttrUnavailableError(
6107 "Couldn't find a tool to set the xattrs. "
6108 "Install either the python 'xattr' module, "
6109 "or the 'xattr' binary.")
0c265486
YCH
6110
6111
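# Usage sketch (the value must be bytes; on NTFS it is written to an Alternate
# Data Stream, elsewhere via pyxattr/xattr or the setfattr/xattr CLI tools):
#
#   >>> # write_xattr('video.mp4', 'user.xdg.referrer.url', b'https://example.com/')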
6112def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6113 start_date = datetime.date(1950, 1, 1)
6114 end_date = datetime.date(1995, 12, 31)
6115 offset = random.randint(0, (end_date - start_date).days)
6116 random_date = start_date + datetime.timedelta(offset)
0c265486 6117 return {
aa374bc7
AS
6118 year_field: str(random_date.year),
6119 month_field: str(random_date.month),
6120 day_field: str(random_date.day),
0c265486 6121 }
732044af 6122
c76eb41b 6123
732044af 6124# Templates for internet shortcut files, which are plain text files.
6125DOT_URL_LINK_TEMPLATE = '''
6126[InternetShortcut]
6127URL=%(url)s
6128'''.lstrip()
6129
6130DOT_WEBLOC_LINK_TEMPLATE = '''
6131<?xml version="1.0" encoding="UTF-8"?>
6132<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6133<plist version="1.0">
6134<dict>
6135\t<key>URL</key>
6136\t<string>%(url)s</string>
6137</dict>
6138</plist>
6139'''.lstrip()
6140
6141DOT_DESKTOP_LINK_TEMPLATE = '''
6142[Desktop Entry]
6143Encoding=UTF-8
6144Name=%(filename)s
6145Type=Link
6146URL=%(url)s
6147Icon=text-html
6148'''.lstrip()
6149
6150
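# The templates above are filled with plain %-substitution; a minimal sketch
# (the logic that actually writes the link files lives elsewhere):
#
#   >>> (DOT_URL_LINK_TEMPLATE % {'url': 'https://example.com/'}).splitlines()
#   ['[InternetShortcut]', 'URL=https://example.com/']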
6151def iri_to_uri(iri):
6152 """
6153 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6154
6155 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters (using an underlying UTF-8 encoding) other than those already escaped, leaving the rest of the URI intact.
6156 """
6157
6158 iri_parts = compat_urllib_parse_urlparse(iri)
6159
6160 if '[' in iri_parts.netloc:
6161 raise ValueError('IPv6 URIs are not yet supported.')
6162 # Querying `.netloc` when there's only one bracket also raises a ValueError.
6163
6164 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6165
6166 net_location = ''
6167 if iri_parts.username:
6168 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6169 if iri_parts.password is not None:
6170 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6171 net_location += '@'
6172
6173 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6174 # The 'idna' encoding produces ASCII text.
6175 if iri_parts.port is not None and iri_parts.port != 80:
6176 net_location += ':' + str(iri_parts.port)
6177
6178 return compat_urllib_parse_urlunparse(
6179 (iri_parts.scheme,
6180 net_location,
6181
6182 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6183
6184 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6185 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6186
6187 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6188 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6189
6190 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6191
6192 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6193
6194
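# Example: non-ASCII path/query characters are percent-encoded as UTF-8, while
# the host is passed through the 'idna' codec:
#
#   >>> iri_to_uri('https://example.com/föö?bär=1')
#   'https://example.com/f%C3%B6%C3%B6?b%C3%A4r=1'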
6195def to_high_limit_path(path):
6196 if sys.platform in ['win32', 'cygwin']:
6197 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6198 return r'\\?\ '.rstrip() + os.path.abspath(path)
6199
6200 return path
76d321f6 6201
c76eb41b 6202
b868936c 6203def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6204 if field is None:
6205 val = obj if obj is not None else default
6206 else:
6207 val = obj.get(field, default)
76d321f6 6208 if func and val not in ignore:
6209 val = func(val)
6210 return template % val if val not in ignore else default
00dd0cd5 6211
6212
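# Examples: the template is only applied when the value is not in `ignore`;
# otherwise `default` is returned:
#
#   >>> format_field({'width': 1280}, 'width', '%dpx')
#   '1280px'
#   >>> format_field({'width': None}, 'width', '%dpx', default='unknown')
#   'unknown'
#   >>> format_field(42, template='id-%d')
#   'id-42'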
6213def clean_podcast_url(url):
6214 return re.sub(r'''(?x)
6215 (?:
6216 (?:
6217 chtbl\.com/track|
6218 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6219 play\.podtrac\.com
6220 )/[^/]+|
6221 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6222 flex\.acast\.com|
6223 pd(?:
6224 cn\.co| # https://podcorn.com/analytics-prefix/
6225 st\.fm # https://podsights.com/docs/
6226 )/e
6227 )/''', '', url)
ffcb8191
THD
6228
6229
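# Example: analytics/tracking prefixes are stripped from podcast enclosure
# URLs, leaving the direct media URL:
#
#   >>> clean_podcast_url('https://chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3')
#   'https://traffic.megaphone.fm/HSW7835899191.mp3'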
6230_HEX_TABLE = '0123456789abcdef'
6231
6232
6233def random_uuidv4():
6234 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6235
6236
6237def make_dir(path, to_screen=None):
6238 try:
6239 dn = os.path.dirname(path)
6240 if dn and not os.path.exists(dn):
6241 os.makedirs(dn)
6242 return True
6243 except (OSError, IOError) as err:
6244 if callable(to_screen):
6245 to_screen('unable to create directory ' + error_to_compat_str(err))
6246 return False
f74980cb 6247
6248
6249def get_executable_path():
c552ae88 6250 from zipimport import zipimporter
6251 if hasattr(sys, 'frozen'): # Running from PyInstaller
6252 path = os.path.dirname(sys.executable)
6253 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6254 path = os.path.join(os.path.dirname(__file__), '../..')
6255 else:
6256 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6257 return os.path.abspath(path)
6258
6259
2f567473 6260def load_plugins(name, suffix, namespace):
f74980cb 6261 plugin_info = [None]
6262 classes = []
6263 try:
6264 plugin_info = imp.find_module(
6265 name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6266 plugins = imp.load_module(name, *plugin_info)
6267 for name in dir(plugins):
2f567473 6268 if name in namespace:
6269 continue
6270 if not name.endswith(suffix):
f74980cb 6271 continue
6272 klass = getattr(plugins, name)
6273 classes.append(klass)
6274 namespace[name] = klass
6275 except ImportError:
6276 pass
6277 finally:
6278 if plugin_info[0] is not None:
6279 plugin_info[0].close()
6280 return classes
06167fbb 6281
6282
325ebc17 6283def traverse_obj(
352d63fd 6284 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6285 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6286 ''' Traverse nested list/dict/tuple
8f334380 6287 @param path_list A list of paths which are checked one by one.
6288 Each path is a list of keys where each key is a string,
6289 a tuple of strings or "...". When a tuple is given,
6290 all the keys given in the tuple are traversed, and
6291 "..." traverses all the keys in the object
325ebc17 6292 @param default Default value to return
352d63fd 6293 @param expected_type Only accept final value of this type (Can also be any callable)
6294 @param get_all Return all the values obtained from a path or only the first one
324ad820 6295 @param casesense Whether to consider dictionary keys as case sensitive
6296 @param is_user_input Whether the keys are generated from user input. If True,
6297 strings are converted to int/slice if necessary
6298 @param traverse_string Whether to traverse inside strings. If True, any
6299 non-compatible object will also be converted into a string
8f334380 6300 # TODO: Write tests
324ad820 6301 '''
325ebc17 6302 if not casesense:
dbf5416a 6303 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6304 path_list = (map(_lower, variadic(path)) for path in path_list)
6305
6306 def _traverse_obj(obj, path, _current_depth=0):
6307 nonlocal depth
575e17a1 6308 if obj is None:
6309 return None
8f334380 6310 path = tuple(variadic(path))
6311 for i, key in enumerate(path):
6312 if isinstance(key, (list, tuple)):
6313 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6314 key = ...
6315 if key is ...:
6316 obj = (obj.values() if isinstance(obj, dict)
6317 else obj if isinstance(obj, (list, tuple, LazyList))
6318 else str(obj) if traverse_string else [])
6319 _current_depth += 1
6320 depth = max(depth, _current_depth)
6321 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
575e17a1 6322 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6323 obj = (obj.get(key) if casesense or (key in obj)
6324 else next((v for k, v in obj.items() if _lower(k) == key), None))
6325 else:
6326 if is_user_input:
6327 key = (int_or_none(key) if ':' not in key
6328 else slice(*map(int_or_none, key.split(':'))))
8f334380 6329 if key == slice(None):
575e17a1 6330 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6331 if not isinstance(key, (int, slice)):
9fea350f 6332 return None
8f334380 6333 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6334 if not traverse_string:
6335 return None
6336 obj = str(obj)
6337 try:
6338 obj = obj[key]
6339 except IndexError:
324ad820 6340 return None
325ebc17 6341 return obj
6342
352d63fd 6343 if isinstance(expected_type, type):
6344 type_test = lambda val: val if isinstance(val, expected_type) else None
6345 elif expected_type is not None:
6346 type_test = expected_type
6347 else:
6348 type_test = lambda val: val
6349
8f334380 6350 for path in path_list:
6351 depth = 0
6352 val = _traverse_obj(obj, path)
325ebc17 6353 if val is not None:
8f334380 6354 if depth:
6355 for _ in range(depth - 1):
6586bca9 6356 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6357 val = [v for v in map(type_test, val) if v is not None]
8f334380 6358 if val:
352d63fd 6359 return val if get_all else val[0]
6360 else:
6361 val = type_test(val)
6362 if val is not None:
8f334380 6363 return val
325ebc17 6364 return default
324ad820 6365
6366
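# Illustrative examples (the docstring above still asks for tests; this is a
# minimal sketch of the path semantics):
#
#   >>> d = {'a': {'b': [1, 2]}}
#   >>> traverse_obj(d, ('a', 'b', 0))                  # plain key/index path
#   1
#   >>> traverse_obj(d, ('a', 'b', ...))                # '...' expands every element
#   [1, 2]
#   >>> traverse_obj(d, ('missing',), ('a', 'b', 1))    # paths are tried in order
#   2
#   >>> traverse_obj(d, ('a', 'b', 0), expected_type=str) is None
#   True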
6367def traverse_dict(dictn, keys, casesense=True):
6368 ''' For backward compatibility. Do not use '''
6369 return traverse_obj(dictn, keys, casesense=casesense,
6370 is_user_input=True, traverse_string=True)
6606817a 6371
6372
c634ad2a 6373def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6374 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
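# Examples: scalars (including strings/bytes, per `allowed_types`) are wrapped
# in a tuple, other iterables are returned unchanged:
#
#   >>> variadic('spam')
#   ('spam',)
#   >>> variadic(42)
#   (42,)
#   >>> variadic(['spam', 'eggs'])
#   ['spam', 'eggs']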