]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
[downloader/niconico] Pass custom headers (#1063)
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
201c1459 43 compat_HTTPError,
8f9312c3 44 compat_basestring,
8c25f81b 45 compat_chr,
1bab3437 46 compat_cookiejar,
d7cd9a9e 47 compat_ctypes_WINFUNCTYPE,
36e6f62c 48 compat_etree_fromstring,
51098426 49 compat_expanduser,
8c25f81b 50 compat_html_entities,
55b2f099 51 compat_html_entities_html5,
be4a824d 52 compat_http_client,
42db58ec 53 compat_integer_types,
e29663c6 54 compat_numeric_types,
c86b6142 55 compat_kwargs,
efa97bdc 56 compat_os_name,
8c25f81b 57 compat_parse_qs,
702ccf2d 58 compat_shlex_quote,
8c25f81b 59 compat_str,
edaa23f8 60 compat_struct_pack,
d3f8e038 61 compat_struct_unpack,
8c25f81b
PH
62 compat_urllib_error,
63 compat_urllib_parse,
15707c7e 64 compat_urllib_parse_urlencode,
8c25f81b 65 compat_urllib_parse_urlparse,
732044af 66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
7581bfc9 69 compat_urllib_parse_unquote_plus,
8c25f81b
PH
70 compat_urllib_request,
71 compat_urlparse,
810c10ba 72 compat_xpath,
8c25f81b 73)
4644ac55 74
71aff188
YCH
75from .socks import (
76 ProxyType,
77 sockssocket,
78)
79
4644ac55 80
51fb4995
YCH
81def register_socks_protocols():
82 # "Register" SOCKS protocols
d5ae6bb5
YCH
83 # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
84 # URLs with protocols not in urlparse.uses_netloc are not handled correctly
51fb4995
YCH
85 for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
86 if scheme not in compat_urlparse.uses_netloc:
87 compat_urlparse.uses_netloc.append(scheme)
88
89
468e2e92
FV
90# This is not clearly defined otherwise
91compiled_regex_type = type(re.compile(''))
92
f7a147e3
S
93
94def random_user_agent():
95 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
96 _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
1673 )
1674 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
3e669f36 1677std_headers = {
f7a147e3 1678 'User-Agent': random_user_agent(),
59ae15a5
PH
1679 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1680 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1681 'Accept-Encoding': 'gzip, deflate',
1682 'Accept-Language': 'en-us,en;q=0.5',
3e669f36 1683}
f427df17 1684
5f6a1245 1685
fb37eb25
S
1686USER_AGENTS = {
1687 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1688}
1689
1690
bf42a990
S
1691NO_DEFAULT = object()
1692
7105440c
YCH
1693ENGLISH_MONTH_NAMES = [
1694 'January', 'February', 'March', 'April', 'May', 'June',
1695 'July', 'August', 'September', 'October', 'November', 'December']
1696
f6717dec
S
1697MONTH_NAMES = {
1698 'en': ENGLISH_MONTH_NAMES,
1699 'fr': [
3e4185c3
S
1700 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1701 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
f6717dec 1702}
a942d6cb 1703
a7aaa398
S
1704KNOWN_EXTENSIONS = (
1705 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1706 'flv', 'f4v', 'f4a', 'f4b',
1707 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1708 'mkv', 'mka', 'mk3d',
1709 'avi', 'divx',
1710 'mov',
1711 'asf', 'wmv', 'wma',
1712 '3gp', '3g2',
1713 'mp3',
1714 'flac',
1715 'ape',
1716 'wav',
1717 'f4f', 'f4m', 'm3u8', 'smil')
1718
c587cbb7 1719# needed for sanitizing filenames in restricted mode
c8827027 1720ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
fd35d8cd
JW
1721 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1722 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1723
46f59e89
S
1724DATE_FORMATS = (
1725 '%d %B %Y',
1726 '%d %b %Y',
1727 '%B %d %Y',
cb655f34
S
1728 '%B %dst %Y',
1729 '%B %dnd %Y',
9d30c213 1730 '%B %drd %Y',
cb655f34 1731 '%B %dth %Y',
46f59e89 1732 '%b %d %Y',
cb655f34
S
1733 '%b %dst %Y',
1734 '%b %dnd %Y',
9d30c213 1735 '%b %drd %Y',
cb655f34 1736 '%b %dth %Y',
46f59e89
S
1737 '%b %dst %Y %I:%M',
1738 '%b %dnd %Y %I:%M',
9d30c213 1739 '%b %drd %Y %I:%M',
46f59e89
S
1740 '%b %dth %Y %I:%M',
1741 '%Y %m %d',
1742 '%Y-%m-%d',
bccdbd22 1743 '%Y.%m.%d.',
46f59e89 1744 '%Y/%m/%d',
81c13222 1745 '%Y/%m/%d %H:%M',
46f59e89 1746 '%Y/%m/%d %H:%M:%S',
1931a55e
THD
1747 '%Y%m%d%H%M',
1748 '%Y%m%d%H%M%S',
0c1c6f4b 1749 '%Y-%m-%d %H:%M',
46f59e89
S
1750 '%Y-%m-%d %H:%M:%S',
1751 '%Y-%m-%d %H:%M:%S.%f',
5014558a 1752 '%Y-%m-%d %H:%M:%S:%f',
46f59e89
S
1753 '%d.%m.%Y %H:%M',
1754 '%d.%m.%Y %H.%M',
1755 '%Y-%m-%dT%H:%M:%SZ',
1756 '%Y-%m-%dT%H:%M:%S.%fZ',
1757 '%Y-%m-%dT%H:%M:%S.%f0Z',
1758 '%Y-%m-%dT%H:%M:%S',
1759 '%Y-%m-%dT%H:%M:%S.%f',
1760 '%Y-%m-%dT%H:%M',
c6eed6b8
S
1761 '%b %d %Y at %H:%M',
1762 '%b %d %Y at %H:%M:%S',
b555ae9b
S
1763 '%B %d %Y at %H:%M',
1764 '%B %d %Y at %H:%M:%S',
a63d9bd0 1765 '%H:%M %d-%b-%Y',
46f59e89
S
1766)
1767
1768DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1769DATE_FORMATS_DAY_FIRST.extend([
1770 '%d-%m-%Y',
1771 '%d.%m.%Y',
1772 '%d.%m.%y',
1773 '%d/%m/%Y',
1774 '%d/%m/%y',
1775 '%d/%m/%Y %H:%M:%S',
1776])
1777
1778DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1779DATE_FORMATS_MONTH_FIRST.extend([
1780 '%m-%d-%Y',
1781 '%m.%d.%Y',
1782 '%m/%d/%Y',
1783 '%m/%d/%y',
1784 '%m/%d/%Y %H:%M:%S',
1785])
1786
06b3fe29 1787PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
22f5f5c6 1788JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1789
7105440c 1790
d77c3dfd 1791def preferredencoding():
59ae15a5 1792 """Get preferred encoding.
d77c3dfd 1793
59ae15a5
PH
1794 Returns the best encoding scheme for the system, based on
1795 locale.getpreferredencoding() and some further tweaks.
1796 """
1797 try:
1798 pref = locale.getpreferredencoding()
28e614de 1799 'TEST'.encode(pref)
70a1165b 1800 except Exception:
59ae15a5 1801 pref = 'UTF-8'
bae611f2 1802
59ae15a5 1803 return pref
d77c3dfd 1804
f4bfd65f 1805
181c8655 1806def write_json_file(obj, fn):
1394646a 1807 """ Encode obj as JSON and write it to fn, atomically if possible """
181c8655 1808
92120217 1809 fn = encodeFilename(fn)
61ee5aeb 1810 if sys.version_info < (3, 0) and sys.platform != 'win32':
ec5f6016
JMF
1811 encoding = get_filesystem_encoding()
1812 # os.path.basename returns a bytes object, but NamedTemporaryFile
1813 # will fail if the filename contains non ascii characters unless we
1814 # use a unicode object
1815 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1816 # the same for os.path.dirname
1817 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1818 else:
1819 path_basename = os.path.basename
1820 path_dirname = os.path.dirname
1821
73159f99
S
1822 args = {
1823 'suffix': '.tmp',
ec5f6016
JMF
1824 'prefix': path_basename(fn) + '.',
1825 'dir': path_dirname(fn),
73159f99
S
1826 'delete': False,
1827 }
1828
181c8655
PH
1829 # In Python 2.x, json.dump expects a bytestream.
1830 # In Python 3.x, it writes to a character stream
1831 if sys.version_info < (3, 0):
73159f99 1832 args['mode'] = 'wb'
181c8655 1833 else:
73159f99
S
1834 args.update({
1835 'mode': 'w',
1836 'encoding': 'utf-8',
1837 })
1838
c86b6142 1839 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
181c8655
PH
1840
1841 try:
1842 with tf:
6e84b215 1843 json.dump(obj, tf)
1394646a
IK
1844 if sys.platform == 'win32':
1845 # Need to remove existing file on Windows, else os.rename raises
1846 # WindowsError or FileExistsError.
1847 try:
1848 os.unlink(fn)
1849 except OSError:
1850 pass
9cd5f54e
R
1851 try:
1852 mask = os.umask(0)
1853 os.umask(mask)
1854 os.chmod(tf.name, 0o666 & ~mask)
1855 except OSError:
1856 pass
181c8655 1857 os.rename(tf.name, fn)
70a1165b 1858 except Exception:
181c8655
PH
1859 try:
1860 os.remove(tf.name)
1861 except OSError:
1862 pass
1863 raise
1864
1865
1866if sys.version_info >= (2, 7):
ee114368 1867 def find_xpath_attr(node, xpath, key, val=None):
59ae56fa 1868 """ Find the xpath xpath[@key=val] """
5d2354f1 1869 assert re.match(r'^[a-zA-Z_-]+$', key)
ee114368 1870 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
59ae56fa
PH
1871 return node.find(expr)
1872else:
ee114368 1873 def find_xpath_attr(node, xpath, key, val=None):
810c10ba 1874 for f in node.findall(compat_xpath(xpath)):
ee114368
S
1875 if key not in f.attrib:
1876 continue
1877 if val is None or f.attrib.get(key) == val:
59ae56fa
PH
1878 return f
1879 return None
1880
d7e66d39
JMF
1881# On python2.6 the xml.etree.ElementTree.Element methods don't support
1882# the namespace parameter
5f6a1245
JW
1883
1884
d7e66d39
JMF
1885def xpath_with_ns(path, ns_map):
1886 components = [c.split(':') for c in path.split('/')]
1887 replaced = []
1888 for c in components:
1889 if len(c) == 1:
1890 replaced.append(c[0])
1891 else:
1892 ns, tag = c
1893 replaced.append('{%s}%s' % (ns_map[ns], tag))
1894 return '/'.join(replaced)
1895
d77c3dfd 1896
a41fb80c 1897def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
578c0745 1898 def _find_xpath(xpath):
810c10ba 1899 return node.find(compat_xpath(xpath))
578c0745
S
1900
1901 if isinstance(xpath, (str, compat_str)):
1902 n = _find_xpath(xpath)
1903 else:
1904 for xp in xpath:
1905 n = _find_xpath(xp)
1906 if n is not None:
1907 break
d74bebd5 1908
8e636da4 1909 if n is None:
bf42a990
S
1910 if default is not NO_DEFAULT:
1911 return default
1912 elif fatal:
bf0ff932
PH
1913 name = xpath if name is None else name
1914 raise ExtractorError('Could not find XML element %s' % name)
1915 else:
1916 return None
a41fb80c
S
1917 return n
1918
1919
1920def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
8e636da4
S
1921 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1922 if n is None or n == default:
1923 return n
1924 if n.text is None:
1925 if default is not NO_DEFAULT:
1926 return default
1927 elif fatal:
1928 name = xpath if name is None else name
1929 raise ExtractorError('Could not find XML element\'s text %s' % name)
1930 else:
1931 return None
1932 return n.text
a41fb80c
S
1933
1934
1935def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1936 n = find_xpath_attr(node, xpath, key)
1937 if n is None:
1938 if default is not NO_DEFAULT:
1939 return default
1940 elif fatal:
1941 name = '%s[@%s]' % (xpath, key) if name is None else name
1942 raise ExtractorError('Could not find XML attribute %s' % name)
1943 else:
1944 return None
1945 return n.attrib[key]
bf0ff932
PH
1946
1947
9e6dd238 1948def get_element_by_id(id, html):
43e8fafd 1949 """Return the content of the tag with the specified ID in the passed HTML document"""
611c1dd9 1950 return get_element_by_attribute('id', id, html)
43e8fafd 1951
12ea2f30 1952
84c237fb 1953def get_element_by_class(class_name, html):
2af12ad9
TC
1954 """Return the content of the first tag with the specified class in the passed HTML document"""
1955 retval = get_elements_by_class(class_name, html)
1956 return retval[0] if retval else None
1957
1958
1959def get_element_by_attribute(attribute, value, html, escape_value=True):
1960 retval = get_elements_by_attribute(attribute, value, html, escape_value)
1961 return retval[0] if retval else None
1962
1963
1964def get_elements_by_class(class_name, html):
1965 """Return the content of all tags with the specified class in the passed HTML document as a list"""
1966 return get_elements_by_attribute(
84c237fb
YCH
1967 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1968 html, escape_value=False)
1969
1970
2af12ad9 1971def get_elements_by_attribute(attribute, value, html, escape_value=True):
43e8fafd 1972 """Return the content of the tag with the specified attribute in the passed HTML document"""
9e6dd238 1973
84c237fb
YCH
1974 value = re.escape(value) if escape_value else value
1975
2af12ad9
TC
1976 retlist = []
1977 for m in re.finditer(r'''(?xs)
38285056 1978 <([a-zA-Z0-9:._-]+)
609ff8ca 1979 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056 1980 \s+%s=['"]?%s['"]?
609ff8ca 1981 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056
PH
1982 \s*>
1983 (?P<content>.*?)
1984 </\1>
2af12ad9
TC
1985 ''' % (re.escape(attribute), value), html):
1986 res = m.group('content')
38285056 1987
2af12ad9
TC
1988 if res.startswith('"') or res.startswith("'"):
1989 res = res[1:-1]
38285056 1990
2af12ad9 1991 retlist.append(unescapeHTML(res))
a921f407 1992
2af12ad9 1993 return retlist
a921f407 1994
c5229f39 1995
8bb56eee
BF
1996class HTMLAttributeParser(compat_HTMLParser):
1997 """Trivial HTML parser to gather the attributes for a single element"""
b6e0c7d2 1998
8bb56eee 1999 def __init__(self):
c5229f39 2000 self.attrs = {}
8bb56eee
BF
2001 compat_HTMLParser.__init__(self)
2002
2003 def handle_starttag(self, tag, attrs):
2004 self.attrs = dict(attrs)
2005
c5229f39 2006
8bb56eee
BF
2007def extract_attributes(html_element):
2008 """Given a string for an HTML element such as
2009 <el
2010 a="foo" B="bar" c="&98;az" d=boz
2011 empty= noval entity="&amp;"
2012 sq='"' dq="'"
2013 >
2014 Decode and return a dictionary of attributes.
2015 {
2016 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2017 'empty': '', 'noval': None, 'entity': '&',
2018 'sq': '"', 'dq': '\''
2019 }.
2020 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2021 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2022 """
2023 parser = HTMLAttributeParser()
b4a3d461
S
2024 try:
2025 parser.feed(html_element)
2026 parser.close()
2027 # Older Python may throw HTMLParseError in case of malformed HTML
2028 except compat_HTMLParseError:
2029 pass
8bb56eee 2030 return parser.attrs
9e6dd238 2031
c5229f39 2032
9e6dd238 2033def clean_html(html):
59ae15a5 2034 """Clean an HTML snippet into a readable string"""
dd622d7c
PH
2035
2036 if html is None: # Convenience for sanitizing descriptions etc.
2037 return html
2038
59ae15a5
PH
2039 # Newline vs <br />
2040 html = html.replace('\n', ' ')
edd9221c
TF
2041 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2042 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
59ae15a5
PH
2043 # Strip html tags
2044 html = re.sub('<.*?>', '', html)
2045 # Replace html entities
2046 html = unescapeHTML(html)
7decf895 2047 return html.strip()
9e6dd238
FV
2048
2049
d77c3dfd 2050def sanitize_open(filename, open_mode):
59ae15a5
PH
2051 """Try to open the given filename, and slightly tweak it if this fails.
2052
2053 Attempts to open the given filename. If this fails, it tries to change
2054 the filename slightly, step by step, until it's either able to open it
2055 or it fails and raises a final exception, like the standard open()
2056 function.
2057
2058 It returns the tuple (stream, definitive_file_name).
2059 """
2060 try:
28e614de 2061 if filename == '-':
59ae15a5
PH
2062 if sys.platform == 'win32':
2063 import msvcrt
2064 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
898280a0 2065 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
59ae15a5
PH
2066 stream = open(encodeFilename(filename), open_mode)
2067 return (stream, filename)
2068 except (IOError, OSError) as err:
f45c185f
PH
2069 if err.errno in (errno.EACCES,):
2070 raise
59ae15a5 2071
f45c185f 2072 # In case of error, try to remove win32 forbidden chars
d55de57b 2073 alt_filename = sanitize_path(filename)
f45c185f
PH
2074 if alt_filename == filename:
2075 raise
2076 else:
2077 # An exception here should be caught in the caller
d55de57b 2078 stream = open(encodeFilename(alt_filename), open_mode)
f45c185f 2079 return (stream, alt_filename)
d77c3dfd
FV
2080
2081
2082def timeconvert(timestr):
59ae15a5
PH
2083 """Convert RFC 2822 defined time string into system timestamp"""
2084 timestamp = None
2085 timetuple = email.utils.parsedate_tz(timestr)
2086 if timetuple is not None:
2087 timestamp = email.utils.mktime_tz(timetuple)
2088 return timestamp
1c469a94 2089
5f6a1245 2090
796173d0 2091def sanitize_filename(s, restricted=False, is_id=False):
59ae15a5
PH
2092 """Sanitizes a string so it could be used as part of a filename.
2093 If restricted is set, use a stricter subset of allowed characters.
158af524
S
2094 Set is_id if this is not an arbitrary string, but an ID that should be kept
2095 if possible.
59ae15a5
PH
2096 """
2097 def replace_insane(char):
c587cbb7
AT
2098 if restricted and char in ACCENT_CHARS:
2099 return ACCENT_CHARS[char]
59ae15a5
PH
2100 if char == '?' or ord(char) < 32 or ord(char) == 127:
2101 return ''
2102 elif char == '"':
2103 return '' if restricted else '\''
2104 elif char == ':':
2105 return '_-' if restricted else ' -'
2106 elif char in '\\/|*<>':
2107 return '_'
627dcfff 2108 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
59ae15a5
PH
2109 return '_'
2110 if restricted and ord(char) > 127:
2111 return '_'
2112 return char
2113
639f1cea 2114 if s == '':
2115 return ''
2aeb06d6
PH
2116 # Handle timestamps
2117 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
28e614de 2118 result = ''.join(map(replace_insane, s))
796173d0
PH
2119 if not is_id:
2120 while '__' in result:
2121 result = result.replace('__', '_')
2122 result = result.strip('_')
2123 # Common case of "Foreign band name - English song title"
2124 if restricted and result.startswith('-_'):
2125 result = result[2:]
5a42414b
PH
2126 if result.startswith('-'):
2127 result = '_' + result[len('-'):]
a7440261 2128 result = result.lstrip('.')
796173d0
PH
2129 if not result:
2130 result = '_'
59ae15a5 2131 return result
d77c3dfd 2132
5f6a1245 2133
c2934512 2134def sanitize_path(s, force=False):
a2aaf4db 2135 """Sanitizes and normalizes path on Windows"""
c2934512 2136 if sys.platform == 'win32':
c4218ac3 2137 force = False
c2934512 2138 drive_or_unc, _ = os.path.splitdrive(s)
2139 if sys.version_info < (2, 7) and not drive_or_unc:
2140 drive_or_unc, _ = os.path.splitunc(s)
2141 elif force:
2142 drive_or_unc = ''
2143 else:
a2aaf4db 2144 return s
c2934512 2145
be531ef1
S
2146 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2147 if drive_or_unc:
a2aaf4db
S
2148 norm_path.pop(0)
2149 sanitized_path = [
ec85ded8 2150 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
a2aaf4db 2151 for path_part in norm_path]
be531ef1
S
2152 if drive_or_unc:
2153 sanitized_path.insert(0, drive_or_unc + os.path.sep)
c4218ac3 2154 elif force and s[0] == os.path.sep:
2155 sanitized_path.insert(0, os.path.sep)
a2aaf4db
S
2156 return os.path.join(*sanitized_path)
2157
2158
17bcc626 2159def sanitize_url(url):
befa4708
S
2160 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2161 # the number of unwanted failures due to missing protocol
2162 if url.startswith('//'):
2163 return 'http:%s' % url
2164 # Fix some common typos seen so far
2165 COMMON_TYPOS = (
067aa17e 2166 # https://github.com/ytdl-org/youtube-dl/issues/15649
befa4708
S
2167 (r'^httpss://', r'https://'),
2168 # https://bx1.be/lives/direct-tv/
2169 (r'^rmtp([es]?)://', r'rtmp\1://'),
2170 )
2171 for mistake, fixup in COMMON_TYPOS:
2172 if re.match(mistake, url):
2173 return re.sub(mistake, fixup, url)
bc6b9bcd 2174 return url
17bcc626
S
2175
2176
5435dcf9
HH
2177def extract_basic_auth(url):
2178 parts = compat_urlparse.urlsplit(url)
2179 if parts.username is None:
2180 return url, None
2181 url = compat_urlparse.urlunsplit(parts._replace(netloc=(
2182 parts.hostname if parts.port is None
2183 else '%s:%d' % (parts.hostname, parts.port))))
2184 auth_payload = base64.b64encode(
2185 ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
2186 return url, 'Basic ' + auth_payload.decode('utf-8')
2187
2188
67dda517 2189def sanitized_Request(url, *args, **kwargs):
bc6b9bcd 2190 url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
5435dcf9
HH
2191 if auth_header is not None:
2192 headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
2193 headers['Authorization'] = auth_header
2194 return compat_urllib_request.Request(url, *args, **kwargs)
67dda517
S
2195
2196
51098426
S
2197def expand_path(s):
2198 """Expand shell variables and ~"""
2199 return os.path.expandvars(compat_expanduser(s))
2200
2201
d77c3dfd 2202def orderedSet(iterable):
59ae15a5
PH
2203 """ Remove all duplicates from the input iterable """
2204 res = []
2205 for el in iterable:
2206 if el not in res:
2207 res.append(el)
2208 return res
d77c3dfd 2209
912b38b4 2210
55b2f099 2211def _htmlentity_transform(entity_with_semicolon):
4e408e47 2212 """Transforms an HTML entity to a character."""
55b2f099
YCH
2213 entity = entity_with_semicolon[:-1]
2214
4e408e47
PH
2215 # Known non-numeric HTML entity
2216 if entity in compat_html_entities.name2codepoint:
2217 return compat_chr(compat_html_entities.name2codepoint[entity])
2218
55b2f099
YCH
2219 # TODO: HTML5 allows entities without a semicolon. For example,
2220 # '&Eacuteric' should be decoded as 'Éric'.
2221 if entity_with_semicolon in compat_html_entities_html5:
2222 return compat_html_entities_html5[entity_with_semicolon]
2223
91757b0f 2224 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
4e408e47
PH
2225 if mobj is not None:
2226 numstr = mobj.group(1)
28e614de 2227 if numstr.startswith('x'):
4e408e47 2228 base = 16
28e614de 2229 numstr = '0%s' % numstr
4e408e47
PH
2230 else:
2231 base = 10
067aa17e 2232 # See https://github.com/ytdl-org/youtube-dl/issues/7518
7aefc49c
S
2233 try:
2234 return compat_chr(int(numstr, base))
2235 except ValueError:
2236 pass
4e408e47
PH
2237
2238 # Unknown entity in name, return its literal representation
7a3f0c00 2239 return '&%s;' % entity
4e408e47
PH
2240
2241
d77c3dfd 2242def unescapeHTML(s):
912b38b4
PH
2243 if s is None:
2244 return None
2245 assert type(s) == compat_str
d77c3dfd 2246
4e408e47 2247 return re.sub(
95f3f7c2 2248 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
d77c3dfd 2249
8bf48f23 2250
cdb19aa4 2251def escapeHTML(text):
2252 return (
2253 text
2254 .replace('&', '&amp;')
2255 .replace('<', '&lt;')
2256 .replace('>', '&gt;')
2257 .replace('"', '&quot;')
2258 .replace("'", '&#39;')
2259 )
2260
2261
f5b1bca9 2262def process_communicate_or_kill(p, *args, **kwargs):
2263 try:
2264 return p.communicate(*args, **kwargs)
2265 except BaseException: # Including KeyboardInterrupt
2266 p.kill()
2267 p.wait()
2268 raise
2269
2270
aa49acd1
S
2271def get_subprocess_encoding():
2272 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2273 # For subprocess calls, encode with locale encoding
2274 # Refer to http://stackoverflow.com/a/9951851/35070
2275 encoding = preferredencoding()
2276 else:
2277 encoding = sys.getfilesystemencoding()
2278 if encoding is None:
2279 encoding = 'utf-8'
2280 return encoding
2281
2282
8bf48f23 2283def encodeFilename(s, for_subprocess=False):
59ae15a5
PH
2284 """
2285 @param s The name of the file
2286 """
d77c3dfd 2287
8bf48f23 2288 assert type(s) == compat_str
d77c3dfd 2289
59ae15a5
PH
2290 # Python 3 has a Unicode API
2291 if sys.version_info >= (3, 0):
2292 return s
0f00efed 2293
aa49acd1
S
2294 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2295 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2296 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2297 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2298 return s
2299
8ee239e9
YCH
2300 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2301 if sys.platform.startswith('java'):
2302 return s
2303
aa49acd1
S
2304 return s.encode(get_subprocess_encoding(), 'ignore')
2305
2306
2307def decodeFilename(b, for_subprocess=False):
2308
2309 if sys.version_info >= (3, 0):
2310 return b
2311
2312 if not isinstance(b, bytes):
2313 return b
2314
2315 return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2316
f07b74fc
PH
2317
2318def encodeArgument(s):
2319 if not isinstance(s, compat_str):
2320 # Legacy code that uses byte strings
2321 # Uncomment the following line after fixing all post processors
7af808a5 2322 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2323 s = s.decode('ascii')
2324 return encodeFilename(s, True)
2325
2326
aa49acd1
S
2327def decodeArgument(b):
2328 return decodeFilename(b, True)
2329
2330
8271226a
PH
2331def decodeOption(optval):
2332 if optval is None:
2333 return optval
2334 if isinstance(optval, bytes):
2335 optval = optval.decode(preferredencoding())
2336
2337 assert isinstance(optval, compat_str)
2338 return optval
1c256f70 2339
5f6a1245 2340
cdb19aa4 2341def formatSeconds(secs, delim=':', msec=False):
4539dd30 2342 if secs > 3600:
cdb19aa4 2343 ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
4539dd30 2344 elif secs > 60:
cdb19aa4 2345 ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
4539dd30 2346 else:
cdb19aa4 2347 ret = '%d' % secs
2348 return '%s.%03d' % (ret, secs % 1) if msec else ret
4539dd30 2349
a0ddb8a2 2350
be4a824d
PH
2351def make_HTTPS_handler(params, **kwargs):
2352 opts_no_check_certificate = params.get('nocheckcertificate', False)
0db261ba 2353 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
be5f2c19 2354 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
0db261ba 2355 if opts_no_check_certificate:
be5f2c19 2356 context.check_hostname = False
0db261ba 2357 context.verify_mode = ssl.CERT_NONE
a2366922 2358 try:
be4a824d 2359 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
a2366922
PH
2360 except TypeError:
2361 # Python 2.7.8
2362 # (create_default_context present but HTTPSHandler has no context=)
2363 pass
2364
2365 if sys.version_info < (3, 2):
d7932313 2366 return YoutubeDLHTTPSHandler(params, **kwargs)
aa37e3d4 2367 else: # Python < 3.4
d7932313 2368 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
ea6d901e 2369 context.verify_mode = (ssl.CERT_NONE
dca08720 2370 if opts_no_check_certificate
ea6d901e 2371 else ssl.CERT_REQUIRED)
303b479e 2372 context.set_default_verify_paths()
be4a824d 2373 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2374
732ea2f0 2375
5873d4cc 2376def bug_reports_message(before=';'):
08f2a92c 2377 if ytdl_is_updateable():
7a5c1cfe 2378 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2379 else:
7a5c1cfe 2380 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2381 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2382 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2383 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2384
2385 before = before.rstrip()
2386 if not before or before.endswith(('.', '!', '?')):
2387 msg = msg[0].title() + msg[1:]
2388
2389 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2390
2391
bf5b9d85
PM
2392class YoutubeDLError(Exception):
2393 """Base exception for YoutubeDL errors."""
2394 pass
2395
2396
3158150c 2397network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2398if hasattr(ssl, 'CertificateError'):
2399 network_exceptions.append(ssl.CertificateError)
2400network_exceptions = tuple(network_exceptions)
2401
2402
bf5b9d85 2403class ExtractorError(YoutubeDLError):
1c256f70 2404 """Error during info extraction."""
5f6a1245 2405
1151c407 2406 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2407 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2408 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2409 """
3158150c 2410 if sys.exc_info()[0] in network_exceptions:
9a82b238 2411 expected = True
d5979c5d 2412
526d74ec 2413 self.msg = str(msg)
1c256f70 2414 self.traceback = tb
1151c407 2415 self.expected = expected
2eabb802 2416 self.cause = cause
d11271dd 2417 self.video_id = video_id
1151c407 2418 self.ie = ie
2419 self.exc_info = sys.exc_info() # preserve original exception
2420
2421 super(ExtractorError, self).__init__(''.join((
2422 format_field(ie, template='[%s] '),
2423 format_field(video_id, template='%s: '),
526d74ec 2424 self.msg,
1151c407 2425 format_field(cause, template=' (caused by %r)'),
2426 '' if expected else bug_reports_message())))
1c256f70 2427
01951dda
PH
2428 def format_traceback(self):
2429 if self.traceback is None:
2430 return None
28e614de 2431 return ''.join(traceback.format_tb(self.traceback))
01951dda 2432
1c256f70 2433
416c7fcb
PH
2434class UnsupportedError(ExtractorError):
2435 def __init__(self, url):
2436 super(UnsupportedError, self).__init__(
2437 'Unsupported URL: %s' % url, expected=True)
2438 self.url = url
2439
2440
55b3e45b
JMF
2441class RegexNotFoundError(ExtractorError):
2442 """Error when a regex didn't match"""
2443 pass
2444
2445
773f291d
S
2446class GeoRestrictedError(ExtractorError):
2447 """Geographic restriction Error exception.
2448
2449 This exception may be thrown when a video is not available from your
2450 geographic location due to geographic restrictions imposed by a website.
2451 """
b6e0c7d2 2452
773f291d
S
2453 def __init__(self, msg, countries=None):
2454 super(GeoRestrictedError, self).__init__(msg, expected=True)
2455 self.msg = msg
2456 self.countries = countries
2457
2458
bf5b9d85 2459class DownloadError(YoutubeDLError):
59ae15a5 2460 """Download Error exception.
d77c3dfd 2461
59ae15a5
PH
2462 This exception may be thrown by FileDownloader objects if they are not
2463 configured to continue on errors. They will contain the appropriate
2464 error message.
2465 """
5f6a1245 2466
8cc83b8d
FV
2467 def __init__(self, msg, exc_info=None):
2468 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2469 super(DownloadError, self).__init__(msg)
2470 self.exc_info = exc_info
d77c3dfd
FV
2471
2472
498f5606 2473class EntryNotInPlaylist(YoutubeDLError):
2474 """Entry not in playlist exception.
2475
2476 This exception will be thrown by YoutubeDL when a requested entry
2477 is not found in the playlist info_dict
2478 """
2479 pass
2480
2481
bf5b9d85 2482class SameFileError(YoutubeDLError):
59ae15a5 2483 """Same File exception.
d77c3dfd 2484
59ae15a5
PH
2485 This exception will be thrown by FileDownloader objects if they detect
2486 multiple files would have to be downloaded to the same file on disk.
2487 """
2488 pass
d77c3dfd
FV
2489
2490
bf5b9d85 2491class PostProcessingError(YoutubeDLError):
59ae15a5 2492 """Post Processing exception.
d77c3dfd 2493
59ae15a5
PH
2494 This exception may be raised by PostProcessor's .run() method to
2495 indicate an error in the postprocessing task.
2496 """
5f6a1245 2497
7851b379 2498 def __init__(self, msg):
bf5b9d85 2499 super(PostProcessingError, self).__init__(msg)
7851b379 2500 self.msg = msg
d77c3dfd 2501
5f6a1245 2502
8b0d7497 2503class ExistingVideoReached(YoutubeDLError):
2504 """ --max-downloads limit has been reached. """
2505 pass
2506
2507
2508class RejectedVideoReached(YoutubeDLError):
2509 """ --max-downloads limit has been reached. """
2510 pass
2511
2512
51d9739f 2513class ThrottledDownload(YoutubeDLError):
2514 """ Download speed below --throttled-rate. """
2515 pass
2516
2517
bf5b9d85 2518class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2519 """ --max-downloads limit has been reached. """
2520 pass
d77c3dfd
FV
2521
2522
bf5b9d85 2523class UnavailableVideoError(YoutubeDLError):
59ae15a5 2524 """Unavailable Format exception.
d77c3dfd 2525
59ae15a5
PH
2526 This exception will be thrown when a video is requested
2527 in a format that is not available for that video.
2528 """
2529 pass
d77c3dfd
FV
2530
2531
bf5b9d85 2532class ContentTooShortError(YoutubeDLError):
59ae15a5 2533 """Content Too Short exception.
d77c3dfd 2534
59ae15a5
PH
2535 This exception may be raised by FileDownloader objects when a file they
2536 download is too small for what the server announced first, indicating
2537 the connection was probably interrupted.
2538 """
d77c3dfd 2539
59ae15a5 2540 def __init__(self, downloaded, expected):
bf5b9d85
PM
2541 super(ContentTooShortError, self).__init__(
2542 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2543 )
2c7ed247 2544 # Both in bytes
59ae15a5
PH
2545 self.downloaded = downloaded
2546 self.expected = expected
d77c3dfd 2547
5f6a1245 2548
bf5b9d85 2549class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2550 def __init__(self, code=None, msg='Unknown error'):
2551 super(XAttrMetadataError, self).__init__(msg)
2552 self.code = code
bd264412 2553 self.msg = msg
efa97bdc
YCH
2554
2555 # Parsing code and msg
3089bc74 2556 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2557 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2558 self.reason = 'NO_SPACE'
2559 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2560 self.reason = 'VALUE_TOO_LONG'
2561 else:
2562 self.reason = 'NOT_SUPPORTED'
2563
2564
bf5b9d85 2565class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2566 pass
2567
2568
c5a59d93 2569def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2570 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2571 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2572 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2573 if sys.version_info < (3, 0):
65220c3b
S
2574 kwargs['strict'] = True
2575 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2576 source_address = ydl_handler._params.get('source_address')
8959018a 2577
be4a824d 2578 if source_address is not None:
8959018a
AU
2579 # This is to workaround _create_connection() from socket where it will try all
2580 # address data from getaddrinfo() including IPv6. This filters the result from
2581 # getaddrinfo() based on the source_address value.
2582 # This is based on the cpython socket.create_connection() function.
2583 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2584 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2585 host, port = address
2586 err = None
2587 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2588 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2589 ip_addrs = [addr for addr in addrs if addr[0] == af]
2590 if addrs and not ip_addrs:
2591 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2592 raise socket.error(
2593 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2594 % (ip_version, source_address[0]))
8959018a
AU
2595 for res in ip_addrs:
2596 af, socktype, proto, canonname, sa = res
2597 sock = None
2598 try:
2599 sock = socket.socket(af, socktype, proto)
2600 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2601 sock.settimeout(timeout)
2602 sock.bind(source_address)
2603 sock.connect(sa)
2604 err = None # Explicitly break reference cycle
2605 return sock
2606 except socket.error as _:
2607 err = _
2608 if sock is not None:
2609 sock.close()
2610 if err is not None:
2611 raise err
2612 else:
9e21e6d9
S
2613 raise socket.error('getaddrinfo returns an empty list')
2614 if hasattr(hc, '_create_connection'):
2615 hc._create_connection = _create_connection
be4a824d
PH
2616 sa = (source_address, 0)
2617 if hasattr(hc, 'source_address'): # Python 2.7+
2618 hc.source_address = sa
2619 else: # Python 2.6
2620 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2621 sock = _create_connection(
be4a824d
PH
2622 (self.host, self.port), self.timeout, sa)
2623 if is_https:
d7932313
PH
2624 self.sock = ssl.wrap_socket(
2625 sock, self.key_file, self.cert_file,
2626 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2627 else:
2628 self.sock = sock
2629 hc.connect = functools.partial(_hc_connect, hc)
2630
2631 return hc
2632
2633
87f0e62d 2634def handle_youtubedl_headers(headers):
992fc9d6
YCH
2635 filtered_headers = headers
2636
2637 if 'Youtubedl-no-compression' in filtered_headers:
2638 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2639 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2640
992fc9d6 2641 return filtered_headers
87f0e62d
YCH
2642
2643
acebc9cd 2644class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2645 """Handler for HTTP requests and responses.
2646
2647 This class, when installed with an OpenerDirector, automatically adds
2648 the standard headers to every HTTP request and handles gzipped and
2649 deflated responses from web servers. If compression is to be avoided in
2650 a particular request, the original request in the program code only has
0424ec30 2651 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2652 removed before making the real request.
2653
2654 Part of this code was copied from:
2655
2656 http://techknack.net/python-urllib2-handlers/
2657
2658 Andrew Rowls, the author of that code, agreed to release it to the
2659 public domain.
2660 """
2661
be4a824d
PH
2662 def __init__(self, params, *args, **kwargs):
2663 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2664 self._params = params
2665
2666 def http_open(self, req):
71aff188
YCH
2667 conn_class = compat_http_client.HTTPConnection
2668
2669 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2670 if socks_proxy:
2671 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2672 del req.headers['Ytdl-socks-proxy']
2673
be4a824d 2674 return self.do_open(functools.partial(
71aff188 2675 _create_http_connection, self, conn_class, False),
be4a824d
PH
2676 req)
2677
59ae15a5
PH
2678 @staticmethod
2679 def deflate(data):
fc2119f2 2680 if not data:
2681 return data
59ae15a5
PH
2682 try:
2683 return zlib.decompress(data, -zlib.MAX_WBITS)
2684 except zlib.error:
2685 return zlib.decompress(data)
2686
acebc9cd 2687 def http_request(self, req):
51f267d9
S
2688 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2689 # always respected by websites, some tend to give out URLs with non percent-encoded
2690 # non-ASCII characters (see telemb.py, ard.py [#3412])
2691 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2692 # To work around aforementioned issue we will replace request's original URL with
2693 # percent-encoded one
2694 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2695 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2696 url = req.get_full_url()
2697 url_escaped = escape_url(url)
2698
2699 # Substitute URL if any change after escaping
2700 if url != url_escaped:
15d260eb 2701 req = update_Request(req, url=url_escaped)
51f267d9 2702
33ac271b 2703 for h, v in std_headers.items():
3d5f7a39
JK
2704 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2705 # The dict keys are capitalized because of this bug by urllib
2706 if h.capitalize() not in req.headers:
33ac271b 2707 req.add_header(h, v)
87f0e62d
YCH
2708
2709 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2710
2711 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2712 # Python 2.6 is brain-dead when it comes to fragments
2713 req._Request__original = req._Request__original.partition('#')[0]
2714 req._Request__r_type = req._Request__r_type.partition('#')[0]
2715
59ae15a5
PH
2716 return req
2717
acebc9cd 2718 def http_response(self, req, resp):
59ae15a5
PH
2719 old_resp = resp
2720 # gzip
2721 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2722 content = resp.read()
2723 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2724 try:
2725 uncompressed = io.BytesIO(gz.read())
2726 except IOError as original_ioerror:
2727 # There may be junk add the end of the file
2728 # See http://stackoverflow.com/q/4928560/35070 for details
2729 for i in range(1, 1024):
2730 try:
2731 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2732 uncompressed = io.BytesIO(gz.read())
2733 except IOError:
2734 continue
2735 break
2736 else:
2737 raise original_ioerror
b407d853 2738 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2739 resp.msg = old_resp.msg
c047270c 2740 del resp.headers['Content-encoding']
59ae15a5
PH
2741 # deflate
2742 if resp.headers.get('Content-encoding', '') == 'deflate':
2743 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2744 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2745 resp.msg = old_resp.msg
c047270c 2746 del resp.headers['Content-encoding']
ad729172 2747 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2748 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2749 if 300 <= resp.code < 400:
2750 location = resp.headers.get('Location')
2751 if location:
2752 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2753 if sys.version_info >= (3, 0):
2754 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2755 else:
2756 location = location.decode('utf-8')
5a4d9ddb
S
2757 location_escaped = escape_url(location)
2758 if location != location_escaped:
2759 del resp.headers['Location']
9a4aec8b
YCH
2760 if sys.version_info < (3, 0):
2761 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2762 resp.headers['Location'] = location_escaped
59ae15a5 2763 return resp
0f8d03f8 2764
acebc9cd
PH
2765 https_request = http_request
2766 https_response = http_response
bf50b038 2767
5de90176 2768
71aff188
YCH
2769def make_socks_conn_class(base_class, socks_proxy):
2770 assert issubclass(base_class, (
2771 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2772
2773 url_components = compat_urlparse.urlparse(socks_proxy)
2774 if url_components.scheme.lower() == 'socks5':
2775 socks_type = ProxyType.SOCKS5
2776 elif url_components.scheme.lower() in ('socks', 'socks4'):
2777 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2778 elif url_components.scheme.lower() == 'socks4a':
2779 socks_type = ProxyType.SOCKS4A
71aff188 2780
cdd94c2e
YCH
2781 def unquote_if_non_empty(s):
2782 if not s:
2783 return s
2784 return compat_urllib_parse_unquote_plus(s)
2785
71aff188
YCH
2786 proxy_args = (
2787 socks_type,
2788 url_components.hostname, url_components.port or 1080,
2789 True, # Remote DNS
cdd94c2e
YCH
2790 unquote_if_non_empty(url_components.username),
2791 unquote_if_non_empty(url_components.password),
71aff188
YCH
2792 )
2793
2794 class SocksConnection(base_class):
2795 def connect(self):
2796 self.sock = sockssocket()
2797 self.sock.setproxy(*proxy_args)
2798 if type(self.timeout) in (int, float):
2799 self.sock.settimeout(self.timeout)
2800 self.sock.connect((self.host, self.port))
2801
2802 if isinstance(self, compat_http_client.HTTPSConnection):
2803 if hasattr(self, '_context'): # Python > 2.6
2804 self.sock = self._context.wrap_socket(
2805 self.sock, server_hostname=self.host)
2806 else:
2807 self.sock = ssl.wrap_socket(self.sock)
2808
2809 return SocksConnection
2810
2811
be4a824d
PH
2812class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2813 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2814 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2815 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2816 self._params = params
2817
2818 def https_open(self, req):
4f264c02 2819 kwargs = {}
71aff188
YCH
2820 conn_class = self._https_conn_class
2821
4f264c02
JMF
2822 if hasattr(self, '_context'): # python > 2.6
2823 kwargs['context'] = self._context
2824 if hasattr(self, '_check_hostname'): # python 3.x
2825 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2826
2827 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2828 if socks_proxy:
2829 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2830 del req.headers['Ytdl-socks-proxy']
2831
be4a824d 2832 return self.do_open(functools.partial(
71aff188 2833 _create_http_connection, self, conn_class, True),
4f264c02 2834 req, **kwargs)
be4a824d
PH
2835
2836
1bab3437 2837class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2838 """
2839 See [1] for cookie file format.
2840
2841 1. https://curl.haxx.se/docs/http-cookies.html
2842 """
e7e62441 2843 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2844 _ENTRY_LEN = 7
2845 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2846# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2847
2848'''
2849 _CookieFileEntry = collections.namedtuple(
2850 'CookieFileEntry',
2851 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2852
1bab3437 2853 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2854 """
2855 Save cookies to a file.
2856
2857 Most of the code is taken from CPython 3.8 and slightly adapted
2858 to support cookie files with UTF-8 in both python 2 and 3.
2859 """
2860 if filename is None:
2861 if self.filename is not None:
2862 filename = self.filename
2863 else:
2864 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2865
1bab3437
S
2866 # Store session cookies with `expires` set to 0 instead of an empty
2867 # string
2868 for cookie in self:
2869 if cookie.expires is None:
2870 cookie.expires = 0
c380cc28
S
2871
2872 with io.open(filename, 'w', encoding='utf-8') as f:
2873 f.write(self._HEADER)
2874 now = time.time()
2875 for cookie in self:
2876 if not ignore_discard and cookie.discard:
2877 continue
2878 if not ignore_expires and cookie.is_expired(now):
2879 continue
2880 if cookie.secure:
2881 secure = 'TRUE'
2882 else:
2883 secure = 'FALSE'
2884 if cookie.domain.startswith('.'):
2885 initial_dot = 'TRUE'
2886 else:
2887 initial_dot = 'FALSE'
2888 if cookie.expires is not None:
2889 expires = compat_str(cookie.expires)
2890 else:
2891 expires = ''
2892 if cookie.value is None:
2893 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2894 # with no name, whereas http.cookiejar regards it as a
2895 # cookie with no value.
2896 name = ''
2897 value = cookie.name
2898 else:
2899 name = cookie.name
2900 value = cookie.value
2901 f.write(
2902 '\t'.join([cookie.domain, initial_dot, cookie.path,
2903 secure, expires, name, value]) + '\n')
1bab3437
S
2904
2905 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2906 """Load cookies from a file."""
2907 if filename is None:
2908 if self.filename is not None:
2909 filename = self.filename
2910 else:
2911 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2912
c380cc28
S
2913 def prepare_line(line):
2914 if line.startswith(self._HTTPONLY_PREFIX):
2915 line = line[len(self._HTTPONLY_PREFIX):]
2916 # comments and empty lines are fine
2917 if line.startswith('#') or not line.strip():
2918 return line
2919 cookie_list = line.split('\t')
2920 if len(cookie_list) != self._ENTRY_LEN:
2921 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2922 cookie = self._CookieFileEntry(*cookie_list)
2923 if cookie.expires_at and not cookie.expires_at.isdigit():
2924 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2925 return line
2926
e7e62441 2927 cf = io.StringIO()
c380cc28 2928 with io.open(filename, encoding='utf-8') as f:
e7e62441 2929 for line in f:
c380cc28
S
2930 try:
2931 cf.write(prepare_line(line))
2932 except compat_cookiejar.LoadError as e:
2933 write_string(
2934 'WARNING: skipping cookie file entry due to %s: %r\n'
2935 % (e, line), sys.stderr)
2936 continue
e7e62441 2937 cf.seek(0)
2938 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2939 # Session cookies are denoted by either `expires` field set to
2940 # an empty string or 0. MozillaCookieJar only recognizes the former
2941 # (see [1]). So we need force the latter to be recognized as session
2942 # cookies on our own.
2943 # Session cookies may be important for cookies-based authentication,
2944 # e.g. usually, when user does not check 'Remember me' check box while
2945 # logging in on a site, some important cookies are stored as session
2946 # cookies so that not recognizing them will result in failed login.
2947 # 1. https://bugs.python.org/issue17164
2948 for cookie in self:
2949 # Treat `expires=0` cookies as session cookies
2950 if cookie.expires == 0:
2951 cookie.expires = None
2952 cookie.discard = True
2953
2954
a6420bf5
S
2955class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2956 def __init__(self, cookiejar=None):
2957 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2958
2959 def http_response(self, request, response):
2960 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2961 # characters in Set-Cookie HTTP header of last response (see
067aa17e 2962 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
2963 # In order to at least prevent crashing we will percent encode Set-Cookie
2964 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
2965 # if sys.version_info < (3, 0) and response.headers:
2966 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2967 # set_cookie = response.headers.get(set_cookie_header)
2968 # if set_cookie:
2969 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2970 # if set_cookie != set_cookie_escaped:
2971 # del response.headers[set_cookie_header]
2972 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
2973 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2974
f5fa042c 2975 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
2976 https_response = http_response
2977
2978
fca6dba8 2979class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 2980 """YoutubeDL redirect handler
2981
2982 The code is based on HTTPRedirectHandler implementation from CPython [1].
2983
2984 This redirect handler solves two issues:
2985 - ensures redirect URL is always unicode under python 2
2986 - introduces support for experimental HTTP response status code
2987 308 Permanent Redirect [2] used by some sites [3]
2988
2989 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2990 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2991 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2992 """
2993
2994 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2995
2996 def redirect_request(self, req, fp, code, msg, headers, newurl):
2997 """Return a Request or None in response to a redirect.
2998
2999 This is called by the http_error_30x methods when a
3000 redirection response is received. If a redirection should
3001 take place, return a new Request to allow http_error_30x to
3002 perform the redirect. Otherwise, raise HTTPError if no-one
3003 else should try to handle this url. Return None if you can't
3004 but another Handler might.
3005 """
3006 m = req.get_method()
3007 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3008 or code in (301, 302, 303) and m == "POST")):
3009 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3010 # Strictly (according to RFC 2616), 301 or 302 in response to
3011 # a POST MUST NOT cause a redirection without confirmation
3012 # from the user (of urllib.request, in this case). In practice,
3013 # essentially all clients do redirect in this case, so we do
3014 # the same.
3015
3016 # On python 2 urlh.geturl() may sometimes return redirect URL
3017 # as byte string instead of unicode. This workaround allows
3018 # to force it always return unicode.
3019 if sys.version_info[0] < 3:
3020 newurl = compat_str(newurl)
3021
3022 # Be conciliant with URIs containing a space. This is mainly
3023 # redundant with the more complete encoding done in http_error_302(),
3024 # but it is kept for compatibility with other callers.
3025 newurl = newurl.replace(' ', '%20')
3026
3027 CONTENT_HEADERS = ("content-length", "content-type")
3028 # NB: don't use dict comprehension for python 2.6 compatibility
3029 newheaders = dict((k, v) for k, v in req.headers.items()
3030 if k.lower() not in CONTENT_HEADERS)
3031 return compat_urllib_request.Request(
3032 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3033 unverifiable=True)
fca6dba8
S
3034
3035
46f59e89
S
3036def extract_timezone(date_str):
3037 m = re.search(
f137e4c2 3038 r'''(?x)
3039 ^.{8,}? # >=8 char non-TZ prefix, if present
3040 (?P<tz>Z| # just the UTC Z, or
3041 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3042 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3043 [ ]? # optional space
3044 (?P<sign>\+|-) # +/-
3045 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3046 $)
3047 ''', date_str)
46f59e89
S
3048 if not m:
3049 timezone = datetime.timedelta()
3050 else:
3051 date_str = date_str[:-len(m.group('tz'))]
3052 if not m.group('sign'):
3053 timezone = datetime.timedelta()
3054 else:
3055 sign = 1 if m.group('sign') == '+' else -1
3056 timezone = datetime.timedelta(
3057 hours=sign * int(m.group('hours')),
3058 minutes=sign * int(m.group('minutes')))
3059 return timezone, date_str
3060
3061
08b38d54 3062def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3063 """ Return a UNIX timestamp from the given date """
3064
3065 if date_str is None:
3066 return None
3067
52c3a6e4
S
3068 date_str = re.sub(r'\.[0-9]+', '', date_str)
3069
08b38d54 3070 if timezone is None:
46f59e89
S
3071 timezone, date_str = extract_timezone(date_str)
3072
52c3a6e4
S
3073 try:
3074 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3075 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3076 return calendar.timegm(dt.timetuple())
3077 except ValueError:
3078 pass
912b38b4
PH
3079
3080
46f59e89
S
3081def date_formats(day_first=True):
3082 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3083
3084
42bdd9d0 3085def unified_strdate(date_str, day_first=True):
bf50b038 3086 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3087
3088 if date_str is None:
3089 return None
bf50b038 3090 upload_date = None
5f6a1245 3091 # Replace commas
026fcc04 3092 date_str = date_str.replace(',', ' ')
42bdd9d0 3093 # Remove AM/PM + timezone
9bb8e0a3 3094 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3095 _, date_str = extract_timezone(date_str)
42bdd9d0 3096
46f59e89 3097 for expression in date_formats(day_first):
bf50b038
JMF
3098 try:
3099 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3100 except ValueError:
bf50b038 3101 pass
42393ce2
PH
3102 if upload_date is None:
3103 timetuple = email.utils.parsedate_tz(date_str)
3104 if timetuple:
c6b9cf05
S
3105 try:
3106 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3107 except ValueError:
3108 pass
6a750402
JMF
3109 if upload_date is not None:
3110 return compat_str(upload_date)
bf50b038 3111
5f6a1245 3112
46f59e89
S
3113def unified_timestamp(date_str, day_first=True):
3114 if date_str is None:
3115 return None
3116
2ae2ffda 3117 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3118
7dc2a74e 3119 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3120 timezone, date_str = extract_timezone(date_str)
3121
3122 # Remove AM/PM + timezone
3123 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3124
deef3195
S
3125 # Remove unrecognized timezones from ISO 8601 alike timestamps
3126 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3127 if m:
3128 date_str = date_str[:-len(m.group('tz'))]
3129
f226880c
PH
3130 # Python only supports microseconds, so remove nanoseconds
3131 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3132 if m:
3133 date_str = m.group(1)
3134
46f59e89
S
3135 for expression in date_formats(day_first):
3136 try:
7dc2a74e 3137 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3138 return calendar.timegm(dt.timetuple())
3139 except ValueError:
3140 pass
3141 timetuple = email.utils.parsedate_tz(date_str)
3142 if timetuple:
7dc2a74e 3143 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3144
3145
28e614de 3146def determine_ext(url, default_ext='unknown_video'):
85750f89 3147 if url is None or '.' not in url:
f4776371 3148 return default_ext
9cb9a5df 3149 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3150 if re.match(r'^[A-Za-z0-9]+$', guess):
3151 return guess
a7aaa398
S
3152 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3153 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3154 return guess.rstrip('/')
73e79f2a 3155 else:
cbdbb766 3156 return default_ext
73e79f2a 3157
5f6a1245 3158
824fa511
S
3159def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3160 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3161
5f6a1245 3162
9e62f283 3163def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3164 """
3165 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3166 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3167
3168 format: string date format used to return datetime object from
3169 precision: round the time portion of a datetime object.
3170 auto|microsecond|second|minute|hour|day.
3171 auto: round to the unit provided in date_str (if applicable).
3172 """
3173 auto_precision = False
3174 if precision == 'auto':
3175 auto_precision = True
3176 precision = 'microsecond'
3177 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3178 if date_str in ('now', 'today'):
37254abc 3179 return today
f8795e10
PH
3180 if date_str == 'yesterday':
3181 return today - datetime.timedelta(days=1)
9e62f283 3182 match = re.match(
3183 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3184 date_str)
37254abc 3185 if match is not None:
9e62f283 3186 start_time = datetime_from_str(match.group('start'), precision, format)
3187 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3188 unit = match.group('unit')
9e62f283 3189 if unit == 'month' or unit == 'year':
3190 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3191 unit = 'day'
9e62f283 3192 else:
3193 if unit == 'week':
3194 unit = 'day'
3195 time *= 7
3196 delta = datetime.timedelta(**{unit + 's': time})
3197 new_date = start_time + delta
3198 if auto_precision:
3199 return datetime_round(new_date, unit)
3200 return new_date
3201
3202 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3203
3204
3205def date_from_str(date_str, format='%Y%m%d'):
3206 """
3207 Return a datetime object from a string in the format YYYYMMDD or
3208 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3209
3210 format: string date format used to return datetime object from
3211 """
3212 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3213
3214
3215def datetime_add_months(dt, months):
3216 """Increment/Decrement a datetime object by months."""
3217 month = dt.month + months - 1
3218 year = dt.year + month // 12
3219 month = month % 12 + 1
3220 day = min(dt.day, calendar.monthrange(year, month)[1])
3221 return dt.replace(year, month, day)
3222
3223
3224def datetime_round(dt, precision='day'):
3225 """
3226 Round a datetime object's time to a specific precision
3227 """
3228 if precision == 'microsecond':
3229 return dt
3230
3231 unit_seconds = {
3232 'day': 86400,
3233 'hour': 3600,
3234 'minute': 60,
3235 'second': 1,
3236 }
3237 roundto = lambda x, n: ((x + n / 2) // n) * n
3238 timestamp = calendar.timegm(dt.timetuple())
3239 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3240
3241
e63fc1be 3242def hyphenate_date(date_str):
3243 """
3244 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3245 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3246 if match is not None:
3247 return '-'.join(match.groups())
3248 else:
3249 return date_str
3250
5f6a1245 3251
bd558525
JMF
3252class DateRange(object):
3253 """Represents a time interval between two dates"""
5f6a1245 3254
bd558525
JMF
3255 def __init__(self, start=None, end=None):
3256 """start and end must be strings in the format accepted by date"""
3257 if start is not None:
3258 self.start = date_from_str(start)
3259 else:
3260 self.start = datetime.datetime.min.date()
3261 if end is not None:
3262 self.end = date_from_str(end)
3263 else:
3264 self.end = datetime.datetime.max.date()
37254abc 3265 if self.start > self.end:
bd558525 3266 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3267
bd558525
JMF
3268 @classmethod
3269 def day(cls, day):
3270 """Returns a range that only contains the given day"""
5f6a1245
JW
3271 return cls(day, day)
3272
bd558525
JMF
3273 def __contains__(self, date):
3274 """Check if the date is in the range"""
37254abc
JMF
3275 if not isinstance(date, datetime.date):
3276 date = date_from_str(date)
3277 return self.start <= date <= self.end
5f6a1245 3278
bd558525 3279 def __str__(self):
5f6a1245 3280 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3281
3282
3283def platform_name():
3284 """ Returns the platform name as a compat_str """
3285 res = platform.platform()
3286 if isinstance(res, bytes):
3287 res = res.decode(preferredencoding())
3288
3289 assert isinstance(res, compat_str)
3290 return res
c257baff
PH
3291
3292
b58ddb32
PH
3293def _windows_write_string(s, out):
3294 """ Returns True if the string was written using special methods,
3295 False if it has yet to be written out."""
3296 # Adapted from http://stackoverflow.com/a/3259271/35070
3297
3298 import ctypes
3299 import ctypes.wintypes
3300
3301 WIN_OUTPUT_IDS = {
3302 1: -11,
3303 2: -12,
3304 }
3305
a383a98a
PH
3306 try:
3307 fileno = out.fileno()
3308 except AttributeError:
3309 # If the output stream doesn't have a fileno, it's virtual
3310 return False
aa42e873
PH
3311 except io.UnsupportedOperation:
3312 # Some strange Windows pseudo files?
3313 return False
b58ddb32
PH
3314 if fileno not in WIN_OUTPUT_IDS:
3315 return False
3316
d7cd9a9e 3317 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3318 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3319 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3320 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3321
d7cd9a9e 3322 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3323 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3324 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3325 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3326 written = ctypes.wintypes.DWORD(0)
3327
d7cd9a9e 3328 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3329 FILE_TYPE_CHAR = 0x0002
3330 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3331 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3332 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3333 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3334 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3335 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3336
3337 def not_a_console(handle):
3338 if handle == INVALID_HANDLE_VALUE or handle is None:
3339 return True
3089bc74
S
3340 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3341 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3342
3343 if not_a_console(h):
3344 return False
3345
d1b9c912
PH
3346 def next_nonbmp_pos(s):
3347 try:
3348 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3349 except StopIteration:
3350 return len(s)
3351
3352 while s:
3353 count = min(next_nonbmp_pos(s), 1024)
3354
b58ddb32 3355 ret = WriteConsoleW(
d1b9c912 3356 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3357 if ret == 0:
3358 raise OSError('Failed to write string')
d1b9c912
PH
3359 if not count: # We just wrote a non-BMP character
3360 assert written.value == 2
3361 s = s[1:]
3362 else:
3363 assert written.value > 0
3364 s = s[written.value:]
b58ddb32
PH
3365 return True
3366
3367
734f90bb 3368def write_string(s, out=None, encoding=None):
7459e3a2
PH
3369 if out is None:
3370 out = sys.stderr
8bf48f23 3371 assert type(s) == compat_str
7459e3a2 3372
b58ddb32
PH
3373 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3374 if _windows_write_string(s, out):
3375 return
3376
3089bc74
S
3377 if ('b' in getattr(out, 'mode', '')
3378 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3379 byt = s.encode(encoding or preferredencoding(), 'ignore')
3380 out.write(byt)
3381 elif hasattr(out, 'buffer'):
3382 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3383 byt = s.encode(enc, 'ignore')
3384 out.buffer.write(byt)
3385 else:
8bf48f23 3386 out.write(s)
7459e3a2
PH
3387 out.flush()
3388
3389
48ea9cea
PH
3390def bytes_to_intlist(bs):
3391 if not bs:
3392 return []
3393 if isinstance(bs[0], int): # Python 3
3394 return list(bs)
3395 else:
3396 return [ord(c) for c in bs]
3397
c257baff 3398
cba892fa 3399def intlist_to_bytes(xs):
3400 if not xs:
3401 return b''
edaa23f8 3402 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3403
3404
c1c9a79c
PH
3405# Cross-platform file locking
3406if sys.platform == 'win32':
3407 import ctypes.wintypes
3408 import msvcrt
3409
3410 class OVERLAPPED(ctypes.Structure):
3411 _fields_ = [
3412 ('Internal', ctypes.wintypes.LPVOID),
3413 ('InternalHigh', ctypes.wintypes.LPVOID),
3414 ('Offset', ctypes.wintypes.DWORD),
3415 ('OffsetHigh', ctypes.wintypes.DWORD),
3416 ('hEvent', ctypes.wintypes.HANDLE),
3417 ]
3418
3419 kernel32 = ctypes.windll.kernel32
3420 LockFileEx = kernel32.LockFileEx
3421 LockFileEx.argtypes = [
3422 ctypes.wintypes.HANDLE, # hFile
3423 ctypes.wintypes.DWORD, # dwFlags
3424 ctypes.wintypes.DWORD, # dwReserved
3425 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3426 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3427 ctypes.POINTER(OVERLAPPED) # Overlapped
3428 ]
3429 LockFileEx.restype = ctypes.wintypes.BOOL
3430 UnlockFileEx = kernel32.UnlockFileEx
3431 UnlockFileEx.argtypes = [
3432 ctypes.wintypes.HANDLE, # hFile
3433 ctypes.wintypes.DWORD, # dwReserved
3434 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3435 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3436 ctypes.POINTER(OVERLAPPED) # Overlapped
3437 ]
3438 UnlockFileEx.restype = ctypes.wintypes.BOOL
3439 whole_low = 0xffffffff
3440 whole_high = 0x7fffffff
3441
3442 def _lock_file(f, exclusive):
3443 overlapped = OVERLAPPED()
3444 overlapped.Offset = 0
3445 overlapped.OffsetHigh = 0
3446 overlapped.hEvent = 0
3447 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3448 handle = msvcrt.get_osfhandle(f.fileno())
3449 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3450 whole_low, whole_high, f._lock_file_overlapped_p):
3451 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3452
3453 def _unlock_file(f):
3454 assert f._lock_file_overlapped_p
3455 handle = msvcrt.get_osfhandle(f.fileno())
3456 if not UnlockFileEx(handle, 0,
3457 whole_low, whole_high, f._lock_file_overlapped_p):
3458 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3459
3460else:
399a76e6
YCH
3461 # Some platforms, such as Jython, is missing fcntl
3462 try:
3463 import fcntl
c1c9a79c 3464
399a76e6
YCH
3465 def _lock_file(f, exclusive):
3466 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3467
399a76e6
YCH
3468 def _unlock_file(f):
3469 fcntl.flock(f, fcntl.LOCK_UN)
3470 except ImportError:
3471 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3472
3473 def _lock_file(f, exclusive):
3474 raise IOError(UNSUPPORTED_MSG)
3475
3476 def _unlock_file(f):
3477 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3478
3479
3480class locked_file(object):
3481 def __init__(self, filename, mode, encoding=None):
3482 assert mode in ['r', 'a', 'w']
3483 self.f = io.open(filename, mode, encoding=encoding)
3484 self.mode = mode
3485
3486 def __enter__(self):
3487 exclusive = self.mode != 'r'
3488 try:
3489 _lock_file(self.f, exclusive)
3490 except IOError:
3491 self.f.close()
3492 raise
3493 return self
3494
3495 def __exit__(self, etype, value, traceback):
3496 try:
3497 _unlock_file(self.f)
3498 finally:
3499 self.f.close()
3500
3501 def __iter__(self):
3502 return iter(self.f)
3503
3504 def write(self, *args):
3505 return self.f.write(*args)
3506
3507 def read(self, *args):
3508 return self.f.read(*args)
4eb7f1d1
JMF
3509
3510
4644ac55
S
3511def get_filesystem_encoding():
3512 encoding = sys.getfilesystemencoding()
3513 return encoding if encoding is not None else 'utf-8'
3514
3515
4eb7f1d1 3516def shell_quote(args):
a6a173c2 3517 quoted_args = []
4644ac55 3518 encoding = get_filesystem_encoding()
a6a173c2
JMF
3519 for a in args:
3520 if isinstance(a, bytes):
3521 # We may get a filename encoded with 'encodeFilename'
3522 a = a.decode(encoding)
aefce8e6 3523 quoted_args.append(compat_shlex_quote(a))
28e614de 3524 return ' '.join(quoted_args)
9d4660ca
PH
3525
3526
3527def smuggle_url(url, data):
3528 """ Pass additional data in a URL for internal use. """
3529
81953d1a
RA
3530 url, idata = unsmuggle_url(url, {})
3531 data.update(idata)
15707c7e 3532 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3533 {'__youtubedl_smuggle': json.dumps(data)})
3534 return url + '#' + sdata
9d4660ca
PH
3535
3536
79f82953 3537def unsmuggle_url(smug_url, default=None):
83e865a3 3538 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3539 return smug_url, default
28e614de
PH
3540 url, _, sdata = smug_url.rpartition('#')
3541 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3542 data = json.loads(jsond)
3543 return url, data
02dbf93f
PH
3544
3545
02dbf93f
PH
3546def format_bytes(bytes):
3547 if bytes is None:
28e614de 3548 return 'N/A'
02dbf93f
PH
3549 if type(bytes) is str:
3550 bytes = float(bytes)
3551 if bytes == 0.0:
3552 exponent = 0
3553 else:
3554 exponent = int(math.log(bytes, 1024.0))
28e614de 3555 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3556 converted = float(bytes) / float(1024 ** exponent)
28e614de 3557 return '%.2f%s' % (converted, suffix)
f53c966a 3558
1c088fa8 3559
fb47597b
S
3560def lookup_unit_table(unit_table, s):
3561 units_re = '|'.join(re.escape(u) for u in unit_table)
3562 m = re.match(
782b1b5b 3563 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3564 if not m:
3565 return None
3566 num_str = m.group('num').replace(',', '.')
3567 mult = unit_table[m.group('unit')]
3568 return int(float(num_str) * mult)
3569
3570
be64b5b0
PH
3571def parse_filesize(s):
3572 if s is None:
3573 return None
3574
dfb1b146 3575 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3576 # but we support those too
3577 _UNIT_TABLE = {
3578 'B': 1,
3579 'b': 1,
70852b47 3580 'bytes': 1,
be64b5b0
PH
3581 'KiB': 1024,
3582 'KB': 1000,
3583 'kB': 1024,
3584 'Kb': 1000,
13585d76 3585 'kb': 1000,
70852b47
YCH
3586 'kilobytes': 1000,
3587 'kibibytes': 1024,
be64b5b0
PH
3588 'MiB': 1024 ** 2,
3589 'MB': 1000 ** 2,
3590 'mB': 1024 ** 2,
3591 'Mb': 1000 ** 2,
13585d76 3592 'mb': 1000 ** 2,
70852b47
YCH
3593 'megabytes': 1000 ** 2,
3594 'mebibytes': 1024 ** 2,
be64b5b0
PH
3595 'GiB': 1024 ** 3,
3596 'GB': 1000 ** 3,
3597 'gB': 1024 ** 3,
3598 'Gb': 1000 ** 3,
13585d76 3599 'gb': 1000 ** 3,
70852b47
YCH
3600 'gigabytes': 1000 ** 3,
3601 'gibibytes': 1024 ** 3,
be64b5b0
PH
3602 'TiB': 1024 ** 4,
3603 'TB': 1000 ** 4,
3604 'tB': 1024 ** 4,
3605 'Tb': 1000 ** 4,
13585d76 3606 'tb': 1000 ** 4,
70852b47
YCH
3607 'terabytes': 1000 ** 4,
3608 'tebibytes': 1024 ** 4,
be64b5b0
PH
3609 'PiB': 1024 ** 5,
3610 'PB': 1000 ** 5,
3611 'pB': 1024 ** 5,
3612 'Pb': 1000 ** 5,
13585d76 3613 'pb': 1000 ** 5,
70852b47
YCH
3614 'petabytes': 1000 ** 5,
3615 'pebibytes': 1024 ** 5,
be64b5b0
PH
3616 'EiB': 1024 ** 6,
3617 'EB': 1000 ** 6,
3618 'eB': 1024 ** 6,
3619 'Eb': 1000 ** 6,
13585d76 3620 'eb': 1000 ** 6,
70852b47
YCH
3621 'exabytes': 1000 ** 6,
3622 'exbibytes': 1024 ** 6,
be64b5b0
PH
3623 'ZiB': 1024 ** 7,
3624 'ZB': 1000 ** 7,
3625 'zB': 1024 ** 7,
3626 'Zb': 1000 ** 7,
13585d76 3627 'zb': 1000 ** 7,
70852b47
YCH
3628 'zettabytes': 1000 ** 7,
3629 'zebibytes': 1024 ** 7,
be64b5b0
PH
3630 'YiB': 1024 ** 8,
3631 'YB': 1000 ** 8,
3632 'yB': 1024 ** 8,
3633 'Yb': 1000 ** 8,
13585d76 3634 'yb': 1000 ** 8,
70852b47
YCH
3635 'yottabytes': 1000 ** 8,
3636 'yobibytes': 1024 ** 8,
be64b5b0
PH
3637 }
3638
fb47597b
S
3639 return lookup_unit_table(_UNIT_TABLE, s)
3640
3641
3642def parse_count(s):
3643 if s is None:
be64b5b0
PH
3644 return None
3645
fb47597b
S
3646 s = s.strip()
3647
3648 if re.match(r'^[\d,.]+$', s):
3649 return str_to_int(s)
3650
3651 _UNIT_TABLE = {
3652 'k': 1000,
3653 'K': 1000,
3654 'm': 1000 ** 2,
3655 'M': 1000 ** 2,
3656 'kk': 1000 ** 2,
3657 'KK': 1000 ** 2,
3658 }
be64b5b0 3659
fb47597b 3660 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3661
2f7ae819 3662
b871d7e9
S
3663def parse_resolution(s):
3664 if s is None:
3665 return {}
3666
3667 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3668 if mobj:
3669 return {
3670 'width': int(mobj.group('w')),
3671 'height': int(mobj.group('h')),
3672 }
3673
3674 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3675 if mobj:
3676 return {'height': int(mobj.group(1))}
3677
3678 mobj = re.search(r'\b([48])[kK]\b', s)
3679 if mobj:
3680 return {'height': int(mobj.group(1)) * 540}
3681
3682 return {}
3683
3684
0dc41787
S
3685def parse_bitrate(s):
3686 if not isinstance(s, compat_str):
3687 return
3688 mobj = re.search(r'\b(\d+)\s*kbps', s)
3689 if mobj:
3690 return int(mobj.group(1))
3691
3692
a942d6cb 3693def month_by_name(name, lang='en'):
caefb1de
PH
3694 """ Return the number of a month by (locale-independently) English name """
3695
f6717dec 3696 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3697
caefb1de 3698 try:
f6717dec 3699 return month_names.index(name) + 1
7105440c
YCH
3700 except ValueError:
3701 return None
3702
3703
3704def month_by_abbreviation(abbrev):
3705 """ Return the number of a month by (locale-independently) English
3706 abbreviations """
3707
3708 try:
3709 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3710 except ValueError:
3711 return None
18258362
JMF
3712
3713
5aafe895 3714def fix_xml_ampersands(xml_str):
18258362 3715 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3716 return re.sub(
3717 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3718 '&amp;',
5aafe895 3719 xml_str)
e3946f98
PH
3720
3721
3722def setproctitle(title):
8bf48f23 3723 assert isinstance(title, compat_str)
c1c05c67
YCH
3724
3725 # ctypes in Jython is not complete
3726 # http://bugs.jython.org/issue2148
3727 if sys.platform.startswith('java'):
3728 return
3729
e3946f98 3730 try:
611c1dd9 3731 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3732 except OSError:
3733 return
2f49bcd6
RC
3734 except TypeError:
3735 # LoadLibrary in Windows Python 2.7.13 only expects
3736 # a bytestring, but since unicode_literals turns
3737 # every string into a unicode string, it fails.
3738 return
6eefe533
PH
3739 title_bytes = title.encode('utf-8')
3740 buf = ctypes.create_string_buffer(len(title_bytes))
3741 buf.value = title_bytes
e3946f98 3742 try:
6eefe533 3743 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3744 except AttributeError:
3745 return # Strange libc, just skip this
d7dda168
PH
3746
3747
3748def remove_start(s, start):
46bc9b7d 3749 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3750
3751
2b9faf55 3752def remove_end(s, end):
46bc9b7d 3753 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3754
3755
31b2051e
S
3756def remove_quotes(s):
3757 if s is None or len(s) < 2:
3758 return s
3759 for quote in ('"', "'", ):
3760 if s[0] == quote and s[-1] == quote:
3761 return s[1:-1]
3762 return s
3763
3764
b6e0c7d2
U
3765def get_domain(url):
3766 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3767 return domain.group('domain') if domain else None
3768
3769
29eb5174 3770def url_basename(url):
9b8aaeed 3771 path = compat_urlparse.urlparse(url).path
28e614de 3772 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3773
3774
02dc0a36
S
3775def base_url(url):
3776 return re.match(r'https?://[^?#&]+/', url).group()
3777
3778
e34c3361 3779def urljoin(base, path):
4b5de77b
S
3780 if isinstance(path, bytes):
3781 path = path.decode('utf-8')
e34c3361
S
3782 if not isinstance(path, compat_str) or not path:
3783 return None
fad4ceb5 3784 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3785 return path
4b5de77b
S
3786 if isinstance(base, bytes):
3787 base = base.decode('utf-8')
3788 if not isinstance(base, compat_str) or not re.match(
3789 r'^(?:https?:)?//', base):
e34c3361
S
3790 return None
3791 return compat_urlparse.urljoin(base, path)
3792
3793
aa94a6d3
PH
3794class HEADRequest(compat_urllib_request.Request):
3795 def get_method(self):
611c1dd9 3796 return 'HEAD'
7217e148
PH
3797
3798
95cf60e8
S
3799class PUTRequest(compat_urllib_request.Request):
3800 def get_method(self):
3801 return 'PUT'
3802
3803
9732d77e 3804def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3805 if get_attr:
3806 if v is not None:
3807 v = getattr(v, get_attr, None)
9572013d
PH
3808 if v == '':
3809 v = None
1812afb7
S
3810 if v is None:
3811 return default
3812 try:
3813 return int(v) * invscale // scale
5e1271c5 3814 except (ValueError, TypeError):
af98f8ff 3815 return default
9732d77e 3816
9572013d 3817
40a90862
JMF
3818def str_or_none(v, default=None):
3819 return default if v is None else compat_str(v)
3820
9732d77e
PH
3821
3822def str_to_int(int_str):
48d4681e 3823 """ A more relaxed version of int_or_none """
42db58ec 3824 if isinstance(int_str, compat_integer_types):
348c6bf1 3825 return int_str
42db58ec
S
3826 elif isinstance(int_str, compat_str):
3827 int_str = re.sub(r'[,\.\+]', '', int_str)
3828 return int_or_none(int_str)
608d11f5
PH
3829
3830
9732d77e 3831def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3832 if v is None:
3833 return default
3834 try:
3835 return float(v) * invscale / scale
5e1271c5 3836 except (ValueError, TypeError):
caf80631 3837 return default
43f775e4
PH
3838
3839
c7e327c4
S
3840def bool_or_none(v, default=None):
3841 return v if isinstance(v, bool) else default
3842
3843
53cd37ba
S
3844def strip_or_none(v, default=None):
3845 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3846
3847
af03000a
S
3848def url_or_none(url):
3849 if not url or not isinstance(url, compat_str):
3850 return None
3851 url = url.strip()
29f7c58a 3852 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3853
3854
e29663c6 3855def strftime_or_none(timestamp, date_format, default=None):
3856 datetime_object = None
3857 try:
3858 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3859 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3860 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3861 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3862 return datetime_object.strftime(date_format)
3863 except (ValueError, TypeError, AttributeError):
3864 return default
3865
3866
608d11f5 3867def parse_duration(s):
8f9312c3 3868 if not isinstance(s, compat_basestring):
608d11f5
PH
3869 return None
3870
ca7b3246
S
3871 s = s.strip()
3872
acaff495 3873 days, hours, mins, secs, ms = [None] * 5
15846398 3874 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3875 if m:
3876 days, hours, mins, secs, ms = m.groups()
3877 else:
3878 m = re.match(
056653bb
S
3879 r'''(?ix)(?:P?
3880 (?:
3881 [0-9]+\s*y(?:ears?)?\s*
3882 )?
3883 (?:
3884 [0-9]+\s*m(?:onths?)?\s*
3885 )?
3886 (?:
3887 [0-9]+\s*w(?:eeks?)?\s*
3888 )?
8f4b58d7 3889 (?:
acaff495 3890 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3891 )?
056653bb 3892 T)?
acaff495 3893 (?:
3894 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3895 )?
3896 (?:
3897 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3898 )?
3899 (?:
3900 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3901 )?Z?$''', s)
acaff495 3902 if m:
3903 days, hours, mins, secs, ms = m.groups()
3904 else:
15846398 3905 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3906 if m:
3907 hours, mins = m.groups()
3908 else:
3909 return None
3910
3911 duration = 0
3912 if secs:
3913 duration += float(secs)
3914 if mins:
3915 duration += float(mins) * 60
3916 if hours:
3917 duration += float(hours) * 60 * 60
3918 if days:
3919 duration += float(days) * 24 * 60 * 60
3920 if ms:
3921 duration += float(ms)
3922 return duration
91d7d0b3
JMF
3923
3924
e65e4c88 3925def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3926 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3927 return (
3928 '{0}.{1}{2}'.format(name, ext, real_ext)
3929 if not expected_real_ext or real_ext[1:] == expected_real_ext
3930 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3931
3932
b3ed15b7
S
3933def replace_extension(filename, ext, expected_real_ext=None):
3934 name, real_ext = os.path.splitext(filename)
3935 return '{0}.{1}'.format(
3936 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3937 ext)
3938
3939
d70ad093
PH
3940def check_executable(exe, args=[]):
3941 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3942 args can be a list of arguments for a short output (like -version) """
3943 try:
f5b1bca9 3944 process_communicate_or_kill(subprocess.Popen(
3945 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
d70ad093
PH
3946 except OSError:
3947 return False
3948 return exe
b7ab0590
PH
3949
3950
95807118 3951def get_exe_version(exe, args=['--version'],
cae97f65 3952 version_re=None, unrecognized='present'):
95807118
PH
3953 """ Returns the version of the specified executable,
3954 or False if the executable is not present """
3955 try:
b64d04c1 3956 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 3957 # SIGTTOU if yt-dlp is run in the background.
067aa17e 3958 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
f5b1bca9 3959 out, _ = process_communicate_or_kill(subprocess.Popen(
54116803 3960 [encodeArgument(exe)] + args,
00ca7552 3961 stdin=subprocess.PIPE,
f5b1bca9 3962 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
95807118
PH
3963 except OSError:
3964 return False
cae97f65
PH
3965 if isinstance(out, bytes): # Python 2.x
3966 out = out.decode('ascii', 'ignore')
3967 return detect_exe_version(out, version_re, unrecognized)
3968
3969
3970def detect_exe_version(output, version_re=None, unrecognized='present'):
3971 assert isinstance(output, compat_str)
3972 if version_re is None:
3973 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3974 m = re.search(version_re, output)
95807118
PH
3975 if m:
3976 return m.group(1)
3977 else:
3978 return unrecognized
3979
3980
cb89cfc1 3981class LazyList(collections.abc.Sequence):
483336e7 3982 ''' Lazy immutable list from an iterable
3983 Note that slices of a LazyList are lists and not LazyList'''
3984
8e5fecc8 3985 class IndexError(IndexError):
3986 pass
3987
483336e7 3988 def __init__(self, iterable):
3989 self.__iterable = iter(iterable)
3990 self.__cache = []
28419ca2 3991 self.__reversed = False
483336e7 3992
3993 def __iter__(self):
28419ca2 3994 if self.__reversed:
3995 # We need to consume the entire iterable to iterate in reverse
981052c9 3996 yield from self.exhaust()
28419ca2 3997 return
3998 yield from self.__cache
483336e7 3999 for item in self.__iterable:
4000 self.__cache.append(item)
4001 yield item
4002
981052c9 4003 def __exhaust(self):
483336e7 4004 self.__cache.extend(self.__iterable)
28419ca2 4005 return self.__cache
4006
981052c9 4007 def exhaust(self):
4008 ''' Evaluate the entire iterable '''
4009 return self.__exhaust()[::-1 if self.__reversed else 1]
4010
28419ca2 4011 @staticmethod
981052c9 4012 def __reverse_index(x):
e0f2b4b4 4013 return None if x is None else -(x + 1)
483336e7 4014
4015 def __getitem__(self, idx):
4016 if isinstance(idx, slice):
28419ca2 4017 if self.__reversed:
e0f2b4b4 4018 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4019 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4020 elif isinstance(idx, int):
28419ca2 4021 if self.__reversed:
981052c9 4022 idx = self.__reverse_index(idx)
e0f2b4b4 4023 start, stop, step = idx, idx, 0
483336e7 4024 else:
4025 raise TypeError('indices must be integers or slices')
e0f2b4b4 4026 if ((start or 0) < 0 or (stop or 0) < 0
4027 or (start is None and step < 0)
4028 or (stop is None and step > 0)):
483336e7 4029 # We need to consume the entire iterable to be able to slice from the end
4030 # Obviously, never use this with infinite iterables
8e5fecc8 4031 self.__exhaust()
4032 try:
4033 return self.__cache[idx]
4034 except IndexError as e:
4035 raise self.IndexError(e) from e
e0f2b4b4 4036 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4037 if n > 0:
4038 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4039 try:
4040 return self.__cache[idx]
4041 except IndexError as e:
4042 raise self.IndexError(e) from e
483336e7 4043
4044 def __bool__(self):
4045 try:
28419ca2 4046 self[-1] if self.__reversed else self[0]
8e5fecc8 4047 except self.IndexError:
483336e7 4048 return False
4049 return True
4050
4051 def __len__(self):
8e5fecc8 4052 self.__exhaust()
483336e7 4053 return len(self.__cache)
4054
981052c9 4055 def reverse(self):
28419ca2 4056 self.__reversed = not self.__reversed
4057 return self
4058
4059 def __repr__(self):
4060 # repr and str should mimic a list. So we exhaust the iterable
4061 return repr(self.exhaust())
4062
4063 def __str__(self):
4064 return repr(self.exhaust())
4065
483336e7 4066
7be9ccff 4067class PagedList:
dd26ced1
PH
4068 def __len__(self):
4069 # This is only useful for tests
4070 return len(self.getslice())
4071
7be9ccff 4072 def __init__(self, pagefunc, pagesize, use_cache=True):
4073 self._pagefunc = pagefunc
4074 self._pagesize = pagesize
4075 self._use_cache = use_cache
4076 self._cache = {}
4077
4078 def getpage(self, pagenum):
4079 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4080 if self._use_cache:
4081 self._cache[pagenum] = page_results
4082 return page_results
4083
4084 def getslice(self, start=0, end=None):
4085 return list(self._getslice(start, end))
4086
4087 def _getslice(self, start, end):
55575225 4088 raise NotImplementedError('This method must be implemented by subclasses')
4089
4090 def __getitem__(self, idx):
7be9ccff 4091 # NOTE: cache must be enabled if this is used
55575225 4092 if not isinstance(idx, int) or idx < 0:
4093 raise TypeError('indices must be non-negative integers')
4094 entries = self.getslice(idx, idx + 1)
4095 return entries[0] if entries else None
4096
9c44d242
PH
4097
4098class OnDemandPagedList(PagedList):
7be9ccff 4099 def _getslice(self, start, end):
b7ab0590
PH
4100 for pagenum in itertools.count(start // self._pagesize):
4101 firstid = pagenum * self._pagesize
4102 nextfirstid = pagenum * self._pagesize + self._pagesize
4103 if start >= nextfirstid:
4104 continue
4105
b7ab0590
PH
4106 startv = (
4107 start % self._pagesize
4108 if firstid <= start < nextfirstid
4109 else 0)
b7ab0590
PH
4110 endv = (
4111 ((end - 1) % self._pagesize) + 1
4112 if (end is not None and firstid <= end <= nextfirstid)
4113 else None)
4114
7be9ccff 4115 page_results = self.getpage(pagenum)
b7ab0590
PH
4116 if startv != 0 or endv is not None:
4117 page_results = page_results[startv:endv]
7be9ccff 4118 yield from page_results
b7ab0590
PH
4119
4120 # A little optimization - if current page is not "full", ie. does
4121 # not contain page_size videos then we can assume that this page
4122 # is the last one - there are no more ids on further pages -
4123 # i.e. no need to query again.
4124 if len(page_results) + startv < self._pagesize:
4125 break
4126
4127 # If we got the whole page, but the next page is not interesting,
4128 # break out early as well
4129 if end == nextfirstid:
4130 break
81c2f20b
PH
4131
4132
9c44d242
PH
4133class InAdvancePagedList(PagedList):
4134 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4135 self._pagecount = pagecount
7be9ccff 4136 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4137
7be9ccff 4138 def _getslice(self, start, end):
9c44d242
PH
4139 start_page = start // self._pagesize
4140 end_page = (
4141 self._pagecount if end is None else (end // self._pagesize + 1))
4142 skip_elems = start - start_page * self._pagesize
4143 only_more = None if end is None else end - start
4144 for pagenum in range(start_page, end_page):
7be9ccff 4145 page_results = self.getpage(pagenum)
9c44d242 4146 if skip_elems:
7be9ccff 4147 page_results = page_results[skip_elems:]
9c44d242
PH
4148 skip_elems = None
4149 if only_more is not None:
7be9ccff 4150 if len(page_results) < only_more:
4151 only_more -= len(page_results)
9c44d242 4152 else:
7be9ccff 4153 yield from page_results[:only_more]
9c44d242 4154 break
7be9ccff 4155 yield from page_results
9c44d242
PH
4156
4157
81c2f20b 4158def uppercase_escape(s):
676eb3f2 4159 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4160 return re.sub(
a612753d 4161 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4162 lambda m: unicode_escape(m.group(0))[0],
4163 s)
0fe2ff78
YCH
4164
4165
4166def lowercase_escape(s):
4167 unicode_escape = codecs.getdecoder('unicode_escape')
4168 return re.sub(
4169 r'\\u[0-9a-fA-F]{4}',
4170 lambda m: unicode_escape(m.group(0))[0],
4171 s)
b53466e1 4172
d05cfe06
S
4173
4174def escape_rfc3986(s):
4175 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4176 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4177 s = s.encode('utf-8')
ecc0c5ee 4178 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4179
4180
4181def escape_url(url):
4182 """Escape URL as suggested by RFC 3986"""
4183 url_parsed = compat_urllib_parse_urlparse(url)
4184 return url_parsed._replace(
efbed08d 4185 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4186 path=escape_rfc3986(url_parsed.path),
4187 params=escape_rfc3986(url_parsed.params),
4188 query=escape_rfc3986(url_parsed.query),
4189 fragment=escape_rfc3986(url_parsed.fragment)
4190 ).geturl()
4191
62e609ab 4192
4dfbf869 4193def parse_qs(url):
4194 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4195
4196
62e609ab
PH
4197def read_batch_urls(batch_fd):
4198 def fixup(url):
4199 if not isinstance(url, compat_str):
4200 url = url.decode('utf-8', 'replace')
8c04f0be 4201 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4202 for bom in BOM_UTF8:
4203 if url.startswith(bom):
4204 url = url[len(bom):]
4205 url = url.lstrip()
4206 if not url or url.startswith(('#', ';', ']')):
62e609ab 4207 return False
8c04f0be 4208 # "#" cannot be stripped out since it is part of the URI
4209 # However, it can be safely stipped out if follwing a whitespace
4210 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4211
4212 with contextlib.closing(batch_fd) as fd:
4213 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4214
4215
4216def urlencode_postdata(*args, **kargs):
15707c7e 4217 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4218
4219
38f9ef31 4220def update_url_query(url, query):
cacd9966
YCH
4221 if not query:
4222 return url
38f9ef31 4223 parsed_url = compat_urlparse.urlparse(url)
4224 qs = compat_parse_qs(parsed_url.query)
4225 qs.update(query)
4226 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4227 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4228
8e60dc75 4229
ed0291d1
S
4230def update_Request(req, url=None, data=None, headers={}, query={}):
4231 req_headers = req.headers.copy()
4232 req_headers.update(headers)
4233 req_data = data or req.data
4234 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4235 req_get_method = req.get_method()
4236 if req_get_method == 'HEAD':
4237 req_type = HEADRequest
4238 elif req_get_method == 'PUT':
4239 req_type = PUTRequest
4240 else:
4241 req_type = compat_urllib_request.Request
ed0291d1
S
4242 new_req = req_type(
4243 req_url, data=req_data, headers=req_headers,
4244 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4245 if hasattr(req, 'timeout'):
4246 new_req.timeout = req.timeout
4247 return new_req
4248
4249
10c87c15 4250def _multipart_encode_impl(data, boundary):
0c265486
YCH
4251 content_type = 'multipart/form-data; boundary=%s' % boundary
4252
4253 out = b''
4254 for k, v in data.items():
4255 out += b'--' + boundary.encode('ascii') + b'\r\n'
4256 if isinstance(k, compat_str):
4257 k = k.encode('utf-8')
4258 if isinstance(v, compat_str):
4259 v = v.encode('utf-8')
4260 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4261 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4262 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4263 if boundary.encode('ascii') in content:
4264 raise ValueError('Boundary overlaps with data')
4265 out += content
4266
4267 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4268
4269 return out, content_type
4270
4271
4272def multipart_encode(data, boundary=None):
4273 '''
4274 Encode a dict to RFC 7578-compliant form-data
4275
4276 data:
4277 A dict where keys and values can be either Unicode or bytes-like
4278 objects.
4279 boundary:
4280 If specified a Unicode object, it's used as the boundary. Otherwise
4281 a random boundary is generated.
4282
4283 Reference: https://tools.ietf.org/html/rfc7578
4284 '''
4285 has_specified_boundary = boundary is not None
4286
4287 while True:
4288 if boundary is None:
4289 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4290
4291 try:
10c87c15 4292 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4293 break
4294 except ValueError:
4295 if has_specified_boundary:
4296 raise
4297 boundary = None
4298
4299 return out, content_type
4300
4301
86296ad2 4302def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4303 if isinstance(key_or_keys, (list, tuple)):
4304 for key in key_or_keys:
86296ad2
S
4305 if key not in d or d[key] is None or skip_false_values and not d[key]:
4306 continue
4307 return d[key]
cbecc9b9
S
4308 return default
4309 return d.get(key_or_keys, default)
4310
4311
329ca3be 4312def try_get(src, getter, expected_type=None):
6606817a 4313 for get in variadic(getter):
a32a9a7e
S
4314 try:
4315 v = get(src)
4316 except (AttributeError, KeyError, TypeError, IndexError):
4317 pass
4318 else:
4319 if expected_type is None or isinstance(v, expected_type):
4320 return v
329ca3be
S
4321
4322
6cc62232
S
4323def merge_dicts(*dicts):
4324 merged = {}
4325 for a_dict in dicts:
4326 for k, v in a_dict.items():
4327 if v is None:
4328 continue
3089bc74
S
4329 if (k not in merged
4330 or (isinstance(v, compat_str) and v
4331 and isinstance(merged[k], compat_str)
4332 and not merged[k])):
6cc62232
S
4333 merged[k] = v
4334 return merged
4335
4336
8e60dc75
S
4337def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4338 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4339
16392824 4340
a1a530b0
PH
4341US_RATINGS = {
4342 'G': 0,
4343 'PG': 10,
4344 'PG-13': 13,
4345 'R': 16,
4346 'NC': 18,
4347}
fac55558
PH
4348
4349
a8795327 4350TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4351 'TV-Y': 0,
4352 'TV-Y7': 7,
4353 'TV-G': 0,
4354 'TV-PG': 0,
4355 'TV-14': 14,
4356 'TV-MA': 17,
a8795327
S
4357}
4358
4359
146c80e2 4360def parse_age_limit(s):
a8795327
S
4361 if type(s) == int:
4362 return s if 0 <= s <= 21 else None
4363 if not isinstance(s, compat_basestring):
d838b1bd 4364 return None
146c80e2 4365 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4366 if m:
4367 return int(m.group('age'))
5c5fae6d 4368 s = s.upper()
a8795327
S
4369 if s in US_RATINGS:
4370 return US_RATINGS[s]
5a16c9d9 4371 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4372 if m:
5a16c9d9 4373 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4374 return None
146c80e2
S
4375
4376
fac55558 4377def strip_jsonp(code):
609a61e3 4378 return re.sub(
5552c9eb 4379 r'''(?sx)^
e9c671d5 4380 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4381 (?:\s*&&\s*(?P=func_name))?
4382 \s*\(\s*(?P<callback_data>.*)\);?
4383 \s*?(?://[^\n]*)*$''',
4384 r'\g<callback_data>', code)
478c2c61
PH
4385
4386
5c610515 4387def js_to_json(code, vars={}):
4388 # vars is a dict of var, val pairs to substitute
c843e685 4389 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4390 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4391 INTEGER_TABLE = (
4392 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4393 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4394 )
4395
e05f6939 4396 def fix_kv(m):
e7b6d122
PH
4397 v = m.group(0)
4398 if v in ('true', 'false', 'null'):
4399 return v
421ddcb8
C
4400 elif v in ('undefined', 'void 0'):
4401 return 'null'
8bdd16b4 4402 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4403 return ""
4404
4405 if v[0] in ("'", '"'):
4406 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4407 '"': '\\"',
bd1e4844 4408 "\\'": "'",
4409 '\\\n': '',
4410 '\\x': '\\u00',
4411 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4412 else:
4413 for regex, base in INTEGER_TABLE:
4414 im = re.match(regex, v)
4415 if im:
4416 i = int(im.group(1), base)
4417 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4418
5c610515 4419 if v in vars:
4420 return vars[v]
4421
e7b6d122 4422 return '"%s"' % v
e05f6939 4423
bd1e4844 4424 return re.sub(r'''(?sx)
4425 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4426 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4427 {comment}|,(?={skip}[\]}}])|
421ddcb8 4428 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4429 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4430 [0-9]+(?={skip}:)|
4431 !+
4195096e 4432 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4433
4434
478c2c61
PH
4435def qualities(quality_ids):
4436 """ Get a numeric quality value out of a list of possible values """
4437 def q(qid):
4438 try:
4439 return quality_ids.index(qid)
4440 except ValueError:
4441 return -1
4442 return q
4443
acd69589 4444
de6000d9 4445DEFAULT_OUTTMPL = {
4446 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4447 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4448}
4449OUTTMPL_TYPES = {
72755351 4450 'chapter': None,
de6000d9 4451 'subtitle': None,
4452 'thumbnail': None,
4453 'description': 'description',
4454 'annotation': 'annotations.xml',
4455 'infojson': 'info.json',
5112f26a 4456 'pl_thumbnail': None,
de6000d9 4457 'pl_description': 'description',
4458 'pl_infojson': 'info.json',
4459}
0a871f68 4460
143db31d 4461# As of [1] format syntax is:
4462# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4463# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4464STR_FORMAT_RE_TMPL = r'''(?x)
4465 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4466 %
752cda38 4467 (?P<has_key>\((?P<key>{0})\))? # mapping key
4468 (?P<format>
4469 (?:[#0\-+ ]+)? # conversion flags (optional)
4470 (?:\d+)? # minimum field width (optional)
4471 (?:\.\d+)? # precision (optional)
4472 [hlL]? # length modifier (optional)
901130bb 4473 {1} # conversion type
752cda38 4474 )
143db31d 4475'''
4476
7d1eb38a 4477
901130bb 4478STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4479
7d1eb38a 4480
a020a0dc
PH
4481def limit_length(s, length):
4482 """ Add ellipses to overly long strings """
4483 if s is None:
4484 return None
4485 ELLIPSES = '...'
4486 if len(s) > length:
4487 return s[:length - len(ELLIPSES)] + ELLIPSES
4488 return s
48844745
PH
4489
4490
4491def version_tuple(v):
5f9b8394 4492 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4493
4494
4495def is_outdated_version(version, limit, assume_new=True):
4496 if not version:
4497 return not assume_new
4498 try:
4499 return version_tuple(version) < version_tuple(limit)
4500 except ValueError:
4501 return not assume_new
732ea2f0
PH
4502
4503
4504def ytdl_is_updateable():
7a5c1cfe 4505 """ Returns if yt-dlp can be updated with -U """
735d865e 4506 return False
4507
732ea2f0
PH
4508 from zipimport import zipimporter
4509
4510 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
4511
4512
4513def args_to_str(args):
4514 # Get a short string representation for a subprocess command
702ccf2d 4515 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4516
4517
9b9c5355 4518def error_to_compat_str(err):
fdae2358
S
4519 err_str = str(err)
4520 # On python 2 error byte string must be decoded with proper
4521 # encoding rather than ascii
4522 if sys.version_info[0] < 3:
4523 err_str = err_str.decode(preferredencoding())
4524 return err_str
4525
4526
c460bdd5 4527def mimetype2ext(mt):
eb9ee194
S
4528 if mt is None:
4529 return None
4530
765ac263
JMF
4531 ext = {
4532 'audio/mp4': 'm4a',
6c33d24b
YCH
4533 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4534 # it's the most popular one
4535 'audio/mpeg': 'mp3',
ba39289d 4536 'audio/x-wav': 'wav',
765ac263
JMF
4537 }.get(mt)
4538 if ext is not None:
4539 return ext
4540
c460bdd5 4541 _, _, res = mt.rpartition('/')
6562d34a 4542 res = res.split(';')[0].strip().lower()
c460bdd5
PH
4543
4544 return {
f6861ec9 4545 '3gpp': '3gp',
cafcf657 4546 'smptett+xml': 'tt',
cafcf657 4547 'ttaf+xml': 'dfxp',
a0d8d704 4548 'ttml+xml': 'ttml',
f6861ec9 4549 'x-flv': 'flv',
a0d8d704 4550 'x-mp4-fragmented': 'mp4',
d4f05d47 4551 'x-ms-sami': 'sami',
a0d8d704 4552 'x-ms-wmv': 'wmv',
b4173f15
RA
4553 'mpegurl': 'm3u8',
4554 'x-mpegurl': 'm3u8',
4555 'vnd.apple.mpegurl': 'm3u8',
4556 'dash+xml': 'mpd',
b4173f15 4557 'f4m+xml': 'f4m',
f164b971 4558 'hds+xml': 'f4m',
e910fe2f 4559 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4560 'quicktime': 'mov',
98ce1a3f 4561 'mp2t': 'ts',
39e7107d 4562 'x-wav': 'wav',
c460bdd5
PH
4563 }.get(res, res)
4564
4565
4f3c5e06 4566def parse_codecs(codecs_str):
4567 # http://tools.ietf.org/html/rfc6381
4568 if not codecs_str:
4569 return {}
a0566bbf 4570 split_codecs = list(filter(None, map(
dbf5416a 4571 str.strip, codecs_str.strip().strip(',').split(','))))
4f3c5e06 4572 vcodec, acodec = None, None
a0566bbf 4573 for full_codec in split_codecs:
4f3c5e06 4574 codec = full_codec.split('.')[0]
28cc2241 4575 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4576 if not vcodec:
4577 vcodec = full_codec
60f5c9fb 4578 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4579 if not acodec:
4580 acodec = full_codec
4581 else:
60f5c9fb 4582 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4583 if not vcodec and not acodec:
a0566bbf 4584 if len(split_codecs) == 2:
4f3c5e06 4585 return {
a0566bbf 4586 'vcodec': split_codecs[0],
4587 'acodec': split_codecs[1],
4f3c5e06 4588 }
4589 else:
4590 return {
4591 'vcodec': vcodec or 'none',
4592 'acodec': acodec or 'none',
4593 }
4594 return {}
4595
4596
2ccd1b10 4597def urlhandle_detect_ext(url_handle):
79298173 4598 getheader = url_handle.headers.get
2ccd1b10 4599
b55ee18f
PH
4600 cd = getheader('Content-Disposition')
4601 if cd:
4602 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4603 if m:
4604 e = determine_ext(m.group('filename'), default_ext=None)
4605 if e:
4606 return e
4607
c460bdd5 4608 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4609
4610
1e399778
YCH
4611def encode_data_uri(data, mime_type):
4612 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4613
4614
05900629 4615def age_restricted(content_limit, age_limit):
6ec6cb4e 4616 """ Returns True iff the content should be blocked """
05900629
PH
4617
4618 if age_limit is None: # No limit set
4619 return False
4620 if content_limit is None:
4621 return False # Content available for everyone
4622 return age_limit < content_limit
61ca9a80
PH
4623
4624
4625def is_html(first_bytes):
4626 """ Detect whether a file contains HTML by examining its first bytes. """
4627
4628 BOMS = [
4629 (b'\xef\xbb\xbf', 'utf-8'),
4630 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4631 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4632 (b'\xff\xfe', 'utf-16-le'),
4633 (b'\xfe\xff', 'utf-16-be'),
4634 ]
4635 for bom, enc in BOMS:
4636 if first_bytes.startswith(bom):
4637 s = first_bytes[len(bom):].decode(enc, 'replace')
4638 break
4639 else:
4640 s = first_bytes.decode('utf-8', 'replace')
4641
4642 return re.match(r'^\s*<', s)
a055469f
PH
4643
4644
4645def determine_protocol(info_dict):
4646 protocol = info_dict.get('protocol')
4647 if protocol is not None:
4648 return protocol
4649
4650 url = info_dict['url']
4651 if url.startswith('rtmp'):
4652 return 'rtmp'
4653 elif url.startswith('mms'):
4654 return 'mms'
4655 elif url.startswith('rtsp'):
4656 return 'rtsp'
4657
4658 ext = determine_ext(url)
4659 if ext == 'm3u8':
4660 return 'm3u8'
4661 elif ext == 'f4m':
4662 return 'f4m'
4663
4664 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4665
4666
76d321f6 4667def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4668 """ Render a list of rows, each as a list of values """
76d321f6 4669
4670 def get_max_lens(table):
4671 return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4672
4673 def filter_using_list(row, filterArray):
4674 return [col for (take, col) in zip(filterArray, row) if take]
4675
4676 if hideEmpty:
4677 max_lens = get_max_lens(data)
4678 header_row = filter_using_list(header_row, max_lens)
4679 data = [filter_using_list(row, max_lens) for row in data]
4680
cfb56d1a 4681 table = [header_row] + data
76d321f6 4682 max_lens = get_max_lens(table)
4683 if delim:
4684 table = [header_row] + [['-' * ml for ml in max_lens]] + data
4685 format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
cfb56d1a 4686 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4687
4688
8f18aca8 4689def _match_one(filter_part, dct, incomplete):
77b87f05 4690 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4691 STRING_OPERATORS = {
4692 '*=': operator.contains,
4693 '^=': lambda attr, value: attr.startswith(value),
4694 '$=': lambda attr, value: attr.endswith(value),
4695 '~=': lambda attr, value: re.search(value, attr),
4696 }
347de493 4697 COMPARISON_OPERATORS = {
a047eeb6 4698 **STRING_OPERATORS,
4699 '<=': operator.le, # "<=" must be defined above "<"
347de493 4700 '<': operator.lt,
347de493 4701 '>=': operator.ge,
a047eeb6 4702 '>': operator.gt,
347de493 4703 '=': operator.eq,
347de493 4704 }
a047eeb6 4705
347de493
PH
4706 operator_rex = re.compile(r'''(?x)\s*
4707 (?P<key>[a-z_]+)
77b87f05 4708 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493
PH
4709 (?:
4710 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
a047eeb6 4711 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4712 (?P<strval>.+?)
347de493
PH
4713 )
4714 \s*$
4715 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4716 m = operator_rex.search(filter_part)
4717 if m:
77b87f05
MT
4718 unnegated_op = COMPARISON_OPERATORS[m.group('op')]
4719 if m.group('negation'):
4720 op = lambda attr, value: not unnegated_op(attr, value)
4721 else:
4722 op = unnegated_op
e5a088dc 4723 actual_value = dct.get(m.group('key'))
3089bc74
S
4724 if (m.group('quotedstrval') is not None
4725 or m.group('strval') is not None
e5a088dc
S
4726 # If the original field is a string and matching comparisonvalue is
4727 # a number we should respect the origin of the original field
4728 # and process comparison value as a string (see
067aa17e 4729 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4730 or actual_value is not None and m.group('intval') is not None
4731 and isinstance(actual_value, compat_str)):
db13c16e
S
4732 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4733 quote = m.group('quote')
4734 if quote is not None:
4735 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493 4736 else:
a047eeb6 4737 if m.group('op') in STRING_OPERATORS:
4738 raise ValueError('Operator %s only supports string values!' % m.group('op'))
347de493
PH
4739 try:
4740 comparison_value = int(m.group('intval'))
4741 except ValueError:
4742 comparison_value = parse_filesize(m.group('intval'))
4743 if comparison_value is None:
4744 comparison_value = parse_filesize(m.group('intval') + 'B')
4745 if comparison_value is None:
4746 raise ValueError(
4747 'Invalid integer value %r in filter part %r' % (
4748 m.group('intval'), filter_part))
347de493 4749 if actual_value is None:
8f18aca8 4750 return incomplete or m.group('none_inclusive')
347de493
PH
4751 return op(actual_value, comparison_value)
4752
4753 UNARY_OPERATORS = {
1cc47c66
S
4754 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4755 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4756 }
4757 operator_rex = re.compile(r'''(?x)\s*
4758 (?P<op>%s)\s*(?P<key>[a-z_]+)
4759 \s*$
4760 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4761 m = operator_rex.search(filter_part)
4762 if m:
4763 op = UNARY_OPERATORS[m.group('op')]
4764 actual_value = dct.get(m.group('key'))
8f18aca8 4765 if incomplete and actual_value is None:
4766 return True
347de493
PH
4767 return op(actual_value)
4768
4769 raise ValueError('Invalid filter part %r' % filter_part)
4770
4771
8f18aca8 4772def match_str(filter_str, dct, incomplete=False):
4773 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4774 When incomplete, all conditions passes on missing fields
4775 """
347de493 4776 return all(
8f18aca8 4777 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4778 for filter_part in re.split(r'(?<!\\)&', filter_str))
347de493
PH
4779
4780
4781def match_filter_func(filter_str):
8f18aca8 4782 def _match_func(info_dict, *args, **kwargs):
4783 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4784 return None
4785 else:
4786 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4787 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4788 return _match_func
91410c9b
PH
4789
4790
bf6427d2
YCH
4791def parse_dfxp_time_expr(time_expr):
4792 if not time_expr:
d631d5f9 4793 return
bf6427d2
YCH
4794
4795 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4796 if mobj:
4797 return float(mobj.group('time_offset'))
4798
db2fe38b 4799 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4800 if mobj:
db2fe38b 4801 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4802
4803
c1c924ab
YCH
4804def srt_subtitles_timecode(seconds):
4805 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
bf6427d2
YCH
4806
4807
4808def dfxp2srt(dfxp_data):
3869028f
YCH
4809 '''
4810 @param dfxp_data A bytes-like object containing DFXP data
4811 @returns A unicode object containing converted SRT data
4812 '''
5b995f71 4813 LEGACY_NAMESPACES = (
3869028f
YCH
4814 (b'http://www.w3.org/ns/ttml', [
4815 b'http://www.w3.org/2004/11/ttaf1',
4816 b'http://www.w3.org/2006/04/ttaf1',
4817 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4818 ]),
3869028f
YCH
4819 (b'http://www.w3.org/ns/ttml#styling', [
4820 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4821 ]),
4822 )
4823
4824 SUPPORTED_STYLING = [
4825 'color',
4826 'fontFamily',
4827 'fontSize',
4828 'fontStyle',
4829 'fontWeight',
4830 'textDecoration'
4831 ]
4832
4e335771 4833 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4834 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4835 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4836 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4837 })
bf6427d2 4838
5b995f71
RA
4839 styles = {}
4840 default_style = {}
4841
87de7069 4842 class TTMLPElementParser(object):
5b995f71
RA
4843 _out = ''
4844 _unclosed_elements = []
4845 _applied_styles = []
bf6427d2 4846
2b14cb56 4847 def start(self, tag, attrib):
5b995f71
RA
4848 if tag in (_x('ttml:br'), 'br'):
4849 self._out += '\n'
4850 else:
4851 unclosed_elements = []
4852 style = {}
4853 element_style_id = attrib.get('style')
4854 if default_style:
4855 style.update(default_style)
4856 if element_style_id:
4857 style.update(styles.get(element_style_id, {}))
4858 for prop in SUPPORTED_STYLING:
4859 prop_val = attrib.get(_x('tts:' + prop))
4860 if prop_val:
4861 style[prop] = prop_val
4862 if style:
4863 font = ''
4864 for k, v in sorted(style.items()):
4865 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4866 continue
4867 if k == 'color':
4868 font += ' color="%s"' % v
4869 elif k == 'fontSize':
4870 font += ' size="%s"' % v
4871 elif k == 'fontFamily':
4872 font += ' face="%s"' % v
4873 elif k == 'fontWeight' and v == 'bold':
4874 self._out += '<b>'
4875 unclosed_elements.append('b')
4876 elif k == 'fontStyle' and v == 'italic':
4877 self._out += '<i>'
4878 unclosed_elements.append('i')
4879 elif k == 'textDecoration' and v == 'underline':
4880 self._out += '<u>'
4881 unclosed_elements.append('u')
4882 if font:
4883 self._out += '<font' + font + '>'
4884 unclosed_elements.append('font')
4885 applied_style = {}
4886 if self._applied_styles:
4887 applied_style.update(self._applied_styles[-1])
4888 applied_style.update(style)
4889 self._applied_styles.append(applied_style)
4890 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4891
2b14cb56 4892 def end(self, tag):
5b995f71
RA
4893 if tag not in (_x('ttml:br'), 'br'):
4894 unclosed_elements = self._unclosed_elements.pop()
4895 for element in reversed(unclosed_elements):
4896 self._out += '</%s>' % element
4897 if unclosed_elements and self._applied_styles:
4898 self._applied_styles.pop()
bf6427d2 4899
2b14cb56 4900 def data(self, data):
5b995f71 4901 self._out += data
2b14cb56 4902
4903 def close(self):
5b995f71 4904 return self._out.strip()
2b14cb56 4905
4906 def parse_node(node):
4907 target = TTMLPElementParser()
4908 parser = xml.etree.ElementTree.XMLParser(target=target)
4909 parser.feed(xml.etree.ElementTree.tostring(node))
4910 return parser.close()
bf6427d2 4911
5b995f71
RA
4912 for k, v in LEGACY_NAMESPACES:
4913 for ns in v:
4914 dfxp_data = dfxp_data.replace(ns, k)
4915
3869028f 4916 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 4917 out = []
5b995f71 4918 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
4919
4920 if not paras:
4921 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 4922
5b995f71
RA
4923 repeat = False
4924 while True:
4925 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
4926 style_id = style.get('id') or style.get(_x('xml:id'))
4927 if not style_id:
4928 continue
5b995f71
RA
4929 parent_style_id = style.get('style')
4930 if parent_style_id:
4931 if parent_style_id not in styles:
4932 repeat = True
4933 continue
4934 styles[style_id] = styles[parent_style_id].copy()
4935 for prop in SUPPORTED_STYLING:
4936 prop_val = style.get(_x('tts:' + prop))
4937 if prop_val:
4938 styles.setdefault(style_id, {})[prop] = prop_val
4939 if repeat:
4940 repeat = False
4941 else:
4942 break
4943
4944 for p in ('body', 'div'):
4945 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4946 if ele is None:
4947 continue
4948 style = styles.get(ele.get('style'))
4949 if not style:
4950 continue
4951 default_style.update(style)
4952
bf6427d2 4953 for para, index in zip(paras, itertools.count(1)):
d631d5f9 4954 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 4955 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
4956 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4957 if begin_time is None:
4958 continue
7dff0363 4959 if not end_time:
d631d5f9
YCH
4960 if not dur:
4961 continue
4962 end_time = begin_time + dur
bf6427d2
YCH
4963 out.append('%d\n%s --> %s\n%s\n\n' % (
4964 index,
c1c924ab
YCH
4965 srt_subtitles_timecode(begin_time),
4966 srt_subtitles_timecode(end_time),
bf6427d2
YCH
4967 parse_node(para)))
4968
4969 return ''.join(out)
4970
4971
66e289ba
S
4972def cli_option(params, command_option, param):
4973 param = params.get(param)
98e698f1
RA
4974 if param:
4975 param = compat_str(param)
66e289ba
S
4976 return [command_option, param] if param is not None else []
4977
4978
4979def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4980 param = params.get(param)
5b232f46
S
4981 if param is None:
4982 return []
66e289ba
S
4983 assert isinstance(param, bool)
4984 if separator:
4985 return [command_option + separator + (true_value if param else false_value)]
4986 return [command_option, true_value if param else false_value]
4987
4988
4989def cli_valueless_option(params, command_option, param, expected_value=True):
4990 param = params.get(param)
4991 return [command_option] if param == expected_value else []
4992
4993
e92caff5 4994def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 4995 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 4996 if use_compat:
5b1ecbb3 4997 return argdict
4998 else:
4999 argdict = None
eab9b2bc 5000 if argdict is None:
5b1ecbb3 5001 return default
eab9b2bc 5002 assert isinstance(argdict, dict)
5003
e92caff5 5004 assert isinstance(keys, (list, tuple))
5005 for key_list in keys:
e92caff5 5006 arg_list = list(filter(
5007 lambda x: x is not None,
6606817a 5008 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5009 if arg_list:
5010 return [arg for args in arg_list for arg in args]
5011 return default
66e289ba 5012
6251555f 5013
330690a2 5014def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5015 main_key, exe = main_key.lower(), exe.lower()
5016 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5017 keys = [f'{root_key}{k}' for k in (keys or [''])]
5018 if root_key in keys:
5019 if main_key != exe:
5020 keys.append((main_key, exe))
5021 keys.append('default')
5022 else:
5023 use_compat = False
5024 return cli_configuration_args(argdict, keys, default, use_compat)
5025
66e289ba 5026
39672624
YCH
5027class ISO639Utils(object):
5028 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5029 _lang_map = {
5030 'aa': 'aar',
5031 'ab': 'abk',
5032 'ae': 'ave',
5033 'af': 'afr',
5034 'ak': 'aka',
5035 'am': 'amh',
5036 'an': 'arg',
5037 'ar': 'ara',
5038 'as': 'asm',
5039 'av': 'ava',
5040 'ay': 'aym',
5041 'az': 'aze',
5042 'ba': 'bak',
5043 'be': 'bel',
5044 'bg': 'bul',
5045 'bh': 'bih',
5046 'bi': 'bis',
5047 'bm': 'bam',
5048 'bn': 'ben',
5049 'bo': 'bod',
5050 'br': 'bre',
5051 'bs': 'bos',
5052 'ca': 'cat',
5053 'ce': 'che',
5054 'ch': 'cha',
5055 'co': 'cos',
5056 'cr': 'cre',
5057 'cs': 'ces',
5058 'cu': 'chu',
5059 'cv': 'chv',
5060 'cy': 'cym',
5061 'da': 'dan',
5062 'de': 'deu',
5063 'dv': 'div',
5064 'dz': 'dzo',
5065 'ee': 'ewe',
5066 'el': 'ell',
5067 'en': 'eng',
5068 'eo': 'epo',
5069 'es': 'spa',
5070 'et': 'est',
5071 'eu': 'eus',
5072 'fa': 'fas',
5073 'ff': 'ful',
5074 'fi': 'fin',
5075 'fj': 'fij',
5076 'fo': 'fao',
5077 'fr': 'fra',
5078 'fy': 'fry',
5079 'ga': 'gle',
5080 'gd': 'gla',
5081 'gl': 'glg',
5082 'gn': 'grn',
5083 'gu': 'guj',
5084 'gv': 'glv',
5085 'ha': 'hau',
5086 'he': 'heb',
b7acc835 5087 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5088 'hi': 'hin',
5089 'ho': 'hmo',
5090 'hr': 'hrv',
5091 'ht': 'hat',
5092 'hu': 'hun',
5093 'hy': 'hye',
5094 'hz': 'her',
5095 'ia': 'ina',
5096 'id': 'ind',
b7acc835 5097 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5098 'ie': 'ile',
5099 'ig': 'ibo',
5100 'ii': 'iii',
5101 'ik': 'ipk',
5102 'io': 'ido',
5103 'is': 'isl',
5104 'it': 'ita',
5105 'iu': 'iku',
5106 'ja': 'jpn',
5107 'jv': 'jav',
5108 'ka': 'kat',
5109 'kg': 'kon',
5110 'ki': 'kik',
5111 'kj': 'kua',
5112 'kk': 'kaz',
5113 'kl': 'kal',
5114 'km': 'khm',
5115 'kn': 'kan',
5116 'ko': 'kor',
5117 'kr': 'kau',
5118 'ks': 'kas',
5119 'ku': 'kur',
5120 'kv': 'kom',
5121 'kw': 'cor',
5122 'ky': 'kir',
5123 'la': 'lat',
5124 'lb': 'ltz',
5125 'lg': 'lug',
5126 'li': 'lim',
5127 'ln': 'lin',
5128 'lo': 'lao',
5129 'lt': 'lit',
5130 'lu': 'lub',
5131 'lv': 'lav',
5132 'mg': 'mlg',
5133 'mh': 'mah',
5134 'mi': 'mri',
5135 'mk': 'mkd',
5136 'ml': 'mal',
5137 'mn': 'mon',
5138 'mr': 'mar',
5139 'ms': 'msa',
5140 'mt': 'mlt',
5141 'my': 'mya',
5142 'na': 'nau',
5143 'nb': 'nob',
5144 'nd': 'nde',
5145 'ne': 'nep',
5146 'ng': 'ndo',
5147 'nl': 'nld',
5148 'nn': 'nno',
5149 'no': 'nor',
5150 'nr': 'nbl',
5151 'nv': 'nav',
5152 'ny': 'nya',
5153 'oc': 'oci',
5154 'oj': 'oji',
5155 'om': 'orm',
5156 'or': 'ori',
5157 'os': 'oss',
5158 'pa': 'pan',
5159 'pi': 'pli',
5160 'pl': 'pol',
5161 'ps': 'pus',
5162 'pt': 'por',
5163 'qu': 'que',
5164 'rm': 'roh',
5165 'rn': 'run',
5166 'ro': 'ron',
5167 'ru': 'rus',
5168 'rw': 'kin',
5169 'sa': 'san',
5170 'sc': 'srd',
5171 'sd': 'snd',
5172 'se': 'sme',
5173 'sg': 'sag',
5174 'si': 'sin',
5175 'sk': 'slk',
5176 'sl': 'slv',
5177 'sm': 'smo',
5178 'sn': 'sna',
5179 'so': 'som',
5180 'sq': 'sqi',
5181 'sr': 'srp',
5182 'ss': 'ssw',
5183 'st': 'sot',
5184 'su': 'sun',
5185 'sv': 'swe',
5186 'sw': 'swa',
5187 'ta': 'tam',
5188 'te': 'tel',
5189 'tg': 'tgk',
5190 'th': 'tha',
5191 'ti': 'tir',
5192 'tk': 'tuk',
5193 'tl': 'tgl',
5194 'tn': 'tsn',
5195 'to': 'ton',
5196 'tr': 'tur',
5197 'ts': 'tso',
5198 'tt': 'tat',
5199 'tw': 'twi',
5200 'ty': 'tah',
5201 'ug': 'uig',
5202 'uk': 'ukr',
5203 'ur': 'urd',
5204 'uz': 'uzb',
5205 've': 'ven',
5206 'vi': 'vie',
5207 'vo': 'vol',
5208 'wa': 'wln',
5209 'wo': 'wol',
5210 'xh': 'xho',
5211 'yi': 'yid',
e9a50fba 5212 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5213 'yo': 'yor',
5214 'za': 'zha',
5215 'zh': 'zho',
5216 'zu': 'zul',
5217 }
5218
5219 @classmethod
5220 def short2long(cls, code):
5221 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5222 return cls._lang_map.get(code[:2])
5223
5224 @classmethod
5225 def long2short(cls, code):
5226 """Convert language code from ISO 639-2/T to ISO 639-1"""
5227 for short_name, long_name in cls._lang_map.items():
5228 if long_name == code:
5229 return short_name
5230
5231
4eb10f66
YCH
5232class ISO3166Utils(object):
5233 # From http://data.okfn.org/data/core/country-list
5234 _country_map = {
5235 'AF': 'Afghanistan',
5236 'AX': 'Åland Islands',
5237 'AL': 'Albania',
5238 'DZ': 'Algeria',
5239 'AS': 'American Samoa',
5240 'AD': 'Andorra',
5241 'AO': 'Angola',
5242 'AI': 'Anguilla',
5243 'AQ': 'Antarctica',
5244 'AG': 'Antigua and Barbuda',
5245 'AR': 'Argentina',
5246 'AM': 'Armenia',
5247 'AW': 'Aruba',
5248 'AU': 'Australia',
5249 'AT': 'Austria',
5250 'AZ': 'Azerbaijan',
5251 'BS': 'Bahamas',
5252 'BH': 'Bahrain',
5253 'BD': 'Bangladesh',
5254 'BB': 'Barbados',
5255 'BY': 'Belarus',
5256 'BE': 'Belgium',
5257 'BZ': 'Belize',
5258 'BJ': 'Benin',
5259 'BM': 'Bermuda',
5260 'BT': 'Bhutan',
5261 'BO': 'Bolivia, Plurinational State of',
5262 'BQ': 'Bonaire, Sint Eustatius and Saba',
5263 'BA': 'Bosnia and Herzegovina',
5264 'BW': 'Botswana',
5265 'BV': 'Bouvet Island',
5266 'BR': 'Brazil',
5267 'IO': 'British Indian Ocean Territory',
5268 'BN': 'Brunei Darussalam',
5269 'BG': 'Bulgaria',
5270 'BF': 'Burkina Faso',
5271 'BI': 'Burundi',
5272 'KH': 'Cambodia',
5273 'CM': 'Cameroon',
5274 'CA': 'Canada',
5275 'CV': 'Cape Verde',
5276 'KY': 'Cayman Islands',
5277 'CF': 'Central African Republic',
5278 'TD': 'Chad',
5279 'CL': 'Chile',
5280 'CN': 'China',
5281 'CX': 'Christmas Island',
5282 'CC': 'Cocos (Keeling) Islands',
5283 'CO': 'Colombia',
5284 'KM': 'Comoros',
5285 'CG': 'Congo',
5286 'CD': 'Congo, the Democratic Republic of the',
5287 'CK': 'Cook Islands',
5288 'CR': 'Costa Rica',
5289 'CI': 'Côte d\'Ivoire',
5290 'HR': 'Croatia',
5291 'CU': 'Cuba',
5292 'CW': 'Curaçao',
5293 'CY': 'Cyprus',
5294 'CZ': 'Czech Republic',
5295 'DK': 'Denmark',
5296 'DJ': 'Djibouti',
5297 'DM': 'Dominica',
5298 'DO': 'Dominican Republic',
5299 'EC': 'Ecuador',
5300 'EG': 'Egypt',
5301 'SV': 'El Salvador',
5302 'GQ': 'Equatorial Guinea',
5303 'ER': 'Eritrea',
5304 'EE': 'Estonia',
5305 'ET': 'Ethiopia',
5306 'FK': 'Falkland Islands (Malvinas)',
5307 'FO': 'Faroe Islands',
5308 'FJ': 'Fiji',
5309 'FI': 'Finland',
5310 'FR': 'France',
5311 'GF': 'French Guiana',
5312 'PF': 'French Polynesia',
5313 'TF': 'French Southern Territories',
5314 'GA': 'Gabon',
5315 'GM': 'Gambia',
5316 'GE': 'Georgia',
5317 'DE': 'Germany',
5318 'GH': 'Ghana',
5319 'GI': 'Gibraltar',
5320 'GR': 'Greece',
5321 'GL': 'Greenland',
5322 'GD': 'Grenada',
5323 'GP': 'Guadeloupe',
5324 'GU': 'Guam',
5325 'GT': 'Guatemala',
5326 'GG': 'Guernsey',
5327 'GN': 'Guinea',
5328 'GW': 'Guinea-Bissau',
5329 'GY': 'Guyana',
5330 'HT': 'Haiti',
5331 'HM': 'Heard Island and McDonald Islands',
5332 'VA': 'Holy See (Vatican City State)',
5333 'HN': 'Honduras',
5334 'HK': 'Hong Kong',
5335 'HU': 'Hungary',
5336 'IS': 'Iceland',
5337 'IN': 'India',
5338 'ID': 'Indonesia',
5339 'IR': 'Iran, Islamic Republic of',
5340 'IQ': 'Iraq',
5341 'IE': 'Ireland',
5342 'IM': 'Isle of Man',
5343 'IL': 'Israel',
5344 'IT': 'Italy',
5345 'JM': 'Jamaica',
5346 'JP': 'Japan',
5347 'JE': 'Jersey',
5348 'JO': 'Jordan',
5349 'KZ': 'Kazakhstan',
5350 'KE': 'Kenya',
5351 'KI': 'Kiribati',
5352 'KP': 'Korea, Democratic People\'s Republic of',
5353 'KR': 'Korea, Republic of',
5354 'KW': 'Kuwait',
5355 'KG': 'Kyrgyzstan',
5356 'LA': 'Lao People\'s Democratic Republic',
5357 'LV': 'Latvia',
5358 'LB': 'Lebanon',
5359 'LS': 'Lesotho',
5360 'LR': 'Liberia',
5361 'LY': 'Libya',
5362 'LI': 'Liechtenstein',
5363 'LT': 'Lithuania',
5364 'LU': 'Luxembourg',
5365 'MO': 'Macao',
5366 'MK': 'Macedonia, the Former Yugoslav Republic of',
5367 'MG': 'Madagascar',
5368 'MW': 'Malawi',
5369 'MY': 'Malaysia',
5370 'MV': 'Maldives',
5371 'ML': 'Mali',
5372 'MT': 'Malta',
5373 'MH': 'Marshall Islands',
5374 'MQ': 'Martinique',
5375 'MR': 'Mauritania',
5376 'MU': 'Mauritius',
5377 'YT': 'Mayotte',
5378 'MX': 'Mexico',
5379 'FM': 'Micronesia, Federated States of',
5380 'MD': 'Moldova, Republic of',
5381 'MC': 'Monaco',
5382 'MN': 'Mongolia',
5383 'ME': 'Montenegro',
5384 'MS': 'Montserrat',
5385 'MA': 'Morocco',
5386 'MZ': 'Mozambique',
5387 'MM': 'Myanmar',
5388 'NA': 'Namibia',
5389 'NR': 'Nauru',
5390 'NP': 'Nepal',
5391 'NL': 'Netherlands',
5392 'NC': 'New Caledonia',
5393 'NZ': 'New Zealand',
5394 'NI': 'Nicaragua',
5395 'NE': 'Niger',
5396 'NG': 'Nigeria',
5397 'NU': 'Niue',
5398 'NF': 'Norfolk Island',
5399 'MP': 'Northern Mariana Islands',
5400 'NO': 'Norway',
5401 'OM': 'Oman',
5402 'PK': 'Pakistan',
5403 'PW': 'Palau',
5404 'PS': 'Palestine, State of',
5405 'PA': 'Panama',
5406 'PG': 'Papua New Guinea',
5407 'PY': 'Paraguay',
5408 'PE': 'Peru',
5409 'PH': 'Philippines',
5410 'PN': 'Pitcairn',
5411 'PL': 'Poland',
5412 'PT': 'Portugal',
5413 'PR': 'Puerto Rico',
5414 'QA': 'Qatar',
5415 'RE': 'Réunion',
5416 'RO': 'Romania',
5417 'RU': 'Russian Federation',
5418 'RW': 'Rwanda',
5419 'BL': 'Saint Barthélemy',
5420 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5421 'KN': 'Saint Kitts and Nevis',
5422 'LC': 'Saint Lucia',
5423 'MF': 'Saint Martin (French part)',
5424 'PM': 'Saint Pierre and Miquelon',
5425 'VC': 'Saint Vincent and the Grenadines',
5426 'WS': 'Samoa',
5427 'SM': 'San Marino',
5428 'ST': 'Sao Tome and Principe',
5429 'SA': 'Saudi Arabia',
5430 'SN': 'Senegal',
5431 'RS': 'Serbia',
5432 'SC': 'Seychelles',
5433 'SL': 'Sierra Leone',
5434 'SG': 'Singapore',
5435 'SX': 'Sint Maarten (Dutch part)',
5436 'SK': 'Slovakia',
5437 'SI': 'Slovenia',
5438 'SB': 'Solomon Islands',
5439 'SO': 'Somalia',
5440 'ZA': 'South Africa',
5441 'GS': 'South Georgia and the South Sandwich Islands',
5442 'SS': 'South Sudan',
5443 'ES': 'Spain',
5444 'LK': 'Sri Lanka',
5445 'SD': 'Sudan',
5446 'SR': 'Suriname',
5447 'SJ': 'Svalbard and Jan Mayen',
5448 'SZ': 'Swaziland',
5449 'SE': 'Sweden',
5450 'CH': 'Switzerland',
5451 'SY': 'Syrian Arab Republic',
5452 'TW': 'Taiwan, Province of China',
5453 'TJ': 'Tajikistan',
5454 'TZ': 'Tanzania, United Republic of',
5455 'TH': 'Thailand',
5456 'TL': 'Timor-Leste',
5457 'TG': 'Togo',
5458 'TK': 'Tokelau',
5459 'TO': 'Tonga',
5460 'TT': 'Trinidad and Tobago',
5461 'TN': 'Tunisia',
5462 'TR': 'Turkey',
5463 'TM': 'Turkmenistan',
5464 'TC': 'Turks and Caicos Islands',
5465 'TV': 'Tuvalu',
5466 'UG': 'Uganda',
5467 'UA': 'Ukraine',
5468 'AE': 'United Arab Emirates',
5469 'GB': 'United Kingdom',
5470 'US': 'United States',
5471 'UM': 'United States Minor Outlying Islands',
5472 'UY': 'Uruguay',
5473 'UZ': 'Uzbekistan',
5474 'VU': 'Vanuatu',
5475 'VE': 'Venezuela, Bolivarian Republic of',
5476 'VN': 'Viet Nam',
5477 'VG': 'Virgin Islands, British',
5478 'VI': 'Virgin Islands, U.S.',
5479 'WF': 'Wallis and Futuna',
5480 'EH': 'Western Sahara',
5481 'YE': 'Yemen',
5482 'ZM': 'Zambia',
5483 'ZW': 'Zimbabwe',
5484 }
5485
5486 @classmethod
5487 def short2full(cls, code):
5488 """Convert an ISO 3166-2 country code to the corresponding full name"""
5489 return cls._country_map.get(code.upper())
5490
5491
773f291d
S
5492class GeoUtils(object):
5493 # Major IPv4 address blocks per country
5494 _country_ip_map = {
53896ca5 5495 'AD': '46.172.224.0/19',
773f291d
S
5496 'AE': '94.200.0.0/13',
5497 'AF': '149.54.0.0/17',
5498 'AG': '209.59.64.0/18',
5499 'AI': '204.14.248.0/21',
5500 'AL': '46.99.0.0/16',
5501 'AM': '46.70.0.0/15',
5502 'AO': '105.168.0.0/13',
53896ca5
S
5503 'AP': '182.50.184.0/21',
5504 'AQ': '23.154.160.0/24',
773f291d
S
5505 'AR': '181.0.0.0/12',
5506 'AS': '202.70.112.0/20',
53896ca5 5507 'AT': '77.116.0.0/14',
773f291d
S
5508 'AU': '1.128.0.0/11',
5509 'AW': '181.41.0.0/18',
53896ca5
S
5510 'AX': '185.217.4.0/22',
5511 'AZ': '5.197.0.0/16',
773f291d
S
5512 'BA': '31.176.128.0/17',
5513 'BB': '65.48.128.0/17',
5514 'BD': '114.130.0.0/16',
5515 'BE': '57.0.0.0/8',
53896ca5 5516 'BF': '102.178.0.0/15',
773f291d
S
5517 'BG': '95.42.0.0/15',
5518 'BH': '37.131.0.0/17',
5519 'BI': '154.117.192.0/18',
5520 'BJ': '137.255.0.0/16',
53896ca5 5521 'BL': '185.212.72.0/23',
773f291d
S
5522 'BM': '196.12.64.0/18',
5523 'BN': '156.31.0.0/16',
5524 'BO': '161.56.0.0/16',
5525 'BQ': '161.0.80.0/20',
53896ca5 5526 'BR': '191.128.0.0/12',
773f291d
S
5527 'BS': '24.51.64.0/18',
5528 'BT': '119.2.96.0/19',
5529 'BW': '168.167.0.0/16',
5530 'BY': '178.120.0.0/13',
5531 'BZ': '179.42.192.0/18',
5532 'CA': '99.224.0.0/11',
5533 'CD': '41.243.0.0/16',
53896ca5
S
5534 'CF': '197.242.176.0/21',
5535 'CG': '160.113.0.0/16',
773f291d 5536 'CH': '85.0.0.0/13',
53896ca5 5537 'CI': '102.136.0.0/14',
773f291d
S
5538 'CK': '202.65.32.0/19',
5539 'CL': '152.172.0.0/14',
53896ca5 5540 'CM': '102.244.0.0/14',
773f291d
S
5541 'CN': '36.128.0.0/10',
5542 'CO': '181.240.0.0/12',
5543 'CR': '201.192.0.0/12',
5544 'CU': '152.206.0.0/15',
5545 'CV': '165.90.96.0/19',
5546 'CW': '190.88.128.0/17',
53896ca5 5547 'CY': '31.153.0.0/16',
773f291d
S
5548 'CZ': '88.100.0.0/14',
5549 'DE': '53.0.0.0/8',
5550 'DJ': '197.241.0.0/17',
5551 'DK': '87.48.0.0/12',
5552 'DM': '192.243.48.0/20',
5553 'DO': '152.166.0.0/15',
5554 'DZ': '41.96.0.0/12',
5555 'EC': '186.68.0.0/15',
5556 'EE': '90.190.0.0/15',
5557 'EG': '156.160.0.0/11',
5558 'ER': '196.200.96.0/20',
5559 'ES': '88.0.0.0/11',
5560 'ET': '196.188.0.0/14',
5561 'EU': '2.16.0.0/13',
5562 'FI': '91.152.0.0/13',
5563 'FJ': '144.120.0.0/16',
53896ca5 5564 'FK': '80.73.208.0/21',
773f291d
S
5565 'FM': '119.252.112.0/20',
5566 'FO': '88.85.32.0/19',
5567 'FR': '90.0.0.0/9',
5568 'GA': '41.158.0.0/15',
5569 'GB': '25.0.0.0/8',
5570 'GD': '74.122.88.0/21',
5571 'GE': '31.146.0.0/16',
5572 'GF': '161.22.64.0/18',
5573 'GG': '62.68.160.0/19',
53896ca5
S
5574 'GH': '154.160.0.0/12',
5575 'GI': '95.164.0.0/16',
773f291d
S
5576 'GL': '88.83.0.0/19',
5577 'GM': '160.182.0.0/15',
5578 'GN': '197.149.192.0/18',
5579 'GP': '104.250.0.0/19',
5580 'GQ': '105.235.224.0/20',
5581 'GR': '94.64.0.0/13',
5582 'GT': '168.234.0.0/16',
5583 'GU': '168.123.0.0/16',
5584 'GW': '197.214.80.0/20',
5585 'GY': '181.41.64.0/18',
5586 'HK': '113.252.0.0/14',
5587 'HN': '181.210.0.0/16',
5588 'HR': '93.136.0.0/13',
5589 'HT': '148.102.128.0/17',
5590 'HU': '84.0.0.0/14',
5591 'ID': '39.192.0.0/10',
5592 'IE': '87.32.0.0/12',
5593 'IL': '79.176.0.0/13',
5594 'IM': '5.62.80.0/20',
5595 'IN': '117.192.0.0/10',
5596 'IO': '203.83.48.0/21',
5597 'IQ': '37.236.0.0/14',
5598 'IR': '2.176.0.0/12',
5599 'IS': '82.221.0.0/16',
5600 'IT': '79.0.0.0/10',
5601 'JE': '87.244.64.0/18',
5602 'JM': '72.27.0.0/17',
5603 'JO': '176.29.0.0/16',
53896ca5 5604 'JP': '133.0.0.0/8',
773f291d
S
5605 'KE': '105.48.0.0/12',
5606 'KG': '158.181.128.0/17',
5607 'KH': '36.37.128.0/17',
5608 'KI': '103.25.140.0/22',
5609 'KM': '197.255.224.0/20',
53896ca5 5610 'KN': '198.167.192.0/19',
773f291d
S
5611 'KP': '175.45.176.0/22',
5612 'KR': '175.192.0.0/10',
5613 'KW': '37.36.0.0/14',
5614 'KY': '64.96.0.0/15',
5615 'KZ': '2.72.0.0/13',
5616 'LA': '115.84.64.0/18',
5617 'LB': '178.135.0.0/16',
53896ca5 5618 'LC': '24.92.144.0/20',
773f291d
S
5619 'LI': '82.117.0.0/19',
5620 'LK': '112.134.0.0/15',
53896ca5 5621 'LR': '102.183.0.0/16',
773f291d
S
5622 'LS': '129.232.0.0/17',
5623 'LT': '78.56.0.0/13',
5624 'LU': '188.42.0.0/16',
5625 'LV': '46.109.0.0/16',
5626 'LY': '41.252.0.0/14',
5627 'MA': '105.128.0.0/11',
5628 'MC': '88.209.64.0/18',
5629 'MD': '37.246.0.0/16',
5630 'ME': '178.175.0.0/17',
5631 'MF': '74.112.232.0/21',
5632 'MG': '154.126.0.0/17',
5633 'MH': '117.103.88.0/21',
5634 'MK': '77.28.0.0/15',
5635 'ML': '154.118.128.0/18',
5636 'MM': '37.111.0.0/17',
5637 'MN': '49.0.128.0/17',
5638 'MO': '60.246.0.0/16',
5639 'MP': '202.88.64.0/20',
5640 'MQ': '109.203.224.0/19',
5641 'MR': '41.188.64.0/18',
5642 'MS': '208.90.112.0/22',
5643 'MT': '46.11.0.0/16',
5644 'MU': '105.16.0.0/12',
5645 'MV': '27.114.128.0/18',
53896ca5 5646 'MW': '102.70.0.0/15',
773f291d
S
5647 'MX': '187.192.0.0/11',
5648 'MY': '175.136.0.0/13',
5649 'MZ': '197.218.0.0/15',
5650 'NA': '41.182.0.0/16',
5651 'NC': '101.101.0.0/18',
5652 'NE': '197.214.0.0/18',
5653 'NF': '203.17.240.0/22',
5654 'NG': '105.112.0.0/12',
5655 'NI': '186.76.0.0/15',
5656 'NL': '145.96.0.0/11',
5657 'NO': '84.208.0.0/13',
5658 'NP': '36.252.0.0/15',
5659 'NR': '203.98.224.0/19',
5660 'NU': '49.156.48.0/22',
5661 'NZ': '49.224.0.0/14',
5662 'OM': '5.36.0.0/15',
5663 'PA': '186.72.0.0/15',
5664 'PE': '186.160.0.0/14',
5665 'PF': '123.50.64.0/18',
5666 'PG': '124.240.192.0/19',
5667 'PH': '49.144.0.0/13',
5668 'PK': '39.32.0.0/11',
5669 'PL': '83.0.0.0/11',
5670 'PM': '70.36.0.0/20',
5671 'PR': '66.50.0.0/16',
5672 'PS': '188.161.0.0/16',
5673 'PT': '85.240.0.0/13',
5674 'PW': '202.124.224.0/20',
5675 'PY': '181.120.0.0/14',
5676 'QA': '37.210.0.0/15',
53896ca5 5677 'RE': '102.35.0.0/16',
773f291d 5678 'RO': '79.112.0.0/13',
53896ca5 5679 'RS': '93.86.0.0/15',
773f291d 5680 'RU': '5.136.0.0/13',
53896ca5 5681 'RW': '41.186.0.0/16',
773f291d
S
5682 'SA': '188.48.0.0/13',
5683 'SB': '202.1.160.0/19',
5684 'SC': '154.192.0.0/11',
53896ca5 5685 'SD': '102.120.0.0/13',
773f291d 5686 'SE': '78.64.0.0/12',
53896ca5 5687 'SG': '8.128.0.0/10',
773f291d
S
5688 'SI': '188.196.0.0/14',
5689 'SK': '78.98.0.0/15',
53896ca5 5690 'SL': '102.143.0.0/17',
773f291d
S
5691 'SM': '89.186.32.0/19',
5692 'SN': '41.82.0.0/15',
53896ca5 5693 'SO': '154.115.192.0/18',
773f291d
S
5694 'SR': '186.179.128.0/17',
5695 'SS': '105.235.208.0/21',
5696 'ST': '197.159.160.0/19',
5697 'SV': '168.243.0.0/16',
5698 'SX': '190.102.0.0/20',
5699 'SY': '5.0.0.0/16',
5700 'SZ': '41.84.224.0/19',
5701 'TC': '65.255.48.0/20',
5702 'TD': '154.68.128.0/19',
5703 'TG': '196.168.0.0/14',
5704 'TH': '171.96.0.0/13',
5705 'TJ': '85.9.128.0/18',
5706 'TK': '27.96.24.0/21',
5707 'TL': '180.189.160.0/20',
5708 'TM': '95.85.96.0/19',
5709 'TN': '197.0.0.0/11',
5710 'TO': '175.176.144.0/21',
5711 'TR': '78.160.0.0/11',
5712 'TT': '186.44.0.0/15',
5713 'TV': '202.2.96.0/19',
5714 'TW': '120.96.0.0/11',
5715 'TZ': '156.156.0.0/14',
53896ca5
S
5716 'UA': '37.52.0.0/14',
5717 'UG': '102.80.0.0/13',
5718 'US': '6.0.0.0/8',
773f291d 5719 'UY': '167.56.0.0/13',
53896ca5 5720 'UZ': '84.54.64.0/18',
773f291d 5721 'VA': '212.77.0.0/19',
53896ca5 5722 'VC': '207.191.240.0/21',
773f291d 5723 'VE': '186.88.0.0/13',
53896ca5 5724 'VG': '66.81.192.0/20',
773f291d
S
5725 'VI': '146.226.0.0/16',
5726 'VN': '14.160.0.0/11',
5727 'VU': '202.80.32.0/20',
5728 'WF': '117.20.32.0/21',
5729 'WS': '202.4.32.0/19',
5730 'YE': '134.35.0.0/16',
5731 'YT': '41.242.116.0/22',
5732 'ZA': '41.0.0.0/11',
53896ca5
S
5733 'ZM': '102.144.0.0/13',
5734 'ZW': '102.177.192.0/18',
773f291d
S
5735 }
5736
5737 @classmethod
5f95927a
S
5738 def random_ipv4(cls, code_or_block):
5739 if len(code_or_block) == 2:
5740 block = cls._country_ip_map.get(code_or_block.upper())
5741 if not block:
5742 return None
5743 else:
5744 block = code_or_block
773f291d
S
5745 addr, preflen = block.split('/')
5746 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5747 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5748 return compat_str(socket.inet_ntoa(
4248dad9 5749 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5750
5751
91410c9b 5752class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5753 def __init__(self, proxies=None):
5754 # Set default handlers
5755 for type in ('http', 'https'):
5756 setattr(self, '%s_open' % type,
5757 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5758 meth(r, proxy, type))
38e87f6c 5759 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5760
91410c9b 5761 def proxy_open(self, req, proxy, type):
2461f79d 5762 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5763 if req_proxy is not None:
5764 proxy = req_proxy
2461f79d
PH
5765 del req.headers['Ytdl-request-proxy']
5766
5767 if proxy == '__noproxy__':
5768 return None # No Proxy
51fb4995 5769 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5770 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5771 # yt-dlp's http/https handlers do wrapping the socket with socks
71aff188 5772 return None
91410c9b
PH
5773 return compat_urllib_request.ProxyHandler.proxy_open(
5774 self, req, proxy, type)
5bc880b9
YCH
5775
5776
0a5445dd
YCH
5777# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5778# released into Public Domain
5779# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5780
5781def long_to_bytes(n, blocksize=0):
5782 """long_to_bytes(n:long, blocksize:int) : string
5783 Convert a long integer to a byte string.
5784
5785 If optional blocksize is given and greater than zero, pad the front of the
5786 byte string with binary zeros so that the length is a multiple of
5787 blocksize.
5788 """
5789 # after much testing, this algorithm was deemed to be the fastest
5790 s = b''
5791 n = int(n)
5792 while n > 0:
5793 s = compat_struct_pack('>I', n & 0xffffffff) + s
5794 n = n >> 32
5795 # strip off leading zeros
5796 for i in range(len(s)):
5797 if s[i] != b'\000'[0]:
5798 break
5799 else:
5800 # only happens when n == 0
5801 s = b'\000'
5802 i = 0
5803 s = s[i:]
5804 # add back some pad bytes. this could be done more efficiently w.r.t. the
5805 # de-padding being done above, but sigh...
5806 if blocksize > 0 and len(s) % blocksize:
5807 s = (blocksize - len(s) % blocksize) * b'\000' + s
5808 return s
5809
5810
5811def bytes_to_long(s):
5812 """bytes_to_long(string) : long
5813 Convert a byte string to a long integer.
5814
5815 This is (essentially) the inverse of long_to_bytes().
5816 """
5817 acc = 0
5818 length = len(s)
5819 if length % 4:
5820 extra = (4 - length % 4)
5821 s = b'\000' * extra + s
5822 length = length + extra
5823 for i in range(0, length, 4):
5824 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5825 return acc
5826
5827
5bc880b9
YCH
5828def ohdave_rsa_encrypt(data, exponent, modulus):
5829 '''
5830 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5831
5832 Input:
5833 data: data to encrypt, bytes-like object
5834 exponent, modulus: parameter e and N of RSA algorithm, both integer
5835 Output: hex string of encrypted data
5836
5837 Limitation: supports one block encryption only
5838 '''
5839
5840 payload = int(binascii.hexlify(data[::-1]), 16)
5841 encrypted = pow(payload, exponent, modulus)
5842 return '%x' % encrypted
81bdc8fd
YCH
5843
5844
f48409c7
YCH
5845def pkcs1pad(data, length):
5846 """
5847 Padding input data with PKCS#1 scheme
5848
5849 @param {int[]} data input data
5850 @param {int} length target length
5851 @returns {int[]} padded data
5852 """
5853 if len(data) > length - 11:
5854 raise ValueError('Input data too long for PKCS#1 padding')
5855
5856 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5857 return [0, 2] + pseudo_random + [0] + data
5858
5859
5eb6bdce 5860def encode_base_n(num, n, table=None):
59f898b7 5861 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
5862 if not table:
5863 table = FULL_TABLE[:n]
5864
5eb6bdce
YCH
5865 if n > len(table):
5866 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5867
5868 if num == 0:
5869 return table[0]
5870
81bdc8fd
YCH
5871 ret = ''
5872 while num:
5873 ret = table[num % n] + ret
5874 num = num // n
5875 return ret
f52354a8
YCH
5876
5877
5878def decode_packed_codes(code):
06b3fe29 5879 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5880 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
5881 base = int(base)
5882 count = int(count)
5883 symbols = symbols.split('|')
5884 symbol_table = {}
5885
5886 while count:
5887 count -= 1
5eb6bdce 5888 base_n_count = encode_base_n(count, base)
f52354a8
YCH
5889 symbol_table[base_n_count] = symbols[count] or base_n_count
5890
5891 return re.sub(
5892 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5893 obfuscated_code)
e154c651 5894
5895
1ced2221
S
5896def caesar(s, alphabet, shift):
5897 if shift == 0:
5898 return s
5899 l = len(alphabet)
5900 return ''.join(
5901 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5902 for c in s)
5903
5904
5905def rot47(s):
5906 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
5907
5908
e154c651 5909def parse_m3u8_attributes(attrib):
5910 info = {}
5911 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5912 if val.startswith('"'):
5913 val = val[1:-1]
5914 info[key] = val
5915 return info
1143535d
YCH
5916
5917
5918def urshift(val, n):
5919 return val >> n if val >= 0 else (val + 0x100000000) >> n
d3f8e038
YCH
5920
5921
5922# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5923# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
5924def decode_png(png_data):
5925 # Reference: https://www.w3.org/TR/PNG/
5926 header = png_data[8:]
5927
5928 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5929 raise IOError('Not a valid PNG file.')
5930
5931 int_map = {1: '>B', 2: '>H', 4: '>I'}
5932 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5933
5934 chunks = []
5935
5936 while header:
5937 length = unpack_integer(header[:4])
5938 header = header[4:]
5939
5940 chunk_type = header[:4]
5941 header = header[4:]
5942
5943 chunk_data = header[:length]
5944 header = header[length:]
5945
5946 header = header[4:] # Skip CRC
5947
5948 chunks.append({
5949 'type': chunk_type,
5950 'length': length,
5951 'data': chunk_data
5952 })
5953
5954 ihdr = chunks[0]['data']
5955
5956 width = unpack_integer(ihdr[:4])
5957 height = unpack_integer(ihdr[4:8])
5958
5959 idat = b''
5960
5961 for chunk in chunks:
5962 if chunk['type'] == b'IDAT':
5963 idat += chunk['data']
5964
5965 if not idat:
5966 raise IOError('Unable to read PNG data.')
5967
5968 decompressed_data = bytearray(zlib.decompress(idat))
5969
5970 stride = width * 3
5971 pixels = []
5972
5973 def _get_pixel(idx):
5974 x = idx % stride
5975 y = idx // stride
5976 return pixels[y][x]
5977
5978 for y in range(height):
5979 basePos = y * (1 + stride)
5980 filter_type = decompressed_data[basePos]
5981
5982 current_row = []
5983
5984 pixels.append(current_row)
5985
5986 for x in range(stride):
5987 color = decompressed_data[1 + basePos + x]
5988 basex = y * stride + x
5989 left = 0
5990 up = 0
5991
5992 if x > 2:
5993 left = _get_pixel(basex - 3)
5994 if y > 0:
5995 up = _get_pixel(basex - stride)
5996
5997 if filter_type == 1: # Sub
5998 color = (color + left) & 0xff
5999 elif filter_type == 2: # Up
6000 color = (color + up) & 0xff
6001 elif filter_type == 3: # Average
6002 color = (color + ((left + up) >> 1)) & 0xff
6003 elif filter_type == 4: # Paeth
6004 a = left
6005 b = up
6006 c = 0
6007
6008 if x > 2 and y > 0:
6009 c = _get_pixel(basex - stride - 3)
6010
6011 p = a + b - c
6012
6013 pa = abs(p - a)
6014 pb = abs(p - b)
6015 pc = abs(p - c)
6016
6017 if pa <= pb and pa <= pc:
6018 color = (color + a) & 0xff
6019 elif pb <= pc:
6020 color = (color + b) & 0xff
6021 else:
6022 color = (color + c) & 0xff
6023
6024 current_row.append(color)
6025
6026 return width, height, pixels
efa97bdc
YCH
6027
6028
6029def write_xattr(path, key, value):
6030 # This mess below finds the best xattr tool for the job
6031 try:
6032 # try the pyxattr module...
6033 import xattr
6034
53a7e3d2
YCH
6035 if hasattr(xattr, 'set'): # pyxattr
6036 # Unicode arguments are not supported in python-pyxattr until
6037 # version 0.5.0
067aa17e 6038 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6039 pyxattr_required_version = '0.5.0'
6040 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6041 # TODO: fallback to CLI tools
6042 raise XAttrUnavailableError(
6043 'python-pyxattr is detected but is too old. '
7a5c1cfe 6044 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6045 'Falling back to other xattr implementations' % (
6046 pyxattr_required_version, xattr.__version__))
6047
6048 setxattr = xattr.set
6049 else: # xattr
6050 setxattr = xattr.setxattr
efa97bdc
YCH
6051
6052 try:
53a7e3d2 6053 setxattr(path, key, value)
efa97bdc
YCH
6054 except EnvironmentError as e:
6055 raise XAttrMetadataError(e.errno, e.strerror)
6056
6057 except ImportError:
6058 if compat_os_name == 'nt':
6059 # Write xattrs to NTFS Alternate Data Streams:
6060 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6061 assert ':' not in key
6062 assert os.path.exists(path)
6063
6064 ads_fn = path + ':' + key
6065 try:
6066 with open(ads_fn, 'wb') as f:
6067 f.write(value)
6068 except EnvironmentError as e:
6069 raise XAttrMetadataError(e.errno, e.strerror)
6070 else:
6071 user_has_setfattr = check_executable('setfattr', ['--version'])
6072 user_has_xattr = check_executable('xattr', ['-h'])
6073
6074 if user_has_setfattr or user_has_xattr:
6075
6076 value = value.decode('utf-8')
6077 if user_has_setfattr:
6078 executable = 'setfattr'
6079 opts = ['-n', key, '-v', value]
6080 elif user_has_xattr:
6081 executable = 'xattr'
6082 opts = ['-w', key, value]
6083
3089bc74
S
6084 cmd = ([encodeFilename(executable, True)]
6085 + [encodeArgument(o) for o in opts]
6086 + [encodeFilename(path, True)])
efa97bdc
YCH
6087
6088 try:
6089 p = subprocess.Popen(
6090 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6091 except EnvironmentError as e:
6092 raise XAttrMetadataError(e.errno, e.strerror)
f5b1bca9 6093 stdout, stderr = process_communicate_or_kill(p)
efa97bdc
YCH
6094 stderr = stderr.decode('utf-8', 'replace')
6095 if p.returncode != 0:
6096 raise XAttrMetadataError(p.returncode, stderr)
6097
6098 else:
6099 # On Unix, and can't find pyxattr, setfattr, or xattr.
6100 if sys.platform.startswith('linux'):
6101 raise XAttrUnavailableError(
6102 "Couldn't find a tool to set the xattrs. "
6103 "Install either the python 'pyxattr' or 'xattr' "
6104 "modules, or the GNU 'attr' package "
6105 "(which contains the 'setfattr' tool).")
6106 else:
6107 raise XAttrUnavailableError(
6108 "Couldn't find a tool to set the xattrs. "
6109 "Install either the python 'xattr' module, "
6110 "or the 'xattr' binary.")
0c265486
YCH
6111
6112
6113def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6114 start_date = datetime.date(1950, 1, 1)
6115 end_date = datetime.date(1995, 12, 31)
6116 offset = random.randint(0, (end_date - start_date).days)
6117 random_date = start_date + datetime.timedelta(offset)
0c265486 6118 return {
aa374bc7
AS
6119 year_field: str(random_date.year),
6120 month_field: str(random_date.month),
6121 day_field: str(random_date.day),
0c265486 6122 }
732044af 6123
c76eb41b 6124
732044af 6125# Templates for internet shortcut files, which are plain text files.
6126DOT_URL_LINK_TEMPLATE = '''
6127[InternetShortcut]
6128URL=%(url)s
6129'''.lstrip()
6130
6131DOT_WEBLOC_LINK_TEMPLATE = '''
6132<?xml version="1.0" encoding="UTF-8"?>
6133<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6134<plist version="1.0">
6135<dict>
6136\t<key>URL</key>
6137\t<string>%(url)s</string>
6138</dict>
6139</plist>
6140'''.lstrip()
6141
6142DOT_DESKTOP_LINK_TEMPLATE = '''
6143[Desktop Entry]
6144Encoding=UTF-8
6145Name=%(filename)s
6146Type=Link
6147URL=%(url)s
6148Icon=text-html
6149'''.lstrip()
6150
6151
6152def iri_to_uri(iri):
6153 """
6154 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6155
6156 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6157 """
6158
6159 iri_parts = compat_urllib_parse_urlparse(iri)
6160
6161 if '[' in iri_parts.netloc:
6162 raise ValueError('IPv6 URIs are not, yet, supported.')
6163 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6164
6165 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6166
6167 net_location = ''
6168 if iri_parts.username:
6169 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6170 if iri_parts.password is not None:
6171 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6172 net_location += '@'
6173
6174 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6175 # The 'idna' encoding produces ASCII text.
6176 if iri_parts.port is not None and iri_parts.port != 80:
6177 net_location += ':' + str(iri_parts.port)
6178
6179 return compat_urllib_parse_urlunparse(
6180 (iri_parts.scheme,
6181 net_location,
6182
6183 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6184
6185 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6186 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6187
6188 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6189 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6190
6191 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6192
6193 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6194
6195
6196def to_high_limit_path(path):
6197 if sys.platform in ['win32', 'cygwin']:
6198 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6199 return r'\\?\ '.rstrip() + os.path.abspath(path)
6200
6201 return path
76d321f6 6202
c76eb41b 6203
b868936c 6204def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6205 if field is None:
6206 val = obj if obj is not None else default
6207 else:
6208 val = obj.get(field, default)
76d321f6 6209 if func and val not in ignore:
6210 val = func(val)
6211 return template % val if val not in ignore else default
00dd0cd5 6212
6213
6214def clean_podcast_url(url):
6215 return re.sub(r'''(?x)
6216 (?:
6217 (?:
6218 chtbl\.com/track|
6219 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6220 play\.podtrac\.com
6221 )/[^/]+|
6222 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6223 flex\.acast\.com|
6224 pd(?:
6225 cn\.co| # https://podcorn.com/analytics-prefix/
6226 st\.fm # https://podsights.com/docs/
6227 )/e
6228 )/''', '', url)
ffcb8191
THD
6229
6230
6231_HEX_TABLE = '0123456789abcdef'
6232
6233
6234def random_uuidv4():
6235 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6236
6237
6238def make_dir(path, to_screen=None):
6239 try:
6240 dn = os.path.dirname(path)
6241 if dn and not os.path.exists(dn):
6242 os.makedirs(dn)
6243 return True
6244 except (OSError, IOError) as err:
6245 if callable(to_screen) is not None:
6246 to_screen('unable to create directory ' + error_to_compat_str(err))
6247 return False
f74980cb 6248
6249
6250def get_executable_path():
c552ae88 6251 from zipimport import zipimporter
6252 if hasattr(sys, 'frozen'): # Running from PyInstaller
6253 path = os.path.dirname(sys.executable)
6254 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6255 path = os.path.join(os.path.dirname(__file__), '../..')
6256 else:
6257 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6258 return os.path.abspath(path)
6259
6260
2f567473 6261def load_plugins(name, suffix, namespace):
f74980cb 6262 plugin_info = [None]
6263 classes = []
6264 try:
6265 plugin_info = imp.find_module(
6266 name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6267 plugins = imp.load_module(name, *plugin_info)
6268 for name in dir(plugins):
2f567473 6269 if name in namespace:
6270 continue
6271 if not name.endswith(suffix):
f74980cb 6272 continue
6273 klass = getattr(plugins, name)
6274 classes.append(klass)
6275 namespace[name] = klass
6276 except ImportError:
6277 pass
6278 finally:
6279 if plugin_info[0] is not None:
6280 plugin_info[0].close()
6281 return classes
06167fbb 6282
6283
325ebc17 6284def traverse_obj(
352d63fd 6285 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6286 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6287 ''' Traverse nested list/dict/tuple
8f334380 6288 @param path_list A list of paths which are checked one by one.
6289 Each path is a list of keys where each key is a string,
6290 a tuple of strings or "...". When a tuple is given,
6291 all the keys given in the tuple are traversed, and
6292 "..." traverses all the keys in the object
325ebc17 6293 @param default Default value to return
352d63fd 6294 @param expected_type Only accept final value of this type (Can also be any callable)
6295 @param get_all Return all the values obtained from a path or only the first one
324ad820 6296 @param casesense Whether to consider dictionary keys as case sensitive
6297 @param is_user_input Whether the keys are generated from user input. If True,
6298 strings are converted to int/slice if necessary
6299 @param traverse_string Whether to traverse inside strings. If True, any
6300 non-compatible object will also be converted into a string
8f334380 6301 # TODO: Write tests
324ad820 6302 '''
325ebc17 6303 if not casesense:
dbf5416a 6304 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6305 path_list = (map(_lower, variadic(path)) for path in path_list)
6306
6307 def _traverse_obj(obj, path, _current_depth=0):
6308 nonlocal depth
575e17a1 6309 if obj is None:
6310 return None
8f334380 6311 path = tuple(variadic(path))
6312 for i, key in enumerate(path):
6313 if isinstance(key, (list, tuple)):
6314 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6315 key = ...
6316 if key is ...:
6317 obj = (obj.values() if isinstance(obj, dict)
6318 else obj if isinstance(obj, (list, tuple, LazyList))
6319 else str(obj) if traverse_string else [])
6320 _current_depth += 1
6321 depth = max(depth, _current_depth)
6322 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
575e17a1 6323 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6324 obj = (obj.get(key) if casesense or (key in obj)
6325 else next((v for k, v in obj.items() if _lower(k) == key), None))
6326 else:
6327 if is_user_input:
6328 key = (int_or_none(key) if ':' not in key
6329 else slice(*map(int_or_none, key.split(':'))))
8f334380 6330 if key == slice(None):
575e17a1 6331 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6332 if not isinstance(key, (int, slice)):
9fea350f 6333 return None
8f334380 6334 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6335 if not traverse_string:
6336 return None
6337 obj = str(obj)
6338 try:
6339 obj = obj[key]
6340 except IndexError:
324ad820 6341 return None
325ebc17 6342 return obj
6343
352d63fd 6344 if isinstance(expected_type, type):
6345 type_test = lambda val: val if isinstance(val, expected_type) else None
6346 elif expected_type is not None:
6347 type_test = expected_type
6348 else:
6349 type_test = lambda val: val
6350
8f334380 6351 for path in path_list:
6352 depth = 0
6353 val = _traverse_obj(obj, path)
325ebc17 6354 if val is not None:
8f334380 6355 if depth:
6356 for _ in range(depth - 1):
6586bca9 6357 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6358 val = [v for v in map(type_test, val) if v is not None]
8f334380 6359 if val:
352d63fd 6360 return val if get_all else val[0]
6361 else:
6362 val = type_test(val)
6363 if val is not None:
8f334380 6364 return val
325ebc17 6365 return default
324ad820 6366
6367
6368def traverse_dict(dictn, keys, casesense=True):
6369 ''' For backward compatibility. Do not use '''
6370 return traverse_obj(dictn, keys, casesense=casesense,
6371 is_user_input=True, traverse_string=True)
6606817a 6372
6373
c634ad2a 6374def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6375 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
bd50a52b
THD
6376
6377
6378def get_windows_version():
6379 ''' Get Windows version. None if it's not running on Windows '''
6380 if compat_os_name == 'nt':
6381 return version_tuple(platform.win32_ver()[1])
6382 else:
6383 return None