#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import imp
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
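
# Illustrative usage (added example, not part of the original module):
# random_user_agent() splices one of the Chrome versions above into the
# Windows Chrome UA template, e.g. to build a per-session header set:
#
#     headers = dict(std_headers, **{'User-Agent': random_user_agent()})
#
# Each call may return a different Chrome build string.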


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
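
# Illustrative sketch (added example, not part of the original module): the
# date-format tables above are intended to be tried in order against a raw
# date string, roughly like this (the real date-parsing helpers live further
# down in this module):
#
#     def _parse_date(date_str, formats=DATE_FORMATS_DAY_FIRST):
#         for fmt in formats:
#             try:
#                 return datetime.datetime.strptime(date_str, fmt)
#             except ValueError:
#                 pass
#
#     _parse_date('25.12.2020 18:30')  # -> datetime(2020, 12, 25, 18, 30)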


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
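
# Illustrative usage (added example, not part of the original module):
#
#     write_json_file({'downloads': 3}, 'state.json')
#
# The data is first written to a '.tmp' file next to the target and then
# renamed over it, so readers never observe a half-written JSON file.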


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]
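
# Illustrative usage (added example, not part of the original module):
#
#     doc = compat_etree_fromstring('<video><title lang="en">Test</title></video>')
#     xpath_text(doc, './title')                  # -> 'Test'
#     xpath_attr(doc, './title', 'lang')          # -> 'en'
#     xpath_text(doc, './missing', default=None)  # -> None
#     xpath_text(doc, './missing', fatal=True)    # raises ExtractorError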


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
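
# Illustrative usage (added example, not part of the original module):
#
#     html = '<div class="title">First</div><div class="title">Second</div>'
#     get_elements_by_class('title', html)               # -> ['First', 'Second']
#     get_element_by_class('title', html)                # -> 'First'
#     get_element_by_attribute('class', 'title', html)   # -> 'First'
#
# Matching is regex-based, so it only handles the simple, non-nested markup
# these helpers are typically pointed at.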


class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs
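
# Illustrative usage (added example, not part of the original module):
#
#     extract_attributes('<source src="video.mp4" type=video/mp4 data-id="42">')
#     # -> {'src': 'video.mp4', 'type': 'video/mp4', 'data-id': '42'}
#
# Attribute names are lower-cased by the underlying HTMLParser, and entity
# references inside attribute values are decoded.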


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
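
# Illustrative usage (added example, not part of the original module):
#
#     clean_html('<p>First line<br>Second &amp; last</p>')
#     # -> 'First line\nSecond & last'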


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
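
# Illustrative usage (added example, not part of the original module):
#
#     sanitize_filename('Artist: Song/Title?')                   # -> 'Artist - Song_Title'
#     sanitize_filename('Artist: Song/Title?', restricted=True)  # -> 'Artist_-_Song_Title'
#
# With restricted=True, spaces and most punctuation collapse to '_' and
# accented characters are transliterated via ACCENT_CHARS.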


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
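
# Illustrative usage (added example, not part of the original module):
#
#     sanitize_url('//example.com/video')        # -> 'http://example.com/video'
#     sanitize_url('rmtp://example.com/stream')  # -> 'rtmp://example.com/stream'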


def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
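
# Illustrative usage (added example, not part of the original module):
#
#     extract_basic_auth('http://user:pass@example.com/feed')
#     # -> ('http://example.com/feed', 'Basic dXNlcjpwYXNz')
#
# sanitized_Request() applies the same split and sends the credentials as an
# Authorization header on the resulting urllib request instead of leaving
# them embedded in the URL.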


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
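
# Illustrative usage (added example, not part of the original module):
#
#     orderedSet([3, 1, 3, 2, 1])    # -> [3, 1, 2]
#     expand_path('$HOME/videos')    # -> e.g. '/home/user/videos'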


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
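
# Illustrative usage (added example, not part of the original module):
#
#     unescapeHTML('Tom &amp; Jerry &#39;s')   # -> "Tom & Jerry 's"
#     escapeHTML("Tom & Jerry's")              # -> 'Tom &amp; Jerry&#39;s'
#
# The two are not exact inverses: unescapeHTML also understands named and
# numeric HTML5 entities that escapeHTML never produces.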


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
5f6a1245 2339
cdb19aa4 2340def formatSeconds(secs, delim=':', msec=False):
4539dd30 2341 if secs > 3600:
cdb19aa4 2342 ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
4539dd30 2343 elif secs > 60:
cdb19aa4 2344 ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
4539dd30 2345 else:
cdb19aa4 2346 ret = '%d' % secs
2347 return '%s.%03d' % (ret, secs % 1) if msec else ret
4539dd30 2348
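# Illustrative usage of formatSeconds (hypothetical values):
#   >>> formatSeconds(3725)
#   '1:02:05'
#   >>> formatSeconds(75, msec=True)
#   '1:15.000'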
a0ddb8a2 2349
be4a824d
PH
2350def make_HTTPS_handler(params, **kwargs):
2351 opts_no_check_certificate = params.get('nocheckcertificate', False)
0db261ba 2352 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
be5f2c19 2353 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
0db261ba 2354 if opts_no_check_certificate:
be5f2c19 2355 context.check_hostname = False
0db261ba 2356 context.verify_mode = ssl.CERT_NONE
a2366922 2357 try:
be4a824d 2358 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
a2366922
PH
2359 except TypeError:
2360 # Python 2.7.8
2361 # (create_default_context present but HTTPSHandler has no context=)
2362 pass
2363
2364 if sys.version_info < (3, 2):
d7932313 2365 return YoutubeDLHTTPSHandler(params, **kwargs)
aa37e3d4 2366 else: # Python 3.2 - 3.3 (no ssl.create_default_context)
d7932313 2367 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
ea6d901e 2368 context.verify_mode = (ssl.CERT_NONE
dca08720 2369 if opts_no_check_certificate
ea6d901e 2370 else ssl.CERT_REQUIRED)
303b479e 2371 context.set_default_verify_paths()
be4a824d 2372 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2373
732ea2f0 2374
5873d4cc 2375def bug_reports_message(before=';'):
08f2a92c 2376 if ytdl_is_updateable():
7a5c1cfe 2377 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2378 else:
7a5c1cfe 2379 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2380 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2381 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2382 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2383
2384 before = before.rstrip()
2385 if not before or before.endswith(('.', '!', '?')):
2386 msg = msg[0].title() + msg[1:]
2387
2388 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2389
2390
bf5b9d85
PM
2391class YoutubeDLError(Exception):
2392 """Base exception for YoutubeDL errors."""
2393 pass
2394
2395
3158150c 2396network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2397if hasattr(ssl, 'CertificateError'):
2398 network_exceptions.append(ssl.CertificateError)
2399network_exceptions = tuple(network_exceptions)
2400
2401
bf5b9d85 2402class ExtractorError(YoutubeDLError):
1c256f70 2403 """Error during info extraction."""
5f6a1245 2404
1151c407 2405 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2406 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2407 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2408 """
3158150c 2409 if sys.exc_info()[0] in network_exceptions:
9a82b238 2410 expected = True
d5979c5d 2411
526d74ec 2412 self.msg = str(msg)
1c256f70 2413 self.traceback = tb
1151c407 2414 self.expected = expected
2eabb802 2415 self.cause = cause
d11271dd 2416 self.video_id = video_id
1151c407 2417 self.ie = ie
2418 self.exc_info = sys.exc_info() # preserve original exception
2419
2420 super(ExtractorError, self).__init__(''.join((
2421 format_field(ie, template='[%s] '),
2422 format_field(video_id, template='%s: '),
526d74ec 2423 self.msg,
1151c407 2424 format_field(cause, template=' (caused by %r)'),
2425 '' if expected else bug_reports_message())))
1c256f70 2426
01951dda
PH
2427 def format_traceback(self):
2428 if self.traceback is None:
2429 return None
28e614de 2430 return ''.join(traceback.format_tb(self.traceback))
01951dda 2431
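# A minimal sketch of how an extractor might raise this error ('abc123' is a
# hypothetical video id, not taken from the original source):
#   raise ExtractorError('Unable to download webpage', expected=True, video_id='abc123')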
1c256f70 2432
416c7fcb
PH
2433class UnsupportedError(ExtractorError):
2434 def __init__(self, url):
2435 super(UnsupportedError, self).__init__(
2436 'Unsupported URL: %s' % url, expected=True)
2437 self.url = url
2438
2439
55b3e45b
JMF
2440class RegexNotFoundError(ExtractorError):
2441 """Error when a regex didn't match"""
2442 pass
2443
2444
773f291d
S
2445class GeoRestrictedError(ExtractorError):
2446 """Geographic restriction Error exception.
2447
2448 This exception may be thrown when a video is not available from your
2449 geographic location due to geographic restrictions imposed by a website.
2450 """
b6e0c7d2 2451
773f291d
S
2452 def __init__(self, msg, countries=None):
2453 super(GeoRestrictedError, self).__init__(msg, expected=True)
2454 self.msg = msg
2455 self.countries = countries
2456
2457
bf5b9d85 2458class DownloadError(YoutubeDLError):
59ae15a5 2459 """Download Error exception.
d77c3dfd 2460
59ae15a5
PH
2461 This exception may be thrown by FileDownloader objects if they are not
2462 configured to continue on errors. They will contain the appropriate
2463 error message.
2464 """
5f6a1245 2465
8cc83b8d
FV
2466 def __init__(self, msg, exc_info=None):
2467 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2468 super(DownloadError, self).__init__(msg)
2469 self.exc_info = exc_info
d77c3dfd
FV
2470
2471
498f5606 2472class EntryNotInPlaylist(YoutubeDLError):
2473 """Entry not in playlist exception.
2474
2475 This exception will be thrown by YoutubeDL when a requested entry
2476 is not found in the playlist info_dict
2477 """
2478 pass
2479
2480
bf5b9d85 2481class SameFileError(YoutubeDLError):
59ae15a5 2482 """Same File exception.
d77c3dfd 2483
59ae15a5
PH
2484 This exception will be thrown by FileDownloader objects if they detect
2485 multiple files would have to be downloaded to the same file on disk.
2486 """
2487 pass
d77c3dfd
FV
2488
2489
bf5b9d85 2490class PostProcessingError(YoutubeDLError):
59ae15a5 2491 """Post Processing exception.
d77c3dfd 2492
59ae15a5
PH
2493 This exception may be raised by PostProcessor's .run() method to
2494 indicate an error in the postprocessing task.
2495 """
5f6a1245 2496
7851b379 2497 def __init__(self, msg):
bf5b9d85 2498 super(PostProcessingError, self).__init__(msg)
7851b379 2499 self.msg = msg
d77c3dfd 2500
5f6a1245 2501
8b0d7497 2502class ExistingVideoReached(YoutubeDLError):
2503 """ --break-on-existing triggered. """
2504 pass
2505
2506
2507class RejectedVideoReached(YoutubeDLError):
2508 """ --break-on-reject triggered. """
2509 pass
2510
2511
51d9739f 2512class ThrottledDownload(YoutubeDLError):
2513 """ Download speed below --throttled-rate. """
2514 pass
2515
2516
bf5b9d85 2517class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2518 """ --max-downloads limit has been reached. """
2519 pass
d77c3dfd
FV
2520
2521
bf5b9d85 2522class UnavailableVideoError(YoutubeDLError):
59ae15a5 2523 """Unavailable Format exception.
d77c3dfd 2524
59ae15a5
PH
2525 This exception will be thrown when a video is requested
2526 in a format that is not available for that video.
2527 """
2528 pass
d77c3dfd
FV
2529
2530
bf5b9d85 2531class ContentTooShortError(YoutubeDLError):
59ae15a5 2532 """Content Too Short exception.
d77c3dfd 2533
59ae15a5
PH
2534 This exception may be raised by FileDownloader objects when a file they
2535 download is too small for what the server announced first, indicating
2536 the connection was probably interrupted.
2537 """
d77c3dfd 2538
59ae15a5 2539 def __init__(self, downloaded, expected):
bf5b9d85
PM
2540 super(ContentTooShortError, self).__init__(
2541 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2542 )
2c7ed247 2543 # Both in bytes
59ae15a5
PH
2544 self.downloaded = downloaded
2545 self.expected = expected
d77c3dfd 2546
5f6a1245 2547
bf5b9d85 2548class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2549 def __init__(self, code=None, msg='Unknown error'):
2550 super(XAttrMetadataError, self).__init__(msg)
2551 self.code = code
bd264412 2552 self.msg = msg
efa97bdc
YCH
2553
2554 # Parsing code and msg
3089bc74 2555 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2556 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2557 self.reason = 'NO_SPACE'
2558 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2559 self.reason = 'VALUE_TOO_LONG'
2560 else:
2561 self.reason = 'NOT_SUPPORTED'
2562
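# Illustrative mapping of error codes to reasons (hypothetical messages):
#   >>> XAttrMetadataError(errno.ENOSPC, 'No space left on device').reason
#   'NO_SPACE'
#   >>> XAttrMetadataError(errno.E2BIG, 'Argument list too long').reason
#   'VALUE_TOO_LONG'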
2563
bf5b9d85 2564class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2565 pass
2566
2567
c5a59d93 2568def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2569 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2570 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2571 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2572 if sys.version_info < (3, 0):
65220c3b
S
2573 kwargs['strict'] = True
2574 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2575 source_address = ydl_handler._params.get('source_address')
8959018a 2576
be4a824d 2577 if source_address is not None:
8959018a
AU
2578 # This is to work around _create_connection() from socket, which will try all
2579 # address data from getaddrinfo() including IPv6. This filters the result from
2580 # getaddrinfo() based on the source_address value.
2581 # This is based on the cpython socket.create_connection() function.
2582 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2583 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2584 host, port = address
2585 err = None
2586 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2587 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2588 ip_addrs = [addr for addr in addrs if addr[0] == af]
2589 if addrs and not ip_addrs:
2590 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2591 raise socket.error(
2592 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2593 % (ip_version, source_address[0]))
8959018a
AU
2594 for res in ip_addrs:
2595 af, socktype, proto, canonname, sa = res
2596 sock = None
2597 try:
2598 sock = socket.socket(af, socktype, proto)
2599 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2600 sock.settimeout(timeout)
2601 sock.bind(source_address)
2602 sock.connect(sa)
2603 err = None # Explicitly break reference cycle
2604 return sock
2605 except socket.error as _:
2606 err = _
2607 if sock is not None:
2608 sock.close()
2609 if err is not None:
2610 raise err
2611 else:
9e21e6d9
S
2612 raise socket.error('getaddrinfo returns an empty list')
2613 if hasattr(hc, '_create_connection'):
2614 hc._create_connection = _create_connection
be4a824d
PH
2615 sa = (source_address, 0)
2616 if hasattr(hc, 'source_address'): # Python 2.7+
2617 hc.source_address = sa
2618 else: # Python 2.6
2619 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2620 sock = _create_connection(
be4a824d
PH
2621 (self.host, self.port), self.timeout, sa)
2622 if is_https:
d7932313
PH
2623 self.sock = ssl.wrap_socket(
2624 sock, self.key_file, self.cert_file,
2625 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2626 else:
2627 self.sock = sock
2628 hc.connect = functools.partial(_hc_connect, hc)
2629
2630 return hc
2631
2632
87f0e62d 2633def handle_youtubedl_headers(headers):
992fc9d6
YCH
2634 filtered_headers = headers
2635
2636 if 'Youtubedl-no-compression' in filtered_headers:
2637 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2638 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2639
992fc9d6 2640 return filtered_headers
87f0e62d
YCH
2641
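# Illustrative example (hypothetical header dict): the internal marker header and
# the Accept-Encoding it suppresses are both stripped before the real request:
#   >>> handle_youtubedl_headers({'Youtubedl-no-compression': 'True', 'Accept-Encoding': 'gzip'})
#   {}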
2642
acebc9cd 2643class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2644 """Handler for HTTP requests and responses.
2645
2646 This class, when installed with an OpenerDirector, automatically adds
2647 the standard headers to every HTTP request and handles gzipped and
2648 deflated responses from web servers. If compression is to be avoided in
2649 a particular request, the original request in the program code only has
0424ec30 2650 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2651 removed before making the real request.
2652
2653 Part of this code was copied from:
2654
2655 http://techknack.net/python-urllib2-handlers/
2656
2657 Andrew Rowls, the author of that code, agreed to release it to the
2658 public domain.
2659 """
2660
be4a824d
PH
2661 def __init__(self, params, *args, **kwargs):
2662 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2663 self._params = params
2664
2665 def http_open(self, req):
71aff188
YCH
2666 conn_class = compat_http_client.HTTPConnection
2667
2668 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2669 if socks_proxy:
2670 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2671 del req.headers['Ytdl-socks-proxy']
2672
be4a824d 2673 return self.do_open(functools.partial(
71aff188 2674 _create_http_connection, self, conn_class, False),
be4a824d
PH
2675 req)
2676
59ae15a5
PH
2677 @staticmethod
2678 def deflate(data):
fc2119f2 2679 if not data:
2680 return data
59ae15a5
PH
2681 try:
2682 return zlib.decompress(data, -zlib.MAX_WBITS)
2683 except zlib.error:
2684 return zlib.decompress(data)
2685
acebc9cd 2686 def http_request(self, req):
51f267d9
S
2687 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is
2688 # not always respected by websites - some tend to give out URLs with non-percent-encoded
2689 # non-ASCII characters (see telemb.py, ard.py [#3412])
2690 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2691 # To work around aforementioned issue we will replace request's original URL with
2692 # percent-encoded one
2693 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2694 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2695 url = req.get_full_url()
2696 url_escaped = escape_url(url)
2697
2698 # Substitute URL if any change after escaping
2699 if url != url_escaped:
15d260eb 2700 req = update_Request(req, url=url_escaped)
51f267d9 2701
33ac271b 2702 for h, v in std_headers.items():
3d5f7a39
JK
2703 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2704 # The dict keys are capitalized because of this bug by urllib
2705 if h.capitalize() not in req.headers:
33ac271b 2706 req.add_header(h, v)
87f0e62d
YCH
2707
2708 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2709
2710 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2711 # Python 2.6 is brain-dead when it comes to fragments
2712 req._Request__original = req._Request__original.partition('#')[0]
2713 req._Request__r_type = req._Request__r_type.partition('#')[0]
2714
59ae15a5
PH
2715 return req
2716
acebc9cd 2717 def http_response(self, req, resp):
59ae15a5
PH
2718 old_resp = resp
2719 # gzip
2720 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2721 content = resp.read()
2722 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2723 try:
2724 uncompressed = io.BytesIO(gz.read())
2725 except IOError as original_ioerror:
2726 # There may be junk at the end of the file
2727 # See http://stackoverflow.com/q/4928560/35070 for details
2728 for i in range(1, 1024):
2729 try:
2730 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2731 uncompressed = io.BytesIO(gz.read())
2732 except IOError:
2733 continue
2734 break
2735 else:
2736 raise original_ioerror
b407d853 2737 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2738 resp.msg = old_resp.msg
c047270c 2739 del resp.headers['Content-encoding']
59ae15a5
PH
2740 # deflate
2741 if resp.headers.get('Content-encoding', '') == 'deflate':
2742 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2743 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2744 resp.msg = old_resp.msg
c047270c 2745 del resp.headers['Content-encoding']
ad729172 2746 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2747 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2748 if 300 <= resp.code < 400:
2749 location = resp.headers.get('Location')
2750 if location:
2751 # According to RFC 2616, the default charset is iso-8859-1, which is respected by Python 3
2752 if sys.version_info >= (3, 0):
2753 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2754 else:
2755 location = location.decode('utf-8')
5a4d9ddb
S
2756 location_escaped = escape_url(location)
2757 if location != location_escaped:
2758 del resp.headers['Location']
9a4aec8b
YCH
2759 if sys.version_info < (3, 0):
2760 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2761 resp.headers['Location'] = location_escaped
59ae15a5 2762 return resp
0f8d03f8 2763
acebc9cd
PH
2764 https_request = http_request
2765 https_response = http_response
bf50b038 2766
5de90176 2767
71aff188
YCH
2768def make_socks_conn_class(base_class, socks_proxy):
2769 assert issubclass(base_class, (
2770 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2771
2772 url_components = compat_urlparse.urlparse(socks_proxy)
2773 if url_components.scheme.lower() == 'socks5':
2774 socks_type = ProxyType.SOCKS5
2775 elif url_components.scheme.lower() in ('socks', 'socks4'):
2776 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2777 elif url_components.scheme.lower() == 'socks4a':
2778 socks_type = ProxyType.SOCKS4A
71aff188 2779
cdd94c2e
YCH
2780 def unquote_if_non_empty(s):
2781 if not s:
2782 return s
2783 return compat_urllib_parse_unquote_plus(s)
2784
71aff188
YCH
2785 proxy_args = (
2786 socks_type,
2787 url_components.hostname, url_components.port or 1080,
2788 True, # Remote DNS
cdd94c2e
YCH
2789 unquote_if_non_empty(url_components.username),
2790 unquote_if_non_empty(url_components.password),
71aff188
YCH
2791 )
2792
2793 class SocksConnection(base_class):
2794 def connect(self):
2795 self.sock = sockssocket()
2796 self.sock.setproxy(*proxy_args)
2797 if type(self.timeout) in (int, float):
2798 self.sock.settimeout(self.timeout)
2799 self.sock.connect((self.host, self.port))
2800
2801 if isinstance(self, compat_http_client.HTTPSConnection):
2802 if hasattr(self, '_context'): # Python > 2.6
2803 self.sock = self._context.wrap_socket(
2804 self.sock, server_hostname=self.host)
2805 else:
2806 self.sock = ssl.wrap_socket(self.sock)
2807
2808 return SocksConnection
2809
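# Illustrative usage (hypothetical proxy URL); the returned class is used in place
# of the plain HTTP(S) connection class by the handlers below:
#   conn_class = make_socks_conn_class(
#       compat_http_client.HTTPConnection, 'socks5://127.0.0.1:1080')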
2810
be4a824d
PH
2811class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2812 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2813 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2814 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2815 self._params = params
2816
2817 def https_open(self, req):
4f264c02 2818 kwargs = {}
71aff188
YCH
2819 conn_class = self._https_conn_class
2820
4f264c02
JMF
2821 if hasattr(self, '_context'): # python > 2.6
2822 kwargs['context'] = self._context
2823 if hasattr(self, '_check_hostname'): # python 3.x
2824 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2825
2826 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2827 if socks_proxy:
2828 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2829 del req.headers['Ytdl-socks-proxy']
2830
be4a824d 2831 return self.do_open(functools.partial(
71aff188 2832 _create_http_connection, self, conn_class, True),
4f264c02 2833 req, **kwargs)
be4a824d
PH
2834
2835
1bab3437 2836class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2837 """
2838 See [1] for cookie file format.
2839
2840 1. https://curl.haxx.se/docs/http-cookies.html
2841 """
e7e62441 2842 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2843 _ENTRY_LEN = 7
2844 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2845# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2846
2847'''
2848 _CookieFileEntry = collections.namedtuple(
2849 'CookieFileEntry',
2850 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2851
1bab3437 2852 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2853 """
2854 Save cookies to a file.
2855
2856 Most of the code is taken from CPython 3.8 and slightly adapted
2857 to support cookie files with UTF-8 in both python 2 and 3.
2858 """
2859 if filename is None:
2860 if self.filename is not None:
2861 filename = self.filename
2862 else:
2863 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2864
1bab3437
S
2865 # Store session cookies with `expires` set to 0 instead of an empty
2866 # string
2867 for cookie in self:
2868 if cookie.expires is None:
2869 cookie.expires = 0
c380cc28
S
2870
2871 with io.open(filename, 'w', encoding='utf-8') as f:
2872 f.write(self._HEADER)
2873 now = time.time()
2874 for cookie in self:
2875 if not ignore_discard and cookie.discard:
2876 continue
2877 if not ignore_expires and cookie.is_expired(now):
2878 continue
2879 if cookie.secure:
2880 secure = 'TRUE'
2881 else:
2882 secure = 'FALSE'
2883 if cookie.domain.startswith('.'):
2884 initial_dot = 'TRUE'
2885 else:
2886 initial_dot = 'FALSE'
2887 if cookie.expires is not None:
2888 expires = compat_str(cookie.expires)
2889 else:
2890 expires = ''
2891 if cookie.value is None:
2892 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2893 # with no name, whereas http.cookiejar regards it as a
2894 # cookie with no value.
2895 name = ''
2896 value = cookie.name
2897 else:
2898 name = cookie.name
2899 value = cookie.value
2900 f.write(
2901 '\t'.join([cookie.domain, initial_dot, cookie.path,
2902 secure, expires, name, value]) + '\n')
1bab3437
S
2903
2904 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2905 """Load cookies from a file."""
2906 if filename is None:
2907 if self.filename is not None:
2908 filename = self.filename
2909 else:
2910 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2911
c380cc28
S
2912 def prepare_line(line):
2913 if line.startswith(self._HTTPONLY_PREFIX):
2914 line = line[len(self._HTTPONLY_PREFIX):]
2915 # comments and empty lines are fine
2916 if line.startswith('#') or not line.strip():
2917 return line
2918 cookie_list = line.split('\t')
2919 if len(cookie_list) != self._ENTRY_LEN:
2920 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2921 cookie = self._CookieFileEntry(*cookie_list)
2922 if cookie.expires_at and not cookie.expires_at.isdigit():
2923 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2924 return line
2925
e7e62441 2926 cf = io.StringIO()
c380cc28 2927 with io.open(filename, encoding='utf-8') as f:
e7e62441 2928 for line in f:
c380cc28
S
2929 try:
2930 cf.write(prepare_line(line))
2931 except compat_cookiejar.LoadError as e:
2932 write_string(
2933 'WARNING: skipping cookie file entry due to %s: %r\n'
2934 % (e, line), sys.stderr)
2935 continue
e7e62441 2936 cf.seek(0)
2937 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2938 # Session cookies are denoted by either `expires` field set to
2939 # an empty string or 0. MozillaCookieJar only recognizes the former
2940 # (see [1]). So we need to force the latter to be recognized as session
2941 # cookies on our own.
2942 # Session cookies may be important for cookies-based authentication,
2943 # e.g. when a user does not tick the 'Remember me' check box while
2944 # logging in on a site, some important cookies are stored as session
2945 # cookies, so failing to recognize them will result in a failed login.
2946 # 1. https://bugs.python.org/issue17164
2947 for cookie in self:
2948 # Treat `expires=0` cookies as session cookies
2949 if cookie.expires == 0:
2950 cookie.expires = None
2951 cookie.discard = True
2952
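# For reference, a cookies.txt entry consists of 7 tab-separated fields matching
# _CookieFileEntry above, e.g. (hypothetical cookie, <TAB> stands for a tab character):
#   .example.com<TAB>TRUE<TAB>/<TAB>FALSE<TAB>0<TAB>SESSIONID<TAB>abc123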
2953
a6420bf5
S
2954class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2955 def __init__(self, cookiejar=None):
2956 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2957
2958 def http_response(self, request, response):
2959 # Python 2 will choke on the next HTTP request if there are non-ASCII
2960 # characters in the Set-Cookie HTTP header of the last response (see
067aa17e 2961 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
2962 # In order to at least prevent crashing we will percent encode Set-Cookie
2963 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
2964 # if sys.version_info < (3, 0) and response.headers:
2965 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2966 # set_cookie = response.headers.get(set_cookie_header)
2967 # if set_cookie:
2968 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2969 # if set_cookie != set_cookie_escaped:
2970 # del response.headers[set_cookie_header]
2971 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
2972 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2973
f5fa042c 2974 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
2975 https_response = http_response
2976
2977
fca6dba8 2978class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 2979 """YoutubeDL redirect handler
2980
2981 The code is based on HTTPRedirectHandler implementation from CPython [1].
2982
2983 This redirect handler solves two issues:
2984 - ensures redirect URL is always unicode under python 2
2985 - introduces support for experimental HTTP response status code
2986 308 Permanent Redirect [2] used by some sites [3]
2987
2988 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2989 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2990 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2991 """
2992
2993 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2994
2995 def redirect_request(self, req, fp, code, msg, headers, newurl):
2996 """Return a Request or None in response to a redirect.
2997
2998 This is called by the http_error_30x methods when a
2999 redirection response is received. If a redirection should
3000 take place, return a new Request to allow http_error_30x to
3001 perform the redirect. Otherwise, raise HTTPError if no-one
3002 else should try to handle this url. Return None if you can't
3003 but another Handler might.
3004 """
3005 m = req.get_method()
3006 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3007 or code in (301, 302, 303) and m == "POST")):
3008 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3009 # Strictly (according to RFC 2616), 301 or 302 in response to
3010 # a POST MUST NOT cause a redirection without confirmation
3011 # from the user (of urllib.request, in this case). In practice,
3012 # essentially all clients do redirect in this case, so we do
3013 # the same.
3014
3015 # On python 2 urlh.geturl() may sometimes return redirect URL
3016 # as byte string instead of unicode. This workaround allows
3017 # to force it always return unicode.
3018 if sys.version_info[0] < 3:
3019 newurl = compat_str(newurl)
3020
3021 # Be conciliant with URIs containing a space. This is mainly
3022 # redundant with the more complete encoding done in http_error_302(),
3023 # but it is kept for compatibility with other callers.
3024 newurl = newurl.replace(' ', '%20')
3025
3026 CONTENT_HEADERS = ("content-length", "content-type")
3027 # NB: don't use dict comprehension for python 2.6 compatibility
3028 newheaders = dict((k, v) for k, v in req.headers.items()
3029 if k.lower() not in CONTENT_HEADERS)
3030 return compat_urllib_request.Request(
3031 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3032 unverifiable=True)
fca6dba8
S
3033
3034
46f59e89
S
3035def extract_timezone(date_str):
3036 m = re.search(
3037 r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
3038 date_str)
3039 if not m:
3040 timezone = datetime.timedelta()
3041 else:
3042 date_str = date_str[:-len(m.group('tz'))]
3043 if not m.group('sign'):
3044 timezone = datetime.timedelta()
3045 else:
3046 sign = 1 if m.group('sign') == '+' else -1
3047 timezone = datetime.timedelta(
3048 hours=sign * int(m.group('hours')),
3049 minutes=sign * int(m.group('minutes')))
3050 return timezone, date_str
3051
3052
08b38d54 3053def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3054 """ Return a UNIX timestamp from the given date """
3055
3056 if date_str is None:
3057 return None
3058
52c3a6e4
S
3059 date_str = re.sub(r'\.[0-9]+', '', date_str)
3060
08b38d54 3061 if timezone is None:
46f59e89
S
3062 timezone, date_str = extract_timezone(date_str)
3063
52c3a6e4
S
3064 try:
3065 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3066 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3067 return calendar.timegm(dt.timetuple())
3068 except ValueError:
3069 pass
912b38b4
PH
3070
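# Illustrative usage (hypothetical timestamps):
#   >>> parse_iso8601('2021-09-25T12:00:00+02:00')
#   1632564000
#   >>> parse_iso8601('2021-09-25 12:00:00', delimiter=' ', timezone=datetime.timedelta(0))
#   1632571200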
3071
46f59e89
S
3072def date_formats(day_first=True):
3073 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3074
3075
42bdd9d0 3076def unified_strdate(date_str, day_first=True):
bf50b038 3077 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3078
3079 if date_str is None:
3080 return None
bf50b038 3081 upload_date = None
5f6a1245 3082 # Replace commas
026fcc04 3083 date_str = date_str.replace(',', ' ')
42bdd9d0 3084 # Remove AM/PM + timezone
9bb8e0a3 3085 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3086 _, date_str = extract_timezone(date_str)
42bdd9d0 3087
46f59e89 3088 for expression in date_formats(day_first):
bf50b038
JMF
3089 try:
3090 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3091 except ValueError:
bf50b038 3092 pass
42393ce2
PH
3093 if upload_date is None:
3094 timetuple = email.utils.parsedate_tz(date_str)
3095 if timetuple:
c6b9cf05
S
3096 try:
3097 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3098 except ValueError:
3099 pass
6a750402
JMF
3100 if upload_date is not None:
3101 return compat_str(upload_date)
bf50b038 3102
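# Illustrative usage, assuming the DATE_FORMATS tables defined earlier in this module:
#   >>> unified_strdate('2020-12-25')
#   '20201225'
#   >>> unified_strdate('25.12.2020')
#   '20201225'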
5f6a1245 3103
46f59e89
S
3104def unified_timestamp(date_str, day_first=True):
3105 if date_str is None:
3106 return None
3107
2ae2ffda 3108 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3109
7dc2a74e 3110 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3111 timezone, date_str = extract_timezone(date_str)
3112
3113 # Remove AM/PM + timezone
3114 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3115
deef3195
S
3116 # Remove unrecognized timezones from ISO 8601-like timestamps
3117 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3118 if m:
3119 date_str = date_str[:-len(m.group('tz'))]
3120
f226880c
PH
3121 # Python only supports microseconds, so remove nanoseconds
3122 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3123 if m:
3124 date_str = m.group(1)
3125
46f59e89
S
3126 for expression in date_formats(day_first):
3127 try:
7dc2a74e 3128 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3129 return calendar.timegm(dt.timetuple())
3130 except ValueError:
3131 pass
3132 timetuple = email.utils.parsedate_tz(date_str)
3133 if timetuple:
7dc2a74e 3134 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3135
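# Illustrative usage (hypothetical date string, assuming the DATE_FORMATS tables
# defined earlier in this module):
#   >>> unified_timestamp('December 25, 2020')
#   1608854400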
3136
28e614de 3137def determine_ext(url, default_ext='unknown_video'):
85750f89 3138 if url is None or '.' not in url:
f4776371 3139 return default_ext
9cb9a5df 3140 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3141 if re.match(r'^[A-Za-z0-9]+$', guess):
3142 return guess
a7aaa398
S
3143 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3144 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3145 return guess.rstrip('/')
73e79f2a 3146 else:
cbdbb766 3147 return default_ext
73e79f2a 3148
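# Illustrative usage (hypothetical URLs):
#   >>> determine_ext('http://example.com/video.mp4?download=1')
#   'mp4'
#   >>> determine_ext('http://example.com/stream')
#   'unknown_video'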
5f6a1245 3149
824fa511
S
3150def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3151 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3152
5f6a1245 3153
9e62f283 3154def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3155 """
3156 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3157 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3158
3159 format: string date format used to parse date_str
3160 precision: round the time portion of a datetime object.
3161 auto|microsecond|second|minute|hour|day.
3162 auto: round to the unit provided in date_str (if applicable).
3163 """
3164 auto_precision = False
3165 if precision == 'auto':
3166 auto_precision = True
3167 precision = 'microsecond'
3168 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3169 if date_str in ('now', 'today'):
37254abc 3170 return today
f8795e10
PH
3171 if date_str == 'yesterday':
3172 return today - datetime.timedelta(days=1)
9e62f283 3173 match = re.match(
3174 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3175 date_str)
37254abc 3176 if match is not None:
9e62f283 3177 start_time = datetime_from_str(match.group('start'), precision, format)
3178 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3179 unit = match.group('unit')
9e62f283 3180 if unit == 'month' or unit == 'year':
3181 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3182 unit = 'day'
9e62f283 3183 else:
3184 if unit == 'week':
3185 unit = 'day'
3186 time *= 7
3187 delta = datetime.timedelta(**{unit + 's': time})
3188 new_date = start_time + delta
3189 if auto_precision:
3190 return datetime_round(new_date, unit)
3191 return new_date
3192
3193 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3194
3195
3196def date_from_str(date_str, format='%Y%m%d'):
3197 """
3198 Return a datetime object from a string in the format YYYYMMDD or
3199 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3200
3201 format: string date format used to parse date_str
3202 """
3203 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3204
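# Illustrative usage (hypothetical inputs; relative forms are resolved against the
# current time):
#   >>> date_from_str('20210925')
#   datetime.date(2021, 9, 25)
#   date_from_str('now-1week') evaluates to the calendar date one week ago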
3205
3206def datetime_add_months(dt, months):
3207 """Increment/Decrement a datetime object by months."""
3208 month = dt.month + months - 1
3209 year = dt.year + month // 12
3210 month = month % 12 + 1
3211 day = min(dt.day, calendar.monthrange(year, month)[1])
3212 return dt.replace(year, month, day)
3213
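# Illustrative usage: the day is clamped to the last day of the resulting month:
#   >>> datetime_add_months(datetime.datetime(2020, 1, 31), 1)
#   datetime.datetime(2020, 2, 29, 0, 0)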
3214
3215def datetime_round(dt, precision='day'):
3216 """
3217 Round a datetime object's time to a specific precision
3218 """
3219 if precision == 'microsecond':
3220 return dt
3221
3222 unit_seconds = {
3223 'day': 86400,
3224 'hour': 3600,
3225 'minute': 60,
3226 'second': 1,
3227 }
3228 roundto = lambda x, n: ((x + n / 2) // n) * n
3229 timestamp = calendar.timegm(dt.timetuple())
3230 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3231
3232
e63fc1be 3233def hyphenate_date(date_str):
3234 """
3235 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3236 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3237 if match is not None:
3238 return '-'.join(match.groups())
3239 else:
3240 return date_str
3241
5f6a1245 3242
bd558525
JMF
3243class DateRange(object):
3244 """Represents a time interval between two dates"""
5f6a1245 3245
bd558525
JMF
3246 def __init__(self, start=None, end=None):
3247 """start and end must be strings in the format accepted by date"""
3248 if start is not None:
3249 self.start = date_from_str(start)
3250 else:
3251 self.start = datetime.datetime.min.date()
3252 if end is not None:
3253 self.end = date_from_str(end)
3254 else:
3255 self.end = datetime.datetime.max.date()
37254abc 3256 if self.start > self.end:
bd558525 3257 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3258
bd558525
JMF
3259 @classmethod
3260 def day(cls, day):
3261 """Returns a range that only contains the given day"""
5f6a1245
JW
3262 return cls(day, day)
3263
bd558525
JMF
3264 def __contains__(self, date):
3265 """Check if the date is in the range"""
37254abc
JMF
3266 if not isinstance(date, datetime.date):
3267 date = date_from_str(date)
3268 return self.start <= date <= self.end
5f6a1245 3269
bd558525 3270 def __str__(self):
5f6a1245 3271 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3272
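# Illustrative usage (hypothetical dates):
#   >>> '20210915' in DateRange('20210901', '20210930')
#   True
#   >>> '20211001' in DateRange(end='20210930')
#   False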
3273
3274def platform_name():
3275 """ Returns the platform name as a compat_str """
3276 res = platform.platform()
3277 if isinstance(res, bytes):
3278 res = res.decode(preferredencoding())
3279
3280 assert isinstance(res, compat_str)
3281 return res
c257baff
PH
3282
3283
b58ddb32
PH
3284def _windows_write_string(s, out):
3285 """ Returns True if the string was written using special methods,
3286 False if it has yet to be written out."""
3287 # Adapted from http://stackoverflow.com/a/3259271/35070
3288
3289 import ctypes
3290 import ctypes.wintypes
3291
3292 WIN_OUTPUT_IDS = {
3293 1: -11,
3294 2: -12,
3295 }
3296
a383a98a
PH
3297 try:
3298 fileno = out.fileno()
3299 except AttributeError:
3300 # If the output stream doesn't have a fileno, it's virtual
3301 return False
aa42e873
PH
3302 except io.UnsupportedOperation:
3303 # Some strange Windows pseudo files?
3304 return False
b58ddb32
PH
3305 if fileno not in WIN_OUTPUT_IDS:
3306 return False
3307
d7cd9a9e 3308 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3309 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3310 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3311 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3312
d7cd9a9e 3313 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3314 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3315 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3316 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3317 written = ctypes.wintypes.DWORD(0)
3318
d7cd9a9e 3319 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3320 FILE_TYPE_CHAR = 0x0002
3321 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3322 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3323 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3324 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3325 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3326 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3327
3328 def not_a_console(handle):
3329 if handle == INVALID_HANDLE_VALUE or handle is None:
3330 return True
3089bc74
S
3331 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3332 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3333
3334 if not_a_console(h):
3335 return False
3336
d1b9c912
PH
3337 def next_nonbmp_pos(s):
3338 try:
3339 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3340 except StopIteration:
3341 return len(s)
3342
3343 while s:
3344 count = min(next_nonbmp_pos(s), 1024)
3345
b58ddb32 3346 ret = WriteConsoleW(
d1b9c912 3347 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3348 if ret == 0:
3349 raise OSError('Failed to write string')
d1b9c912
PH
3350 if not count: # We just wrote a non-BMP character
3351 assert written.value == 2
3352 s = s[1:]
3353 else:
3354 assert written.value > 0
3355 s = s[written.value:]
b58ddb32
PH
3356 return True
3357
3358
734f90bb 3359def write_string(s, out=None, encoding=None):
7459e3a2
PH
3360 if out is None:
3361 out = sys.stderr
8bf48f23 3362 assert type(s) == compat_str
7459e3a2 3363
b58ddb32
PH
3364 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3365 if _windows_write_string(s, out):
3366 return
3367
3089bc74
S
3368 if ('b' in getattr(out, 'mode', '')
3369 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3370 byt = s.encode(encoding or preferredencoding(), 'ignore')
3371 out.write(byt)
3372 elif hasattr(out, 'buffer'):
3373 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3374 byt = s.encode(enc, 'ignore')
3375 out.buffer.write(byt)
3376 else:
8bf48f23 3377 out.write(s)
7459e3a2
PH
3378 out.flush()
3379
3380
48ea9cea
PH
3381def bytes_to_intlist(bs):
3382 if not bs:
3383 return []
3384 if isinstance(bs[0], int): # Python 3
3385 return list(bs)
3386 else:
3387 return [ord(c) for c in bs]
3388
c257baff 3389
cba892fa 3390def intlist_to_bytes(xs):
3391 if not xs:
3392 return b''
edaa23f8 3393 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3394
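# Illustrative round trip (hypothetical payload):
#   >>> bytes_to_intlist(b'abc')
#   [97, 98, 99]
#   >>> intlist_to_bytes([97, 98, 99])
#   b'abc'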
3395
c1c9a79c
PH
3396# Cross-platform file locking
3397if sys.platform == 'win32':
3398 import ctypes.wintypes
3399 import msvcrt
3400
3401 class OVERLAPPED(ctypes.Structure):
3402 _fields_ = [
3403 ('Internal', ctypes.wintypes.LPVOID),
3404 ('InternalHigh', ctypes.wintypes.LPVOID),
3405 ('Offset', ctypes.wintypes.DWORD),
3406 ('OffsetHigh', ctypes.wintypes.DWORD),
3407 ('hEvent', ctypes.wintypes.HANDLE),
3408 ]
3409
3410 kernel32 = ctypes.windll.kernel32
3411 LockFileEx = kernel32.LockFileEx
3412 LockFileEx.argtypes = [
3413 ctypes.wintypes.HANDLE, # hFile
3414 ctypes.wintypes.DWORD, # dwFlags
3415 ctypes.wintypes.DWORD, # dwReserved
3416 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3417 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3418 ctypes.POINTER(OVERLAPPED) # Overlapped
3419 ]
3420 LockFileEx.restype = ctypes.wintypes.BOOL
3421 UnlockFileEx = kernel32.UnlockFileEx
3422 UnlockFileEx.argtypes = [
3423 ctypes.wintypes.HANDLE, # hFile
3424 ctypes.wintypes.DWORD, # dwReserved
3425 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3426 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3427 ctypes.POINTER(OVERLAPPED) # Overlapped
3428 ]
3429 UnlockFileEx.restype = ctypes.wintypes.BOOL
3430 whole_low = 0xffffffff
3431 whole_high = 0x7fffffff
3432
3433 def _lock_file(f, exclusive):
3434 overlapped = OVERLAPPED()
3435 overlapped.Offset = 0
3436 overlapped.OffsetHigh = 0
3437 overlapped.hEvent = 0
3438 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3439 handle = msvcrt.get_osfhandle(f.fileno())
3440 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3441 whole_low, whole_high, f._lock_file_overlapped_p):
3442 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3443
3444 def _unlock_file(f):
3445 assert f._lock_file_overlapped_p
3446 handle = msvcrt.get_osfhandle(f.fileno())
3447 if not UnlockFileEx(handle, 0,
3448 whole_low, whole_high, f._lock_file_overlapped_p):
3449 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3450
3451else:
399a76e6
YCH
3452 # Some platforms, such as Jython, are missing fcntl
3453 try:
3454 import fcntl
c1c9a79c 3455
399a76e6
YCH
3456 def _lock_file(f, exclusive):
3457 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3458
399a76e6
YCH
3459 def _unlock_file(f):
3460 fcntl.flock(f, fcntl.LOCK_UN)
3461 except ImportError:
3462 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3463
3464 def _lock_file(f, exclusive):
3465 raise IOError(UNSUPPORTED_MSG)
3466
3467 def _unlock_file(f):
3468 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3469
3470
3471class locked_file(object):
3472 def __init__(self, filename, mode, encoding=None):
3473 assert mode in ['r', 'a', 'w']
3474 self.f = io.open(filename, mode, encoding=encoding)
3475 self.mode = mode
3476
3477 def __enter__(self):
3478 exclusive = self.mode != 'r'
3479 try:
3480 _lock_file(self.f, exclusive)
3481 except IOError:
3482 self.f.close()
3483 raise
3484 return self
3485
3486 def __exit__(self, etype, value, traceback):
3487 try:
3488 _unlock_file(self.f)
3489 finally:
3490 self.f.close()
3491
3492 def __iter__(self):
3493 return iter(self.f)
3494
3495 def write(self, *args):
3496 return self.f.write(*args)
3497
3498 def read(self, *args):
3499 return self.f.read(*args)
4eb7f1d1
JMF
3500
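# A minimal usage sketch (hypothetical filename); the lock is released and the file
# closed when the with-block exits:
#   with locked_file('archive.txt', 'r', encoding='utf-8') as f:
#       contents = f.read()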
3501
4644ac55
S
3502def get_filesystem_encoding():
3503 encoding = sys.getfilesystemencoding()
3504 return encoding if encoding is not None else 'utf-8'
3505
3506
4eb7f1d1 3507def shell_quote(args):
a6a173c2 3508 quoted_args = []
4644ac55 3509 encoding = get_filesystem_encoding()
a6a173c2
JMF
3510 for a in args:
3511 if isinstance(a, bytes):
3512 # We may get a filename encoded with 'encodeFilename'
3513 a = a.decode(encoding)
aefce8e6 3514 quoted_args.append(compat_shlex_quote(a))
28e614de 3515 return ' '.join(quoted_args)
9d4660ca
PH
3516
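# Illustrative usage (hypothetical arguments):
#   >>> shell_quote(['echo', 'a b'])
#   "echo 'a b'"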
3517
3518def smuggle_url(url, data):
3519 """ Pass additional data in a URL for internal use. """
3520
81953d1a
RA
3521 url, idata = unsmuggle_url(url, {})
3522 data.update(idata)
15707c7e 3523 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3524 {'__youtubedl_smuggle': json.dumps(data)})
3525 return url + '#' + sdata
9d4660ca
PH
3526
3527
79f82953 3528def unsmuggle_url(smug_url, default=None):
83e865a3 3529 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3530 return smug_url, default
28e614de
PH
3531 url, _, sdata = smug_url.rpartition('#')
3532 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3533 data = json.loads(jsond)
3534 return url, data
02dbf93f
PH
3535
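# Illustrative round trip (hypothetical URL and payload):
#   >>> url = smuggle_url('https://example.com/video', {'referer': 'https://example.com'})
#   >>> unsmuggle_url(url)
#   ('https://example.com/video', {'referer': 'https://example.com'})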
3536
02dbf93f
PH
3537def format_bytes(bytes):
3538 if bytes is None:
28e614de 3539 return 'N/A'
02dbf93f
PH
3540 if type(bytes) is str:
3541 bytes = float(bytes)
3542 if bytes == 0.0:
3543 exponent = 0
3544 else:
3545 exponent = int(math.log(bytes, 1024.0))
28e614de 3546 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3547 converted = float(bytes) / float(1024 ** exponent)
28e614de 3548 return '%.2f%s' % (converted, suffix)
f53c966a 3549
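# Illustrative usage (hypothetical sizes):
#   >>> format_bytes(1536)
#   '1.50KiB'
#   >>> format_bytes(None)
#   'N/A'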
1c088fa8 3550
fb47597b
S
3551def lookup_unit_table(unit_table, s):
3552 units_re = '|'.join(re.escape(u) for u in unit_table)
3553 m = re.match(
782b1b5b 3554 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3555 if not m:
3556 return None
3557 num_str = m.group('num').replace(',', '.')
3558 mult = unit_table[m.group('unit')]
3559 return int(float(num_str) * mult)
3560
3561
be64b5b0
PH
3562def parse_filesize(s):
3563 if s is None:
3564 return None
3565
dfb1b146 3566 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3567 # but we support those too
3568 _UNIT_TABLE = {
3569 'B': 1,
3570 'b': 1,
70852b47 3571 'bytes': 1,
be64b5b0
PH
3572 'KiB': 1024,
3573 'KB': 1000,
3574 'kB': 1024,
3575 'Kb': 1000,
13585d76 3576 'kb': 1000,
70852b47
YCH
3577 'kilobytes': 1000,
3578 'kibibytes': 1024,
be64b5b0
PH
3579 'MiB': 1024 ** 2,
3580 'MB': 1000 ** 2,
3581 'mB': 1024 ** 2,
3582 'Mb': 1000 ** 2,
13585d76 3583 'mb': 1000 ** 2,
70852b47
YCH
3584 'megabytes': 1000 ** 2,
3585 'mebibytes': 1024 ** 2,
be64b5b0
PH
3586 'GiB': 1024 ** 3,
3587 'GB': 1000 ** 3,
3588 'gB': 1024 ** 3,
3589 'Gb': 1000 ** 3,
13585d76 3590 'gb': 1000 ** 3,
70852b47
YCH
3591 'gigabytes': 1000 ** 3,
3592 'gibibytes': 1024 ** 3,
be64b5b0
PH
3593 'TiB': 1024 ** 4,
3594 'TB': 1000 ** 4,
3595 'tB': 1024 ** 4,
3596 'Tb': 1000 ** 4,
13585d76 3597 'tb': 1000 ** 4,
70852b47
YCH
3598 'terabytes': 1000 ** 4,
3599 'tebibytes': 1024 ** 4,
be64b5b0
PH
3600 'PiB': 1024 ** 5,
3601 'PB': 1000 ** 5,
3602 'pB': 1024 ** 5,
3603 'Pb': 1000 ** 5,
13585d76 3604 'pb': 1000 ** 5,
70852b47
YCH
3605 'petabytes': 1000 ** 5,
3606 'pebibytes': 1024 ** 5,
be64b5b0
PH
3607 'EiB': 1024 ** 6,
3608 'EB': 1000 ** 6,
3609 'eB': 1024 ** 6,
3610 'Eb': 1000 ** 6,
13585d76 3611 'eb': 1000 ** 6,
70852b47
YCH
3612 'exabytes': 1000 ** 6,
3613 'exbibytes': 1024 ** 6,
be64b5b0
PH
3614 'ZiB': 1024 ** 7,
3615 'ZB': 1000 ** 7,
3616 'zB': 1024 ** 7,
3617 'Zb': 1000 ** 7,
13585d76 3618 'zb': 1000 ** 7,
70852b47
YCH
3619 'zettabytes': 1000 ** 7,
3620 'zebibytes': 1024 ** 7,
be64b5b0
PH
3621 'YiB': 1024 ** 8,
3622 'YB': 1000 ** 8,
3623 'yB': 1024 ** 8,
3624 'Yb': 1000 ** 8,
13585d76 3625 'yb': 1000 ** 8,
70852b47
YCH
3626 'yottabytes': 1000 ** 8,
3627 'yobibytes': 1024 ** 8,
be64b5b0
PH
3628 }
3629
fb47597b
S
3630 return lookup_unit_table(_UNIT_TABLE, s)
3631
3632
3633def parse_count(s):
3634 if s is None:
be64b5b0
PH
3635 return None
3636
fb47597b
S
3637 s = s.strip()
3638
3639 if re.match(r'^[\d,.]+$', s):
3640 return str_to_int(s)
3641
3642 _UNIT_TABLE = {
3643 'k': 1000,
3644 'K': 1000,
3645 'm': 1000 ** 2,
3646 'M': 1000 ** 2,
3647 'kk': 1000 ** 2,
3648 'KK': 1000 ** 2,
3649 }
be64b5b0 3650
fb47597b 3651 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3652
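# Illustrative usage of the two parsers above (hypothetical strings):
#   >>> parse_filesize('1.5 MiB')
#   1572864
#   >>> parse_count('1.2M')
#   1200000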
2f7ae819 3653
b871d7e9
S
3654def parse_resolution(s):
3655 if s is None:
3656 return {}
3657
3658 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3659 if mobj:
3660 return {
3661 'width': int(mobj.group('w')),
3662 'height': int(mobj.group('h')),
3663 }
3664
3665 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3666 if mobj:
3667 return {'height': int(mobj.group(1))}
3668
3669 mobj = re.search(r'\b([48])[kK]\b', s)
3670 if mobj:
3671 return {'height': int(mobj.group(1)) * 540}
3672
3673 return {}
3674
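# Illustrative usage (hypothetical format notes):
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}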
3675
0dc41787
S
3676def parse_bitrate(s):
3677 if not isinstance(s, compat_str):
3678 return
3679 mobj = re.search(r'\b(\d+)\s*kbps', s)
3680 if mobj:
3681 return int(mobj.group(1))
3682
3683
a942d6cb 3684def month_by_name(name, lang='en'):
caefb1de
PH
3685 """ Return the number of a month by (locale-independently) English name """
3686
f6717dec 3687 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3688
caefb1de 3689 try:
f6717dec 3690 return month_names.index(name) + 1
7105440c
YCH
3691 except ValueError:
3692 return None
3693
3694
3695def month_by_abbreviation(abbrev):
3696 """ Return the number of a month by (locale-independently) English
3697 abbreviations """
3698
3699 try:
3700 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3701 except ValueError:
3702 return None
18258362
JMF
3703
3704
5aafe895 3705def fix_xml_ampersands(xml_str):
18258362 3706 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3707 return re.sub(
3708 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3709 '&amp;',
5aafe895 3710 xml_str)
e3946f98
PH
3711
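# Illustrative usage (hypothetical XML snippet): only bare '&' characters are escaped:
#   >>> fix_xml_ampersands('<a href="?x=1&y=2">&amp;</a>')
#   '<a href="?x=1&amp;y=2">&amp;</a>'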
3712
3713def setproctitle(title):
8bf48f23 3714 assert isinstance(title, compat_str)
c1c05c67
YCH
3715
3716 # ctypes in Jython is not complete
3717 # http://bugs.jython.org/issue2148
3718 if sys.platform.startswith('java'):
3719 return
3720
e3946f98 3721 try:
611c1dd9 3722 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3723 except OSError:
3724 return
2f49bcd6
RC
3725 except TypeError:
3726 # LoadLibrary in Windows Python 2.7.13 only expects
3727 # a bytestring, but since unicode_literals turns
3728 # every string into a unicode string, it fails.
3729 return
6eefe533
PH
3730 title_bytes = title.encode('utf-8')
3731 buf = ctypes.create_string_buffer(len(title_bytes))
3732 buf.value = title_bytes
e3946f98 3733 try:
6eefe533 3734 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3735 except AttributeError:
3736 return # Strange libc, just skip this
d7dda168
PH
3737
3738
3739def remove_start(s, start):
46bc9b7d 3740 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3741
3742
2b9faf55 3743def remove_end(s, end):
46bc9b7d 3744 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3745
3746
31b2051e
S
3747def remove_quotes(s):
3748 if s is None or len(s) < 2:
3749 return s
3750 for quote in ('"', "'", ):
3751 if s[0] == quote and s[-1] == quote:
3752 return s[1:-1]
3753 return s
3754
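# Illustrative usage of the removal helpers above (hypothetical strings):
#   >>> remove_start('www.example.com', 'www.')
#   'example.com'
#   >>> remove_end('video.mp4', '.mp4')
#   'video'
#   >>> remove_quotes('"hello"')
#   'hello'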
3755
b6e0c7d2
U
3756def get_domain(url):
3757 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3758 return domain.group('domain') if domain else None
3759
3760
29eb5174 3761def url_basename(url):
9b8aaeed 3762 path = compat_urlparse.urlparse(url).path
28e614de 3763 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3764
3765
02dc0a36
S
3766def base_url(url):
3767 return re.match(r'https?://[^?#&]+/', url).group()
3768
3769
e34c3361 3770def urljoin(base, path):
4b5de77b
S
3771 if isinstance(path, bytes):
3772 path = path.decode('utf-8')
e34c3361
S
3773 if not isinstance(path, compat_str) or not path:
3774 return None
fad4ceb5 3775 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3776 return path
4b5de77b
S
3777 if isinstance(base, bytes):
3778 base = base.decode('utf-8')
3779 if not isinstance(base, compat_str) or not re.match(
3780 r'^(?:https?:)?//', base):
e34c3361
S
3781 return None
3782 return compat_urlparse.urljoin(base, path)
3783
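# Illustrative usage of the URL helpers above (hypothetical URLs):
#   >>> url_basename('https://example.com/path/video.mp4?x=1')
#   'video.mp4'
#   >>> urljoin('https://example.com/a/', 'b/c.mp4')
#   'https://example.com/a/b/c.mp4'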
3784
aa94a6d3
PH
3785class HEADRequest(compat_urllib_request.Request):
3786 def get_method(self):
611c1dd9 3787 return 'HEAD'
7217e148
PH
3788
3789
95cf60e8
S
3790class PUTRequest(compat_urllib_request.Request):
3791 def get_method(self):
3792 return 'PUT'
3793
3794
9732d77e 3795def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3796 if get_attr:
3797 if v is not None:
3798 v = getattr(v, get_attr, None)
9572013d
PH
3799 if v == '':
3800 v = None
1812afb7
S
3801 if v is None:
3802 return default
3803 try:
3804 return int(v) * invscale // scale
5e1271c5 3805 except (ValueError, TypeError):
af98f8ff 3806 return default
9732d77e 3807
9572013d 3808
40a90862
JMF
3809def str_or_none(v, default=None):
3810 return default if v is None else compat_str(v)
3811
9732d77e
PH
3812
3813def str_to_int(int_str):
48d4681e 3814 """ A more relaxed version of int_or_none """
42db58ec 3815 if isinstance(int_str, compat_integer_types):
348c6bf1 3816 return int_str
42db58ec
S
3817 elif isinstance(int_str, compat_str):
3818 int_str = re.sub(r'[,\.\+]', '', int_str)
3819 return int_or_none(int_str)
608d11f5
PH
3820
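# Illustrative usage (hypothetical values):
#   >>> int_or_none('1500', scale=1000)
#   1
#   >>> str_to_int('1,000')
#   1000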
3821
9732d77e 3822def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3823 if v is None:
3824 return default
3825 try:
3826 return float(v) * invscale / scale
5e1271c5 3827 except (ValueError, TypeError):
caf80631 3828 return default
43f775e4
PH
3829
3830
c7e327c4
S
3831def bool_or_none(v, default=None):
3832 return v if isinstance(v, bool) else default
3833
3834
53cd37ba
S
3835def strip_or_none(v, default=None):
3836 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3837
3838
af03000a
S
3839def url_or_none(url):
3840 if not url or not isinstance(url, compat_str):
3841 return None
3842 url = url.strip()
29f7c58a 3843 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3844
3845
e29663c6 3846def strftime_or_none(timestamp, date_format, default=None):
3847 datetime_object = None
3848 try:
3849 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3850 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3851 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3852 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3853 return datetime_object.strftime(date_format)
3854 except (ValueError, TypeError, AttributeError):
3855 return default
3856
3857
608d11f5 3858def parse_duration(s):
8f9312c3 3859 if not isinstance(s, compat_basestring):
608d11f5
PH
3860 return None
3861
ca7b3246
S
3862 s = s.strip()
3863
acaff495 3864 days, hours, mins, secs, ms = [None] * 5
15846398 3865 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3866 if m:
3867 days, hours, mins, secs, ms = m.groups()
3868 else:
3869 m = re.match(
056653bb
S
3870 r'''(?ix)(?:P?
3871 (?:
3872 [0-9]+\s*y(?:ears?)?\s*
3873 )?
3874 (?:
3875 [0-9]+\s*m(?:onths?)?\s*
3876 )?
3877 (?:
3878 [0-9]+\s*w(?:eeks?)?\s*
3879 )?
8f4b58d7 3880 (?:
acaff495 3881 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3882 )?
056653bb 3883 T)?
acaff495 3884 (?:
3885 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3886 )?
3887 (?:
3888 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3889 )?
3890 (?:
3891 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3892 )?Z?$''', s)
acaff495 3893 if m:
3894 days, hours, mins, secs, ms = m.groups()
3895 else:
15846398 3896 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3897 if m:
3898 hours, mins = m.groups()
3899 else:
3900 return None
3901
3902 duration = 0
3903 if secs:
3904 duration += float(secs)
3905 if mins:
3906 duration += float(mins) * 60
3907 if hours:
3908 duration += float(hours) * 60 * 60
3909 if days:
3910 duration += float(days) * 24 * 60 * 60
3911 if ms:
3912 duration += float(ms)
3913 return duration
91d7d0b3
JMF
3914
3915
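# Illustrative usage (added note; not part of the original source). parse_duration
# accepts clock-style, ISO 8601-style and free-text durations, returning seconds:
#   parse_duration('1:23:45')   -> 5025.0
#   parse_duration('PT1H30M')   -> 5400.0
#   parse_duration('1h 30m')    -> 5400.0
#   parse_duration('90 min')    -> 5400.0
#   parse_duration('bogus')     -> None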
e65e4c88 3916def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3917 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3918 return (
3919 '{0}.{1}{2}'.format(name, ext, real_ext)
3920 if not expected_real_ext or real_ext[1:] == expected_real_ext
3921 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3922
3923
b3ed15b7
S
3924def replace_extension(filename, ext, expected_real_ext=None):
3925 name, real_ext = os.path.splitext(filename)
3926 return '{0}.{1}'.format(
3927 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3928 ext)
3929
3930
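# Illustrative usage (added note; not part of the original source):
#   prepend_extension('video.mp4', 'temp')                           -> 'video.temp.mp4'
#   prepend_extension('video.mkv', 'temp', expected_real_ext='mp4')  -> 'video.mkv.temp'
#   replace_extension('video.mp4', 'mkv')                            -> 'video.mkv'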
d70ad093
PH
3931def check_executable(exe, args=[]):
3932 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3933 args can be a list of arguments for a short output (like -version) """
3934 try:
f5b1bca9 3935 process_communicate_or_kill(subprocess.Popen(
3936 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
d70ad093
PH
3937 except OSError:
3938 return False
3939 return exe
b7ab0590
PH
3940
3941
95807118 3942def get_exe_version(exe, args=['--version'],
cae97f65 3943 version_re=None, unrecognized='present'):
95807118
PH
3944 """ Returns the version of the specified executable,
3945 or False if the executable is not present """
3946 try:
b64d04c1 3947 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 3948 # SIGTTOU if yt-dlp is run in the background.
067aa17e 3949 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
f5b1bca9 3950 out, _ = process_communicate_or_kill(subprocess.Popen(
54116803 3951 [encodeArgument(exe)] + args,
00ca7552 3952 stdin=subprocess.PIPE,
f5b1bca9 3953 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
95807118
PH
3954 except OSError:
3955 return False
cae97f65
PH
3956 if isinstance(out, bytes): # Python 2.x
3957 out = out.decode('ascii', 'ignore')
3958 return detect_exe_version(out, version_re, unrecognized)
3959
3960
3961def detect_exe_version(output, version_re=None, unrecognized='present'):
3962 assert isinstance(output, compat_str)
3963 if version_re is None:
3964 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3965 m = re.search(version_re, output)
95807118
PH
3966 if m:
3967 return m.group(1)
3968 else:
3969 return unrecognized
3970
3971
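# Illustrative usage (added note; not part of the original source). detect_exe_version
# pulls a version string out of an executable's banner, falling back to 'present':
#   detect_exe_version('ffmpeg version 4.3.1 Copyright (c) 2000-2020 ...')  -> '4.3.1'
#   detect_exe_version('unrecognizable output')                             -> 'present'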
cb89cfc1 3972class LazyList(collections.abc.Sequence):
483336e7 3973 ''' Lazy immutable list from an iterable
3974 Note that slices of a LazyList are lists and not LazyList'''
3975
8e5fecc8 3976 class IndexError(IndexError):
3977 pass
3978
483336e7 3979 def __init__(self, iterable):
3980 self.__iterable = iter(iterable)
3981 self.__cache = []
28419ca2 3982 self.__reversed = False
483336e7 3983
3984 def __iter__(self):
28419ca2 3985 if self.__reversed:
3986 # We need to consume the entire iterable to iterate in reverse
981052c9 3987 yield from self.exhaust()
28419ca2 3988 return
3989 yield from self.__cache
483336e7 3990 for item in self.__iterable:
3991 self.__cache.append(item)
3992 yield item
3993
981052c9 3994 def __exhaust(self):
483336e7 3995 self.__cache.extend(self.__iterable)
28419ca2 3996 return self.__cache
3997
981052c9 3998 def exhaust(self):
3999 ''' Evaluate the entire iterable '''
4000 return self.__exhaust()[::-1 if self.__reversed else 1]
4001
28419ca2 4002 @staticmethod
981052c9 4003 def __reverse_index(x):
e0f2b4b4 4004 return None if x is None else -(x + 1)
483336e7 4005
4006 def __getitem__(self, idx):
4007 if isinstance(idx, slice):
28419ca2 4008 if self.__reversed:
e0f2b4b4 4009 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4010 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4011 elif isinstance(idx, int):
28419ca2 4012 if self.__reversed:
981052c9 4013 idx = self.__reverse_index(idx)
e0f2b4b4 4014 start, stop, step = idx, idx, 0
483336e7 4015 else:
4016 raise TypeError('indices must be integers or slices')
e0f2b4b4 4017 if ((start or 0) < 0 or (stop or 0) < 0
4018 or (start is None and step < 0)
4019 or (stop is None and step > 0)):
483336e7 4020 # We need to consume the entire iterable to be able to slice from the end
4021 # Obviously, never use this with infinite iterables
8e5fecc8 4022 self.__exhaust()
4023 try:
4024 return self.__cache[idx]
4025 except IndexError as e:
4026 raise self.IndexError(e) from e
e0f2b4b4 4027 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4028 if n > 0:
4029 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4030 try:
4031 return self.__cache[idx]
4032 except IndexError as e:
4033 raise self.IndexError(e) from e
483336e7 4034
4035 def __bool__(self):
4036 try:
28419ca2 4037 self[-1] if self.__reversed else self[0]
8e5fecc8 4038 except self.IndexError:
483336e7 4039 return False
4040 return True
4041
4042 def __len__(self):
8e5fecc8 4043 self.__exhaust()
483336e7 4044 return len(self.__cache)
4045
981052c9 4046 def reverse(self):
28419ca2 4047 self.__reversed = not self.__reversed
4048 return self
4049
4050 def __repr__(self):
4051 # repr and str should mimic a list. So we exhaust the iterable
4052 return repr(self.exhaust())
4053
4054 def __str__(self):
4055 return repr(self.exhaust())
4056
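# Illustrative usage (added note; not part of the original source). A LazyList only
# consumes as much of the underlying iterable as indexing requires:
#   lazy = LazyList(x * 2 for x in range(10))
#   lazy[3]            -> 6            (consumes the generator up to index 3)
#   lazy[:3]           -> [0, 2, 4]    (slices are plain lists)
#   lazy[-1]           -> 18           (negative indices exhaust the iterable)
#   len(lazy)          -> 10
#   lazy.reverse()[0]  -> 18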
483336e7 4057
7be9ccff 4058class PagedList:
dd26ced1
PH
4059 def __len__(self):
4060 # This is only useful for tests
4061 return len(self.getslice())
4062
7be9ccff 4063 def __init__(self, pagefunc, pagesize, use_cache=True):
4064 self._pagefunc = pagefunc
4065 self._pagesize = pagesize
4066 self._use_cache = use_cache
4067 self._cache = {}
4068
4069 def getpage(self, pagenum):
4070 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4071 if self._use_cache:
4072 self._cache[pagenum] = page_results
4073 return page_results
4074
4075 def getslice(self, start=0, end=None):
4076 return list(self._getslice(start, end))
4077
4078 def _getslice(self, start, end):
55575225 4079 raise NotImplementedError('This method must be implemented by subclasses')
4080
4081 def __getitem__(self, idx):
7be9ccff 4082 # NOTE: cache must be enabled if this is used
55575225 4083 if not isinstance(idx, int) or idx < 0:
4084 raise TypeError('indices must be non-negative integers')
4085 entries = self.getslice(idx, idx + 1)
4086 return entries[0] if entries else None
4087
9c44d242
PH
4088
4089class OnDemandPagedList(PagedList):
7be9ccff 4090 def _getslice(self, start, end):
b7ab0590
PH
4091 for pagenum in itertools.count(start // self._pagesize):
4092 firstid = pagenum * self._pagesize
4093 nextfirstid = pagenum * self._pagesize + self._pagesize
4094 if start >= nextfirstid:
4095 continue
4096
b7ab0590
PH
4097 startv = (
4098 start % self._pagesize
4099 if firstid <= start < nextfirstid
4100 else 0)
b7ab0590
PH
4101 endv = (
4102 ((end - 1) % self._pagesize) + 1
4103 if (end is not None and firstid <= end <= nextfirstid)
4104 else None)
4105
7be9ccff 4106 page_results = self.getpage(pagenum)
b7ab0590
PH
4107 if startv != 0 or endv is not None:
4108 page_results = page_results[startv:endv]
7be9ccff 4109 yield from page_results
b7ab0590
PH
4110
 4111 # A little optimization - if the current page is not "full", i.e. does
 4112 # not contain page_size videos, then we can assume that this page
4113 # is the last one - there are no more ids on further pages -
4114 # i.e. no need to query again.
4115 if len(page_results) + startv < self._pagesize:
4116 break
4117
4118 # If we got the whole page, but the next page is not interesting,
4119 # break out early as well
4120 if end == nextfirstid:
4121 break
81c2f20b
PH
4122
4123
9c44d242
PH
4124class InAdvancePagedList(PagedList):
4125 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4126 self._pagecount = pagecount
7be9ccff 4127 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4128
7be9ccff 4129 def _getslice(self, start, end):
9c44d242
PH
4130 start_page = start // self._pagesize
4131 end_page = (
4132 self._pagecount if end is None else (end // self._pagesize + 1))
4133 skip_elems = start - start_page * self._pagesize
4134 only_more = None if end is None else end - start
4135 for pagenum in range(start_page, end_page):
7be9ccff 4136 page_results = self.getpage(pagenum)
9c44d242 4137 if skip_elems:
7be9ccff 4138 page_results = page_results[skip_elems:]
9c44d242
PH
4139 skip_elems = None
4140 if only_more is not None:
7be9ccff 4141 if len(page_results) < only_more:
4142 only_more -= len(page_results)
9c44d242 4143 else:
7be9ccff 4144 yield from page_results[:only_more]
9c44d242 4145 break
7be9ccff 4146 yield from page_results
9c44d242
PH
4147
4148
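# Illustrative usage (added note; not part of the original source). Both PagedList
# subclasses turn a page-fetching callback into a lazily evaluated sequence:
#   pages = OnDemandPagedList(lambda n: range(n * 10, (n + 1) * 10), 10)
#   pages.getslice(25, 28)  -> [25, 26, 27]   (only page 2 is fetched)
#   pages[3]                -> 3
# InAdvancePagedList behaves the same but needs the total page count up front.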
81c2f20b 4149def uppercase_escape(s):
676eb3f2 4150 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4151 return re.sub(
a612753d 4152 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4153 lambda m: unicode_escape(m.group(0))[0],
4154 s)
0fe2ff78
YCH
4155
4156
4157def lowercase_escape(s):
4158 unicode_escape = codecs.getdecoder('unicode_escape')
4159 return re.sub(
4160 r'\\u[0-9a-fA-F]{4}',
4161 lambda m: unicode_escape(m.group(0))[0],
4162 s)
b53466e1 4163
d05cfe06
S
4164
4165def escape_rfc3986(s):
4166 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4167 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4168 s = s.encode('utf-8')
ecc0c5ee 4169 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4170
4171
4172def escape_url(url):
4173 """Escape URL as suggested by RFC 3986"""
4174 url_parsed = compat_urllib_parse_urlparse(url)
4175 return url_parsed._replace(
efbed08d 4176 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4177 path=escape_rfc3986(url_parsed.path),
4178 params=escape_rfc3986(url_parsed.params),
4179 query=escape_rfc3986(url_parsed.query),
4180 fragment=escape_rfc3986(url_parsed.fragment)
4181 ).geturl()
4182
62e609ab 4183
4dfbf869 4184def parse_qs(url):
4185 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4186
4187
62e609ab
PH
4188def read_batch_urls(batch_fd):
4189 def fixup(url):
4190 if not isinstance(url, compat_str):
4191 url = url.decode('utf-8', 'replace')
8c04f0be 4192 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4193 for bom in BOM_UTF8:
4194 if url.startswith(bom):
4195 url = url[len(bom):]
4196 url = url.lstrip()
4197 if not url or url.startswith(('#', ';', ']')):
62e609ab 4198 return False
8c04f0be 4199 # "#" cannot be stripped out since it is part of the URI
 4200 # However, it can be safely stripped out if following a whitespace
4201 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4202
4203 with contextlib.closing(batch_fd) as fd:
4204 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4205
4206
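# Illustrative usage (added note; not part of the original source). read_batch_urls
# reads a --batch-file-like stream, dropping comment lines, BOMs and trailing notes:
#   read_batch_urls(io.StringIO(          # io is imported at the top of this module
#       'https://example.com/a\n'
#       '# a comment line\n'
#       'https://example.com/b #trailing note\n'))
#   -> ['https://example.com/a', 'https://example.com/b']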
4207def urlencode_postdata(*args, **kargs):
15707c7e 4208 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4209
4210
38f9ef31 4211def update_url_query(url, query):
cacd9966
YCH
4212 if not query:
4213 return url
38f9ef31 4214 parsed_url = compat_urlparse.urlparse(url)
4215 qs = compat_parse_qs(parsed_url.query)
4216 qs.update(query)
4217 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4218 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4219
8e60dc75 4220
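# Illustrative usage (added note; not part of the original source). update_url_query
# merges new parameters into an existing query string, replacing existing keys:
#   update_url_query('https://example.com/path?a=1', {'b': '2'})
#   -> 'https://example.com/path?a=1&b=2'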
ed0291d1
S
4221def update_Request(req, url=None, data=None, headers={}, query={}):
4222 req_headers = req.headers.copy()
4223 req_headers.update(headers)
4224 req_data = data or req.data
4225 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4226 req_get_method = req.get_method()
4227 if req_get_method == 'HEAD':
4228 req_type = HEADRequest
4229 elif req_get_method == 'PUT':
4230 req_type = PUTRequest
4231 else:
4232 req_type = compat_urllib_request.Request
ed0291d1
S
4233 new_req = req_type(
4234 req_url, data=req_data, headers=req_headers,
4235 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4236 if hasattr(req, 'timeout'):
4237 new_req.timeout = req.timeout
4238 return new_req
4239
4240
10c87c15 4241def _multipart_encode_impl(data, boundary):
0c265486
YCH
4242 content_type = 'multipart/form-data; boundary=%s' % boundary
4243
4244 out = b''
4245 for k, v in data.items():
4246 out += b'--' + boundary.encode('ascii') + b'\r\n'
4247 if isinstance(k, compat_str):
4248 k = k.encode('utf-8')
4249 if isinstance(v, compat_str):
4250 v = v.encode('utf-8')
4251 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4252 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4253 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4254 if boundary.encode('ascii') in content:
4255 raise ValueError('Boundary overlaps with data')
4256 out += content
4257
4258 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4259
4260 return out, content_type
4261
4262
4263def multipart_encode(data, boundary=None):
4264 '''
4265 Encode a dict to RFC 7578-compliant form-data
4266
4267 data:
4268 A dict where keys and values can be either Unicode or bytes-like
4269 objects.
4270 boundary:
 4271 If specified, it must be a Unicode object and is used as the boundary. Otherwise
4272 a random boundary is generated.
4273
4274 Reference: https://tools.ietf.org/html/rfc7578
4275 '''
4276 has_specified_boundary = boundary is not None
4277
4278 while True:
4279 if boundary is None:
4280 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4281
4282 try:
10c87c15 4283 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4284 break
4285 except ValueError:
4286 if has_specified_boundary:
4287 raise
4288 boundary = None
4289
4290 return out, content_type
4291
4292
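# Illustrative usage (added note; not part of the original source). With a fixed
# boundary the generated body is deterministic:
#   body, ctype = multipart_encode({'field': 'value'}, boundary='AaB03x')
#   ctype -> 'multipart/form-data; boundary=AaB03x'
#   body  -> b'--AaB03x\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AaB03x--\r\n'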
86296ad2 4293def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4294 if isinstance(key_or_keys, (list, tuple)):
4295 for key in key_or_keys:
86296ad2
S
4296 if key not in d or d[key] is None or skip_false_values and not d[key]:
4297 continue
4298 return d[key]
cbecc9b9
S
4299 return default
4300 return d.get(key_or_keys, default)
4301
4302
329ca3be 4303def try_get(src, getter, expected_type=None):
6606817a 4304 for get in variadic(getter):
a32a9a7e
S
4305 try:
4306 v = get(src)
4307 except (AttributeError, KeyError, TypeError, IndexError):
4308 pass
4309 else:
4310 if expected_type is None or isinstance(v, expected_type):
4311 return v
329ca3be
S
4312
4313
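# Illustrative usage (added note; not part of the original source). try_get and
# dict_get are the usual tools for digging into loosely structured JSON:
#   meta = {'items': [{'title': 'foo'}], 'views': None, 'likes': 0}
#   try_get(meta, lambda x: x['items'][0]['title'], compat_str)   -> 'foo'
#   try_get(meta, lambda x: x['items'][5]['title'])               -> None
#   dict_get(meta, ('views', 'likes'), default=-1)                -> -1  (None/falsy skipped)
#   dict_get(meta, ('views', 'likes'), skip_false_values=False)   -> 0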
6cc62232
S
4314def merge_dicts(*dicts):
4315 merged = {}
4316 for a_dict in dicts:
4317 for k, v in a_dict.items():
4318 if v is None:
4319 continue
3089bc74
S
4320 if (k not in merged
4321 or (isinstance(v, compat_str) and v
4322 and isinstance(merged[k], compat_str)
4323 and not merged[k])):
6cc62232
S
4324 merged[k] = v
4325 return merged
4326
4327
8e60dc75
S
4328def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4329 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4330
16392824 4331
a1a530b0
PH
4332US_RATINGS = {
4333 'G': 0,
4334 'PG': 10,
4335 'PG-13': 13,
4336 'R': 16,
4337 'NC': 18,
4338}
fac55558
PH
4339
4340
a8795327 4341TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4342 'TV-Y': 0,
4343 'TV-Y7': 7,
4344 'TV-G': 0,
4345 'TV-PG': 0,
4346 'TV-14': 14,
4347 'TV-MA': 17,
a8795327
S
4348}
4349
4350
146c80e2 4351def parse_age_limit(s):
a8795327
S
4352 if type(s) == int:
4353 return s if 0 <= s <= 21 else None
4354 if not isinstance(s, compat_basestring):
d838b1bd 4355 return None
146c80e2 4356 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4357 if m:
4358 return int(m.group('age'))
5c5fae6d 4359 s = s.upper()
a8795327
S
4360 if s in US_RATINGS:
4361 return US_RATINGS[s]
5a16c9d9 4362 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4363 if m:
5a16c9d9 4364 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4365 return None
146c80e2
S
4366
4367
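# Illustrative usage (added note; not part of the original source):
#   parse_age_limit(18)       -> 18
#   parse_age_limit('18+')    -> 18
#   parse_age_limit('PG-13')  -> 13
#   parse_age_limit('TV-MA')  -> 17
#   parse_age_limit('junk')   -> None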
fac55558 4368def strip_jsonp(code):
609a61e3 4369 return re.sub(
5552c9eb 4370 r'''(?sx)^
e9c671d5 4371 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4372 (?:\s*&&\s*(?P=func_name))?
4373 \s*\(\s*(?P<callback_data>.*)\);?
4374 \s*?(?://[^\n]*)*$''',
4375 r'\g<callback_data>', code)
478c2c61
PH
4376
4377
5c610515 4378def js_to_json(code, vars={}):
4379 # vars is a dict of var, val pairs to substitute
c843e685 4380 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4381 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4382 INTEGER_TABLE = (
4383 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4384 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4385 )
4386
e05f6939 4387 def fix_kv(m):
e7b6d122
PH
4388 v = m.group(0)
4389 if v in ('true', 'false', 'null'):
4390 return v
421ddcb8
C
4391 elif v in ('undefined', 'void 0'):
4392 return 'null'
8bdd16b4 4393 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4394 return ""
4395
4396 if v[0] in ("'", '"'):
4397 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4398 '"': '\\"',
bd1e4844 4399 "\\'": "'",
4400 '\\\n': '',
4401 '\\x': '\\u00',
4402 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4403 else:
4404 for regex, base in INTEGER_TABLE:
4405 im = re.match(regex, v)
4406 if im:
4407 i = int(im.group(1), base)
4408 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4409
5c610515 4410 if v in vars:
4411 return vars[v]
4412
e7b6d122 4413 return '"%s"' % v
e05f6939 4414
bd1e4844 4415 return re.sub(r'''(?sx)
4416 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4417 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4418 {comment}|,(?={skip}[\]}}])|
421ddcb8 4419 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4420 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4421 [0-9]+(?={skip}:)|
4422 !+
4195096e 4423 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4424
4425
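# Illustrative usage (added note; not part of the original source). js_to_json
# relaxes JavaScript object literals enough for json.loads (imported above) to
# accept them, and can substitute known variables via the vars argument:
#   json.loads(js_to_json("{foo: 'bar', num: 0x10, }"))      -> {'foo': 'bar', 'num': 16}
#   js_to_json('{level: maxLevel}', vars={'maxLevel': '10'}) -> '{"level": 10}'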
478c2c61
PH
4426def qualities(quality_ids):
4427 """ Get a numeric quality value out of a list of possible values """
4428 def q(qid):
4429 try:
4430 return quality_ids.index(qid)
4431 except ValueError:
4432 return -1
4433 return q
4434
acd69589 4435
de6000d9 4436DEFAULT_OUTTMPL = {
4437 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4438 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4439}
4440OUTTMPL_TYPES = {
72755351 4441 'chapter': None,
de6000d9 4442 'subtitle': None,
4443 'thumbnail': None,
4444 'description': 'description',
4445 'annotation': 'annotations.xml',
4446 'infojson': 'info.json',
5112f26a 4447 'pl_thumbnail': None,
de6000d9 4448 'pl_description': 'description',
4449 'pl_infojson': 'info.json',
4450}
0a871f68 4451
143db31d 4452# As of [1] format syntax is:
4453# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4454# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4455STR_FORMAT_RE_TMPL = r'''(?x)
4456 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4457 %
752cda38 4458 (?P<has_key>\((?P<key>{0})\))? # mapping key
4459 (?P<format>
4460 (?:[#0\-+ ]+)? # conversion flags (optional)
4461 (?:\d+)? # minimum field width (optional)
4462 (?:\.\d+)? # precision (optional)
4463 [hlL]? # length modifier (optional)
901130bb 4464 {1} # conversion type
752cda38 4465 )
143db31d 4466'''
4467
7d1eb38a 4468
901130bb 4469STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4470
7d1eb38a 4471
a020a0dc
PH
4472def limit_length(s, length):
4473 """ Add ellipses to overly long strings """
4474 if s is None:
4475 return None
4476 ELLIPSES = '...'
4477 if len(s) > length:
4478 return s[:length - len(ELLIPSES)] + ELLIPSES
4479 return s
48844745
PH
4480
4481
4482def version_tuple(v):
5f9b8394 4483 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4484
4485
4486def is_outdated_version(version, limit, assume_new=True):
4487 if not version:
4488 return not assume_new
4489 try:
4490 return version_tuple(version) < version_tuple(limit)
4491 except ValueError:
4492 return not assume_new
732ea2f0
PH
4493
4494
4495def ytdl_is_updateable():
7a5c1cfe 4496 """ Returns if yt-dlp can be updated with -U """
735d865e 4497 return False
4498
732ea2f0
PH
4499 from zipimport import zipimporter
4500
4501 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
4502
4503
4504def args_to_str(args):
4505 # Get a short string representation for a subprocess command
702ccf2d 4506 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4507
4508
9b9c5355 4509def error_to_compat_str(err):
fdae2358
S
4510 err_str = str(err)
 4511 # On Python 2, the error byte string must be decoded with the proper
 4512 # encoding rather than ASCII
4513 if sys.version_info[0] < 3:
4514 err_str = err_str.decode(preferredencoding())
4515 return err_str
4516
4517
c460bdd5 4518def mimetype2ext(mt):
eb9ee194
S
4519 if mt is None:
4520 return None
4521
765ac263
JMF
4522 ext = {
4523 'audio/mp4': 'm4a',
6c33d24b
YCH
4524 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4525 # it's the most popular one
4526 'audio/mpeg': 'mp3',
ba39289d 4527 'audio/x-wav': 'wav',
765ac263
JMF
4528 }.get(mt)
4529 if ext is not None:
4530 return ext
4531
c460bdd5 4532 _, _, res = mt.rpartition('/')
6562d34a 4533 res = res.split(';')[0].strip().lower()
c460bdd5
PH
4534
4535 return {
f6861ec9 4536 '3gpp': '3gp',
cafcf657 4537 'smptett+xml': 'tt',
cafcf657 4538 'ttaf+xml': 'dfxp',
a0d8d704 4539 'ttml+xml': 'ttml',
f6861ec9 4540 'x-flv': 'flv',
a0d8d704 4541 'x-mp4-fragmented': 'mp4',
d4f05d47 4542 'x-ms-sami': 'sami',
a0d8d704 4543 'x-ms-wmv': 'wmv',
b4173f15
RA
4544 'mpegurl': 'm3u8',
4545 'x-mpegurl': 'm3u8',
4546 'vnd.apple.mpegurl': 'm3u8',
4547 'dash+xml': 'mpd',
b4173f15 4548 'f4m+xml': 'f4m',
f164b971 4549 'hds+xml': 'f4m',
e910fe2f 4550 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4551 'quicktime': 'mov',
98ce1a3f 4552 'mp2t': 'ts',
39e7107d 4553 'x-wav': 'wav',
c460bdd5
PH
4554 }.get(res, res)
4555
4556
4f3c5e06 4557def parse_codecs(codecs_str):
4558 # http://tools.ietf.org/html/rfc6381
4559 if not codecs_str:
4560 return {}
a0566bbf 4561 split_codecs = list(filter(None, map(
dbf5416a 4562 str.strip, codecs_str.strip().strip(',').split(','))))
4f3c5e06 4563 vcodec, acodec = None, None
a0566bbf 4564 for full_codec in split_codecs:
4f3c5e06 4565 codec = full_codec.split('.')[0]
28cc2241 4566 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4567 if not vcodec:
4568 vcodec = full_codec
60f5c9fb 4569 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4570 if not acodec:
4571 acodec = full_codec
4572 else:
60f5c9fb 4573 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4574 if not vcodec and not acodec:
a0566bbf 4575 if len(split_codecs) == 2:
4f3c5e06 4576 return {
a0566bbf 4577 'vcodec': split_codecs[0],
4578 'acodec': split_codecs[1],
4f3c5e06 4579 }
4580 else:
4581 return {
4582 'vcodec': vcodec or 'none',
4583 'acodec': acodec or 'none',
4584 }
4585 return {}
4586
4587
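# Illustrative usage (added note; not part of the original source):
#   parse_codecs('avc1.64001f, mp4a.40.2')
#   -> {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
#   parse_codecs('vp9')  -> {'vcodec': 'vp9', 'acodec': 'none'}
#   parse_codecs('')     -> {}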
2ccd1b10 4588def urlhandle_detect_ext(url_handle):
79298173 4589 getheader = url_handle.headers.get
2ccd1b10 4590
b55ee18f
PH
4591 cd = getheader('Content-Disposition')
4592 if cd:
4593 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4594 if m:
4595 e = determine_ext(m.group('filename'), default_ext=None)
4596 if e:
4597 return e
4598
c460bdd5 4599 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4600
4601
1e399778
YCH
4602def encode_data_uri(data, mime_type):
4603 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4604
4605
05900629 4606def age_restricted(content_limit, age_limit):
6ec6cb4e 4607 """ Returns True iff the content should be blocked """
05900629
PH
4608
4609 if age_limit is None: # No limit set
4610 return False
4611 if content_limit is None:
4612 return False # Content available for everyone
4613 return age_limit < content_limit
61ca9a80
PH
4614
4615
4616def is_html(first_bytes):
4617 """ Detect whether a file contains HTML by examining its first bytes. """
4618
4619 BOMS = [
4620 (b'\xef\xbb\xbf', 'utf-8'),
4621 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4622 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4623 (b'\xff\xfe', 'utf-16-le'),
4624 (b'\xfe\xff', 'utf-16-be'),
4625 ]
4626 for bom, enc in BOMS:
4627 if first_bytes.startswith(bom):
4628 s = first_bytes[len(bom):].decode(enc, 'replace')
4629 break
4630 else:
4631 s = first_bytes.decode('utf-8', 'replace')
4632
4633 return re.match(r'^\s*<', s)
a055469f
PH
4634
4635
4636def determine_protocol(info_dict):
4637 protocol = info_dict.get('protocol')
4638 if protocol is not None:
4639 return protocol
4640
4641 url = info_dict['url']
4642 if url.startswith('rtmp'):
4643 return 'rtmp'
4644 elif url.startswith('mms'):
4645 return 'mms'
4646 elif url.startswith('rtsp'):
4647 return 'rtsp'
4648
4649 ext = determine_ext(url)
4650 if ext == 'm3u8':
4651 return 'm3u8'
4652 elif ext == 'f4m':
4653 return 'f4m'
4654
4655 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4656
4657
76d321f6 4658def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4659 """ Render a list of rows, each as a list of values """
76d321f6 4660
4661 def get_max_lens(table):
4662 return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4663
4664 def filter_using_list(row, filterArray):
4665 return [col for (take, col) in zip(filterArray, row) if take]
4666
4667 if hideEmpty:
4668 max_lens = get_max_lens(data)
4669 header_row = filter_using_list(header_row, max_lens)
4670 data = [filter_using_list(row, max_lens) for row in data]
4671
cfb56d1a 4672 table = [header_row] + data
76d321f6 4673 max_lens = get_max_lens(table)
4674 if delim:
4675 table = [header_row] + [['-' * ml for ml in max_lens]] + data
4676 format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
cfb56d1a 4677 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4678
4679
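# Illustrative usage (added note; not part of the original source). render_table
# left-aligns each column to the width of its longest cell:
#   render_table(['ID', 'EXT'], [['137', 'mp4'], ['22', 'webm']])
#   -> 'ID  EXT\n137 mp4\n22  webm'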
8f18aca8 4680def _match_one(filter_part, dct, incomplete):
77b87f05 4681 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4682 STRING_OPERATORS = {
4683 '*=': operator.contains,
4684 '^=': lambda attr, value: attr.startswith(value),
4685 '$=': lambda attr, value: attr.endswith(value),
4686 '~=': lambda attr, value: re.search(value, attr),
4687 }
347de493 4688 COMPARISON_OPERATORS = {
a047eeb6 4689 **STRING_OPERATORS,
4690 '<=': operator.le, # "<=" must be defined above "<"
347de493 4691 '<': operator.lt,
347de493 4692 '>=': operator.ge,
a047eeb6 4693 '>': operator.gt,
347de493 4694 '=': operator.eq,
347de493 4695 }
a047eeb6 4696
347de493
PH
4697 operator_rex = re.compile(r'''(?x)\s*
4698 (?P<key>[a-z_]+)
77b87f05 4699 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493
PH
4700 (?:
4701 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
a047eeb6 4702 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4703 (?P<strval>.+?)
347de493
PH
4704 )
4705 \s*$
4706 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4707 m = operator_rex.search(filter_part)
4708 if m:
77b87f05
MT
4709 unnegated_op = COMPARISON_OPERATORS[m.group('op')]
4710 if m.group('negation'):
4711 op = lambda attr, value: not unnegated_op(attr, value)
4712 else:
4713 op = unnegated_op
e5a088dc 4714 actual_value = dct.get(m.group('key'))
3089bc74
S
4715 if (m.group('quotedstrval') is not None
4716 or m.group('strval') is not None
e5a088dc
S
 4717 # If the original field is a string and the matching comparison value is
 4718 # a number, we should respect the origin of the original field
4719 # and process comparison value as a string (see
067aa17e 4720 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4721 or actual_value is not None and m.group('intval') is not None
4722 and isinstance(actual_value, compat_str)):
db13c16e
S
4723 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4724 quote = m.group('quote')
4725 if quote is not None:
4726 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493 4727 else:
a047eeb6 4728 if m.group('op') in STRING_OPERATORS:
4729 raise ValueError('Operator %s only supports string values!' % m.group('op'))
347de493
PH
4730 try:
4731 comparison_value = int(m.group('intval'))
4732 except ValueError:
4733 comparison_value = parse_filesize(m.group('intval'))
4734 if comparison_value is None:
4735 comparison_value = parse_filesize(m.group('intval') + 'B')
4736 if comparison_value is None:
4737 raise ValueError(
4738 'Invalid integer value %r in filter part %r' % (
4739 m.group('intval'), filter_part))
347de493 4740 if actual_value is None:
8f18aca8 4741 return incomplete or m.group('none_inclusive')
347de493
PH
4742 return op(actual_value, comparison_value)
4743
4744 UNARY_OPERATORS = {
1cc47c66
S
4745 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4746 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4747 }
4748 operator_rex = re.compile(r'''(?x)\s*
4749 (?P<op>%s)\s*(?P<key>[a-z_]+)
4750 \s*$
4751 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4752 m = operator_rex.search(filter_part)
4753 if m:
4754 op = UNARY_OPERATORS[m.group('op')]
4755 actual_value = dct.get(m.group('key'))
8f18aca8 4756 if incomplete and actual_value is None:
4757 return True
347de493
PH
4758 return op(actual_value)
4759
4760 raise ValueError('Invalid filter part %r' % filter_part)
4761
4762
8f18aca8 4763def match_str(filter_str, dct, incomplete=False):
 4764 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False
 4765 When incomplete, all conditions pass on missing fields
4766 """
347de493 4767 return all(
8f18aca8 4768 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4769 for filter_part in re.split(r'(?<!\\)&', filter_str))
347de493
PH
4770
4771
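# Illustrative usage (added note; not part of the original source). match_str drives
# --match-filter style expressions:
#   info = {'like_count': 190, 'dislike_count': 10, 'title': 'Some Video'}
#   match_str('like_count > 100 & dislike_count <? 50', info)  -> True
#   match_str('title *= video', info)                          -> False  (case-sensitive contains)
#   match_str('!is_live', info)                                -> True   (missing field counts as absent)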
4772def match_filter_func(filter_str):
8f18aca8 4773 def _match_func(info_dict, *args, **kwargs):
4774 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4775 return None
4776 else:
4777 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4778 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4779 return _match_func
91410c9b
PH
4780
4781
bf6427d2
YCH
4782def parse_dfxp_time_expr(time_expr):
4783 if not time_expr:
d631d5f9 4784 return
bf6427d2
YCH
4785
4786 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4787 if mobj:
4788 return float(mobj.group('time_offset'))
4789
db2fe38b 4790 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4791 if mobj:
db2fe38b 4792 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4793
4794
c1c924ab
YCH
4795def srt_subtitles_timecode(seconds):
4796 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
bf6427d2
YCH
4797
4798
4799def dfxp2srt(dfxp_data):
3869028f
YCH
4800 '''
4801 @param dfxp_data A bytes-like object containing DFXP data
4802 @returns A unicode object containing converted SRT data
4803 '''
5b995f71 4804 LEGACY_NAMESPACES = (
3869028f
YCH
4805 (b'http://www.w3.org/ns/ttml', [
4806 b'http://www.w3.org/2004/11/ttaf1',
4807 b'http://www.w3.org/2006/04/ttaf1',
4808 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4809 ]),
3869028f
YCH
4810 (b'http://www.w3.org/ns/ttml#styling', [
4811 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4812 ]),
4813 )
4814
4815 SUPPORTED_STYLING = [
4816 'color',
4817 'fontFamily',
4818 'fontSize',
4819 'fontStyle',
4820 'fontWeight',
4821 'textDecoration'
4822 ]
4823
4e335771 4824 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4825 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4826 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4827 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4828 })
bf6427d2 4829
5b995f71
RA
4830 styles = {}
4831 default_style = {}
4832
87de7069 4833 class TTMLPElementParser(object):
5b995f71
RA
4834 _out = ''
4835 _unclosed_elements = []
4836 _applied_styles = []
bf6427d2 4837
2b14cb56 4838 def start(self, tag, attrib):
5b995f71
RA
4839 if tag in (_x('ttml:br'), 'br'):
4840 self._out += '\n'
4841 else:
4842 unclosed_elements = []
4843 style = {}
4844 element_style_id = attrib.get('style')
4845 if default_style:
4846 style.update(default_style)
4847 if element_style_id:
4848 style.update(styles.get(element_style_id, {}))
4849 for prop in SUPPORTED_STYLING:
4850 prop_val = attrib.get(_x('tts:' + prop))
4851 if prop_val:
4852 style[prop] = prop_val
4853 if style:
4854 font = ''
4855 for k, v in sorted(style.items()):
4856 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4857 continue
4858 if k == 'color':
4859 font += ' color="%s"' % v
4860 elif k == 'fontSize':
4861 font += ' size="%s"' % v
4862 elif k == 'fontFamily':
4863 font += ' face="%s"' % v
4864 elif k == 'fontWeight' and v == 'bold':
4865 self._out += '<b>'
4866 unclosed_elements.append('b')
4867 elif k == 'fontStyle' and v == 'italic':
4868 self._out += '<i>'
4869 unclosed_elements.append('i')
4870 elif k == 'textDecoration' and v == 'underline':
4871 self._out += '<u>'
4872 unclosed_elements.append('u')
4873 if font:
4874 self._out += '<font' + font + '>'
4875 unclosed_elements.append('font')
4876 applied_style = {}
4877 if self._applied_styles:
4878 applied_style.update(self._applied_styles[-1])
4879 applied_style.update(style)
4880 self._applied_styles.append(applied_style)
4881 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4882
2b14cb56 4883 def end(self, tag):
5b995f71
RA
4884 if tag not in (_x('ttml:br'), 'br'):
4885 unclosed_elements = self._unclosed_elements.pop()
4886 for element in reversed(unclosed_elements):
4887 self._out += '</%s>' % element
4888 if unclosed_elements and self._applied_styles:
4889 self._applied_styles.pop()
bf6427d2 4890
2b14cb56 4891 def data(self, data):
5b995f71 4892 self._out += data
2b14cb56 4893
4894 def close(self):
5b995f71 4895 return self._out.strip()
2b14cb56 4896
4897 def parse_node(node):
4898 target = TTMLPElementParser()
4899 parser = xml.etree.ElementTree.XMLParser(target=target)
4900 parser.feed(xml.etree.ElementTree.tostring(node))
4901 return parser.close()
bf6427d2 4902
5b995f71
RA
4903 for k, v in LEGACY_NAMESPACES:
4904 for ns in v:
4905 dfxp_data = dfxp_data.replace(ns, k)
4906
3869028f 4907 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 4908 out = []
5b995f71 4909 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
4910
4911 if not paras:
4912 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 4913
5b995f71
RA
4914 repeat = False
4915 while True:
4916 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
4917 style_id = style.get('id') or style.get(_x('xml:id'))
4918 if not style_id:
4919 continue
5b995f71
RA
4920 parent_style_id = style.get('style')
4921 if parent_style_id:
4922 if parent_style_id not in styles:
4923 repeat = True
4924 continue
4925 styles[style_id] = styles[parent_style_id].copy()
4926 for prop in SUPPORTED_STYLING:
4927 prop_val = style.get(_x('tts:' + prop))
4928 if prop_val:
4929 styles.setdefault(style_id, {})[prop] = prop_val
4930 if repeat:
4931 repeat = False
4932 else:
4933 break
4934
4935 for p in ('body', 'div'):
4936 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4937 if ele is None:
4938 continue
4939 style = styles.get(ele.get('style'))
4940 if not style:
4941 continue
4942 default_style.update(style)
4943
bf6427d2 4944 for para, index in zip(paras, itertools.count(1)):
d631d5f9 4945 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 4946 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
4947 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4948 if begin_time is None:
4949 continue
7dff0363 4950 if not end_time:
d631d5f9
YCH
4951 if not dur:
4952 continue
4953 end_time = begin_time + dur
bf6427d2
YCH
4954 out.append('%d\n%s --> %s\n%s\n\n' % (
4955 index,
c1c924ab
YCH
4956 srt_subtitles_timecode(begin_time),
4957 srt_subtitles_timecode(end_time),
bf6427d2
YCH
4958 parse_node(para)))
4959
4960 return ''.join(out)
4961
4962
66e289ba
S
4963def cli_option(params, command_option, param):
4964 param = params.get(param)
98e698f1
RA
4965 if param:
4966 param = compat_str(param)
66e289ba
S
4967 return [command_option, param] if param is not None else []
4968
4969
4970def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4971 param = params.get(param)
5b232f46
S
4972 if param is None:
4973 return []
66e289ba
S
4974 assert isinstance(param, bool)
4975 if separator:
4976 return [command_option + separator + (true_value if param else false_value)]
4977 return [command_option, true_value if param else false_value]
4978
4979
4980def cli_valueless_option(params, command_option, param, expected_value=True):
4981 param = params.get(param)
4982 return [command_option] if param == expected_value else []
4983
4984
e92caff5 4985def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 4986 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 4987 if use_compat:
5b1ecbb3 4988 return argdict
4989 else:
4990 argdict = None
eab9b2bc 4991 if argdict is None:
5b1ecbb3 4992 return default
eab9b2bc 4993 assert isinstance(argdict, dict)
4994
e92caff5 4995 assert isinstance(keys, (list, tuple))
4996 for key_list in keys:
e92caff5 4997 arg_list = list(filter(
4998 lambda x: x is not None,
6606817a 4999 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5000 if arg_list:
5001 return [arg for args in arg_list for arg in args]
5002 return default
66e289ba 5003
6251555f 5004
330690a2 5005def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5006 main_key, exe = main_key.lower(), exe.lower()
5007 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5008 keys = [f'{root_key}{k}' for k in (keys or [''])]
5009 if root_key in keys:
5010 if main_key != exe:
5011 keys.append((main_key, exe))
5012 keys.append('default')
5013 else:
5014 use_compat = False
5015 return cli_configuration_args(argdict, keys, default, use_compat)
5016
66e289ba 5017
39672624
YCH
5018class ISO639Utils(object):
5019 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5020 _lang_map = {
5021 'aa': 'aar',
5022 'ab': 'abk',
5023 'ae': 'ave',
5024 'af': 'afr',
5025 'ak': 'aka',
5026 'am': 'amh',
5027 'an': 'arg',
5028 'ar': 'ara',
5029 'as': 'asm',
5030 'av': 'ava',
5031 'ay': 'aym',
5032 'az': 'aze',
5033 'ba': 'bak',
5034 'be': 'bel',
5035 'bg': 'bul',
5036 'bh': 'bih',
5037 'bi': 'bis',
5038 'bm': 'bam',
5039 'bn': 'ben',
5040 'bo': 'bod',
5041 'br': 'bre',
5042 'bs': 'bos',
5043 'ca': 'cat',
5044 'ce': 'che',
5045 'ch': 'cha',
5046 'co': 'cos',
5047 'cr': 'cre',
5048 'cs': 'ces',
5049 'cu': 'chu',
5050 'cv': 'chv',
5051 'cy': 'cym',
5052 'da': 'dan',
5053 'de': 'deu',
5054 'dv': 'div',
5055 'dz': 'dzo',
5056 'ee': 'ewe',
5057 'el': 'ell',
5058 'en': 'eng',
5059 'eo': 'epo',
5060 'es': 'spa',
5061 'et': 'est',
5062 'eu': 'eus',
5063 'fa': 'fas',
5064 'ff': 'ful',
5065 'fi': 'fin',
5066 'fj': 'fij',
5067 'fo': 'fao',
5068 'fr': 'fra',
5069 'fy': 'fry',
5070 'ga': 'gle',
5071 'gd': 'gla',
5072 'gl': 'glg',
5073 'gn': 'grn',
5074 'gu': 'guj',
5075 'gv': 'glv',
5076 'ha': 'hau',
5077 'he': 'heb',
b7acc835 5078 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5079 'hi': 'hin',
5080 'ho': 'hmo',
5081 'hr': 'hrv',
5082 'ht': 'hat',
5083 'hu': 'hun',
5084 'hy': 'hye',
5085 'hz': 'her',
5086 'ia': 'ina',
5087 'id': 'ind',
b7acc835 5088 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5089 'ie': 'ile',
5090 'ig': 'ibo',
5091 'ii': 'iii',
5092 'ik': 'ipk',
5093 'io': 'ido',
5094 'is': 'isl',
5095 'it': 'ita',
5096 'iu': 'iku',
5097 'ja': 'jpn',
5098 'jv': 'jav',
5099 'ka': 'kat',
5100 'kg': 'kon',
5101 'ki': 'kik',
5102 'kj': 'kua',
5103 'kk': 'kaz',
5104 'kl': 'kal',
5105 'km': 'khm',
5106 'kn': 'kan',
5107 'ko': 'kor',
5108 'kr': 'kau',
5109 'ks': 'kas',
5110 'ku': 'kur',
5111 'kv': 'kom',
5112 'kw': 'cor',
5113 'ky': 'kir',
5114 'la': 'lat',
5115 'lb': 'ltz',
5116 'lg': 'lug',
5117 'li': 'lim',
5118 'ln': 'lin',
5119 'lo': 'lao',
5120 'lt': 'lit',
5121 'lu': 'lub',
5122 'lv': 'lav',
5123 'mg': 'mlg',
5124 'mh': 'mah',
5125 'mi': 'mri',
5126 'mk': 'mkd',
5127 'ml': 'mal',
5128 'mn': 'mon',
5129 'mr': 'mar',
5130 'ms': 'msa',
5131 'mt': 'mlt',
5132 'my': 'mya',
5133 'na': 'nau',
5134 'nb': 'nob',
5135 'nd': 'nde',
5136 'ne': 'nep',
5137 'ng': 'ndo',
5138 'nl': 'nld',
5139 'nn': 'nno',
5140 'no': 'nor',
5141 'nr': 'nbl',
5142 'nv': 'nav',
5143 'ny': 'nya',
5144 'oc': 'oci',
5145 'oj': 'oji',
5146 'om': 'orm',
5147 'or': 'ori',
5148 'os': 'oss',
5149 'pa': 'pan',
5150 'pi': 'pli',
5151 'pl': 'pol',
5152 'ps': 'pus',
5153 'pt': 'por',
5154 'qu': 'que',
5155 'rm': 'roh',
5156 'rn': 'run',
5157 'ro': 'ron',
5158 'ru': 'rus',
5159 'rw': 'kin',
5160 'sa': 'san',
5161 'sc': 'srd',
5162 'sd': 'snd',
5163 'se': 'sme',
5164 'sg': 'sag',
5165 'si': 'sin',
5166 'sk': 'slk',
5167 'sl': 'slv',
5168 'sm': 'smo',
5169 'sn': 'sna',
5170 'so': 'som',
5171 'sq': 'sqi',
5172 'sr': 'srp',
5173 'ss': 'ssw',
5174 'st': 'sot',
5175 'su': 'sun',
5176 'sv': 'swe',
5177 'sw': 'swa',
5178 'ta': 'tam',
5179 'te': 'tel',
5180 'tg': 'tgk',
5181 'th': 'tha',
5182 'ti': 'tir',
5183 'tk': 'tuk',
5184 'tl': 'tgl',
5185 'tn': 'tsn',
5186 'to': 'ton',
5187 'tr': 'tur',
5188 'ts': 'tso',
5189 'tt': 'tat',
5190 'tw': 'twi',
5191 'ty': 'tah',
5192 'ug': 'uig',
5193 'uk': 'ukr',
5194 'ur': 'urd',
5195 'uz': 'uzb',
5196 've': 'ven',
5197 'vi': 'vie',
5198 'vo': 'vol',
5199 'wa': 'wln',
5200 'wo': 'wol',
5201 'xh': 'xho',
5202 'yi': 'yid',
e9a50fba 5203 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5204 'yo': 'yor',
5205 'za': 'zha',
5206 'zh': 'zho',
5207 'zu': 'zul',
5208 }
5209
5210 @classmethod
5211 def short2long(cls, code):
5212 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5213 return cls._lang_map.get(code[:2])
5214
5215 @classmethod
5216 def long2short(cls, code):
5217 """Convert language code from ISO 639-2/T to ISO 639-1"""
5218 for short_name, long_name in cls._lang_map.items():
5219 if long_name == code:
5220 return short_name
5221
5222
4eb10f66
YCH
5223class ISO3166Utils(object):
5224 # From http://data.okfn.org/data/core/country-list
5225 _country_map = {
5226 'AF': 'Afghanistan',
5227 'AX': 'Åland Islands',
5228 'AL': 'Albania',
5229 'DZ': 'Algeria',
5230 'AS': 'American Samoa',
5231 'AD': 'Andorra',
5232 'AO': 'Angola',
5233 'AI': 'Anguilla',
5234 'AQ': 'Antarctica',
5235 'AG': 'Antigua and Barbuda',
5236 'AR': 'Argentina',
5237 'AM': 'Armenia',
5238 'AW': 'Aruba',
5239 'AU': 'Australia',
5240 'AT': 'Austria',
5241 'AZ': 'Azerbaijan',
5242 'BS': 'Bahamas',
5243 'BH': 'Bahrain',
5244 'BD': 'Bangladesh',
5245 'BB': 'Barbados',
5246 'BY': 'Belarus',
5247 'BE': 'Belgium',
5248 'BZ': 'Belize',
5249 'BJ': 'Benin',
5250 'BM': 'Bermuda',
5251 'BT': 'Bhutan',
5252 'BO': 'Bolivia, Plurinational State of',
5253 'BQ': 'Bonaire, Sint Eustatius and Saba',
5254 'BA': 'Bosnia and Herzegovina',
5255 'BW': 'Botswana',
5256 'BV': 'Bouvet Island',
5257 'BR': 'Brazil',
5258 'IO': 'British Indian Ocean Territory',
5259 'BN': 'Brunei Darussalam',
5260 'BG': 'Bulgaria',
5261 'BF': 'Burkina Faso',
5262 'BI': 'Burundi',
5263 'KH': 'Cambodia',
5264 'CM': 'Cameroon',
5265 'CA': 'Canada',
5266 'CV': 'Cape Verde',
5267 'KY': 'Cayman Islands',
5268 'CF': 'Central African Republic',
5269 'TD': 'Chad',
5270 'CL': 'Chile',
5271 'CN': 'China',
5272 'CX': 'Christmas Island',
5273 'CC': 'Cocos (Keeling) Islands',
5274 'CO': 'Colombia',
5275 'KM': 'Comoros',
5276 'CG': 'Congo',
5277 'CD': 'Congo, the Democratic Republic of the',
5278 'CK': 'Cook Islands',
5279 'CR': 'Costa Rica',
5280 'CI': 'Côte d\'Ivoire',
5281 'HR': 'Croatia',
5282 'CU': 'Cuba',
5283 'CW': 'Curaçao',
5284 'CY': 'Cyprus',
5285 'CZ': 'Czech Republic',
5286 'DK': 'Denmark',
5287 'DJ': 'Djibouti',
5288 'DM': 'Dominica',
5289 'DO': 'Dominican Republic',
5290 'EC': 'Ecuador',
5291 'EG': 'Egypt',
5292 'SV': 'El Salvador',
5293 'GQ': 'Equatorial Guinea',
5294 'ER': 'Eritrea',
5295 'EE': 'Estonia',
5296 'ET': 'Ethiopia',
5297 'FK': 'Falkland Islands (Malvinas)',
5298 'FO': 'Faroe Islands',
5299 'FJ': 'Fiji',
5300 'FI': 'Finland',
5301 'FR': 'France',
5302 'GF': 'French Guiana',
5303 'PF': 'French Polynesia',
5304 'TF': 'French Southern Territories',
5305 'GA': 'Gabon',
5306 'GM': 'Gambia',
5307 'GE': 'Georgia',
5308 'DE': 'Germany',
5309 'GH': 'Ghana',
5310 'GI': 'Gibraltar',
5311 'GR': 'Greece',
5312 'GL': 'Greenland',
5313 'GD': 'Grenada',
5314 'GP': 'Guadeloupe',
5315 'GU': 'Guam',
5316 'GT': 'Guatemala',
5317 'GG': 'Guernsey',
5318 'GN': 'Guinea',
5319 'GW': 'Guinea-Bissau',
5320 'GY': 'Guyana',
5321 'HT': 'Haiti',
5322 'HM': 'Heard Island and McDonald Islands',
5323 'VA': 'Holy See (Vatican City State)',
5324 'HN': 'Honduras',
5325 'HK': 'Hong Kong',
5326 'HU': 'Hungary',
5327 'IS': 'Iceland',
5328 'IN': 'India',
5329 'ID': 'Indonesia',
5330 'IR': 'Iran, Islamic Republic of',
5331 'IQ': 'Iraq',
5332 'IE': 'Ireland',
5333 'IM': 'Isle of Man',
5334 'IL': 'Israel',
5335 'IT': 'Italy',
5336 'JM': 'Jamaica',
5337 'JP': 'Japan',
5338 'JE': 'Jersey',
5339 'JO': 'Jordan',
5340 'KZ': 'Kazakhstan',
5341 'KE': 'Kenya',
5342 'KI': 'Kiribati',
5343 'KP': 'Korea, Democratic People\'s Republic of',
5344 'KR': 'Korea, Republic of',
5345 'KW': 'Kuwait',
5346 'KG': 'Kyrgyzstan',
5347 'LA': 'Lao People\'s Democratic Republic',
5348 'LV': 'Latvia',
5349 'LB': 'Lebanon',
5350 'LS': 'Lesotho',
5351 'LR': 'Liberia',
5352 'LY': 'Libya',
5353 'LI': 'Liechtenstein',
5354 'LT': 'Lithuania',
5355 'LU': 'Luxembourg',
5356 'MO': 'Macao',
5357 'MK': 'Macedonia, the Former Yugoslav Republic of',
5358 'MG': 'Madagascar',
5359 'MW': 'Malawi',
5360 'MY': 'Malaysia',
5361 'MV': 'Maldives',
5362 'ML': 'Mali',
5363 'MT': 'Malta',
5364 'MH': 'Marshall Islands',
5365 'MQ': 'Martinique',
5366 'MR': 'Mauritania',
5367 'MU': 'Mauritius',
5368 'YT': 'Mayotte',
5369 'MX': 'Mexico',
5370 'FM': 'Micronesia, Federated States of',
5371 'MD': 'Moldova, Republic of',
5372 'MC': 'Monaco',
5373 'MN': 'Mongolia',
5374 'ME': 'Montenegro',
5375 'MS': 'Montserrat',
5376 'MA': 'Morocco',
5377 'MZ': 'Mozambique',
5378 'MM': 'Myanmar',
5379 'NA': 'Namibia',
5380 'NR': 'Nauru',
5381 'NP': 'Nepal',
5382 'NL': 'Netherlands',
5383 'NC': 'New Caledonia',
5384 'NZ': 'New Zealand',
5385 'NI': 'Nicaragua',
5386 'NE': 'Niger',
5387 'NG': 'Nigeria',
5388 'NU': 'Niue',
5389 'NF': 'Norfolk Island',
5390 'MP': 'Northern Mariana Islands',
5391 'NO': 'Norway',
5392 'OM': 'Oman',
5393 'PK': 'Pakistan',
5394 'PW': 'Palau',
5395 'PS': 'Palestine, State of',
5396 'PA': 'Panama',
5397 'PG': 'Papua New Guinea',
5398 'PY': 'Paraguay',
5399 'PE': 'Peru',
5400 'PH': 'Philippines',
5401 'PN': 'Pitcairn',
5402 'PL': 'Poland',
5403 'PT': 'Portugal',
5404 'PR': 'Puerto Rico',
5405 'QA': 'Qatar',
5406 'RE': 'Réunion',
5407 'RO': 'Romania',
5408 'RU': 'Russian Federation',
5409 'RW': 'Rwanda',
5410 'BL': 'Saint Barthélemy',
5411 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5412 'KN': 'Saint Kitts and Nevis',
5413 'LC': 'Saint Lucia',
5414 'MF': 'Saint Martin (French part)',
5415 'PM': 'Saint Pierre and Miquelon',
5416 'VC': 'Saint Vincent and the Grenadines',
5417 'WS': 'Samoa',
5418 'SM': 'San Marino',
5419 'ST': 'Sao Tome and Principe',
5420 'SA': 'Saudi Arabia',
5421 'SN': 'Senegal',
5422 'RS': 'Serbia',
5423 'SC': 'Seychelles',
5424 'SL': 'Sierra Leone',
5425 'SG': 'Singapore',
5426 'SX': 'Sint Maarten (Dutch part)',
5427 'SK': 'Slovakia',
5428 'SI': 'Slovenia',
5429 'SB': 'Solomon Islands',
5430 'SO': 'Somalia',
5431 'ZA': 'South Africa',
5432 'GS': 'South Georgia and the South Sandwich Islands',
5433 'SS': 'South Sudan',
5434 'ES': 'Spain',
5435 'LK': 'Sri Lanka',
5436 'SD': 'Sudan',
5437 'SR': 'Suriname',
5438 'SJ': 'Svalbard and Jan Mayen',
5439 'SZ': 'Swaziland',
5440 'SE': 'Sweden',
5441 'CH': 'Switzerland',
5442 'SY': 'Syrian Arab Republic',
5443 'TW': 'Taiwan, Province of China',
5444 'TJ': 'Tajikistan',
5445 'TZ': 'Tanzania, United Republic of',
5446 'TH': 'Thailand',
5447 'TL': 'Timor-Leste',
5448 'TG': 'Togo',
5449 'TK': 'Tokelau',
5450 'TO': 'Tonga',
5451 'TT': 'Trinidad and Tobago',
5452 'TN': 'Tunisia',
5453 'TR': 'Turkey',
5454 'TM': 'Turkmenistan',
5455 'TC': 'Turks and Caicos Islands',
5456 'TV': 'Tuvalu',
5457 'UG': 'Uganda',
5458 'UA': 'Ukraine',
5459 'AE': 'United Arab Emirates',
5460 'GB': 'United Kingdom',
5461 'US': 'United States',
5462 'UM': 'United States Minor Outlying Islands',
5463 'UY': 'Uruguay',
5464 'UZ': 'Uzbekistan',
5465 'VU': 'Vanuatu',
5466 'VE': 'Venezuela, Bolivarian Republic of',
5467 'VN': 'Viet Nam',
5468 'VG': 'Virgin Islands, British',
5469 'VI': 'Virgin Islands, U.S.',
5470 'WF': 'Wallis and Futuna',
5471 'EH': 'Western Sahara',
5472 'YE': 'Yemen',
5473 'ZM': 'Zambia',
5474 'ZW': 'Zimbabwe',
5475 }
5476
5477 @classmethod
5478 def short2full(cls, code):
5479 """Convert an ISO 3166-2 country code to the corresponding full name"""
5480 return cls._country_map.get(code.upper())
5481
5482
773f291d
S
5483class GeoUtils(object):
5484 # Major IPv4 address blocks per country
5485 _country_ip_map = {
53896ca5 5486 'AD': '46.172.224.0/19',
773f291d
S
5487 'AE': '94.200.0.0/13',
5488 'AF': '149.54.0.0/17',
5489 'AG': '209.59.64.0/18',
5490 'AI': '204.14.248.0/21',
5491 'AL': '46.99.0.0/16',
5492 'AM': '46.70.0.0/15',
5493 'AO': '105.168.0.0/13',
53896ca5
S
5494 'AP': '182.50.184.0/21',
5495 'AQ': '23.154.160.0/24',
773f291d
S
5496 'AR': '181.0.0.0/12',
5497 'AS': '202.70.112.0/20',
53896ca5 5498 'AT': '77.116.0.0/14',
773f291d
S
5499 'AU': '1.128.0.0/11',
5500 'AW': '181.41.0.0/18',
53896ca5
S
5501 'AX': '185.217.4.0/22',
5502 'AZ': '5.197.0.0/16',
773f291d
S
5503 'BA': '31.176.128.0/17',
5504 'BB': '65.48.128.0/17',
5505 'BD': '114.130.0.0/16',
5506 'BE': '57.0.0.0/8',
53896ca5 5507 'BF': '102.178.0.0/15',
773f291d
S
5508 'BG': '95.42.0.0/15',
5509 'BH': '37.131.0.0/17',
5510 'BI': '154.117.192.0/18',
5511 'BJ': '137.255.0.0/16',
53896ca5 5512 'BL': '185.212.72.0/23',
773f291d
S
5513 'BM': '196.12.64.0/18',
5514 'BN': '156.31.0.0/16',
5515 'BO': '161.56.0.0/16',
5516 'BQ': '161.0.80.0/20',
53896ca5 5517 'BR': '191.128.0.0/12',
773f291d
S
5518 'BS': '24.51.64.0/18',
5519 'BT': '119.2.96.0/19',
5520 'BW': '168.167.0.0/16',
5521 'BY': '178.120.0.0/13',
5522 'BZ': '179.42.192.0/18',
5523 'CA': '99.224.0.0/11',
5524 'CD': '41.243.0.0/16',
53896ca5
S
5525 'CF': '197.242.176.0/21',
5526 'CG': '160.113.0.0/16',
773f291d 5527 'CH': '85.0.0.0/13',
53896ca5 5528 'CI': '102.136.0.0/14',
773f291d
S
5529 'CK': '202.65.32.0/19',
5530 'CL': '152.172.0.0/14',
53896ca5 5531 'CM': '102.244.0.0/14',
773f291d
S
5532 'CN': '36.128.0.0/10',
5533 'CO': '181.240.0.0/12',
5534 'CR': '201.192.0.0/12',
5535 'CU': '152.206.0.0/15',
5536 'CV': '165.90.96.0/19',
5537 'CW': '190.88.128.0/17',
53896ca5 5538 'CY': '31.153.0.0/16',
773f291d
S
5539 'CZ': '88.100.0.0/14',
5540 'DE': '53.0.0.0/8',
5541 'DJ': '197.241.0.0/17',
5542 'DK': '87.48.0.0/12',
5543 'DM': '192.243.48.0/20',
5544 'DO': '152.166.0.0/15',
5545 'DZ': '41.96.0.0/12',
5546 'EC': '186.68.0.0/15',
5547 'EE': '90.190.0.0/15',
5548 'EG': '156.160.0.0/11',
5549 'ER': '196.200.96.0/20',
5550 'ES': '88.0.0.0/11',
5551 'ET': '196.188.0.0/14',
5552 'EU': '2.16.0.0/13',
5553 'FI': '91.152.0.0/13',
5554 'FJ': '144.120.0.0/16',
53896ca5 5555 'FK': '80.73.208.0/21',
773f291d
S
5556 'FM': '119.252.112.0/20',
5557 'FO': '88.85.32.0/19',
5558 'FR': '90.0.0.0/9',
5559 'GA': '41.158.0.0/15',
5560 'GB': '25.0.0.0/8',
5561 'GD': '74.122.88.0/21',
5562 'GE': '31.146.0.0/16',
5563 'GF': '161.22.64.0/18',
5564 'GG': '62.68.160.0/19',
53896ca5
S
5565 'GH': '154.160.0.0/12',
5566 'GI': '95.164.0.0/16',
773f291d
S
5567 'GL': '88.83.0.0/19',
5568 'GM': '160.182.0.0/15',
5569 'GN': '197.149.192.0/18',
5570 'GP': '104.250.0.0/19',
5571 'GQ': '105.235.224.0/20',
5572 'GR': '94.64.0.0/13',
5573 'GT': '168.234.0.0/16',
5574 'GU': '168.123.0.0/16',
5575 'GW': '197.214.80.0/20',
5576 'GY': '181.41.64.0/18',
5577 'HK': '113.252.0.0/14',
5578 'HN': '181.210.0.0/16',
5579 'HR': '93.136.0.0/13',
5580 'HT': '148.102.128.0/17',
5581 'HU': '84.0.0.0/14',
5582 'ID': '39.192.0.0/10',
5583 'IE': '87.32.0.0/12',
5584 'IL': '79.176.0.0/13',
5585 'IM': '5.62.80.0/20',
5586 'IN': '117.192.0.0/10',
5587 'IO': '203.83.48.0/21',
5588 'IQ': '37.236.0.0/14',
5589 'IR': '2.176.0.0/12',
5590 'IS': '82.221.0.0/16',
5591 'IT': '79.0.0.0/10',
5592 'JE': '87.244.64.0/18',
5593 'JM': '72.27.0.0/17',
5594 'JO': '176.29.0.0/16',
53896ca5 5595 'JP': '133.0.0.0/8',
773f291d
S
5596 'KE': '105.48.0.0/12',
5597 'KG': '158.181.128.0/17',
5598 'KH': '36.37.128.0/17',
5599 'KI': '103.25.140.0/22',
5600 'KM': '197.255.224.0/20',
53896ca5 5601 'KN': '198.167.192.0/19',
773f291d
S
5602 'KP': '175.45.176.0/22',
5603 'KR': '175.192.0.0/10',
5604 'KW': '37.36.0.0/14',
5605 'KY': '64.96.0.0/15',
5606 'KZ': '2.72.0.0/13',
5607 'LA': '115.84.64.0/18',
5608 'LB': '178.135.0.0/16',
53896ca5 5609 'LC': '24.92.144.0/20',
773f291d
S
5610 'LI': '82.117.0.0/19',
5611 'LK': '112.134.0.0/15',
53896ca5 5612 'LR': '102.183.0.0/16',
773f291d
S
5613 'LS': '129.232.0.0/17',
5614 'LT': '78.56.0.0/13',
5615 'LU': '188.42.0.0/16',
5616 'LV': '46.109.0.0/16',
5617 'LY': '41.252.0.0/14',
5618 'MA': '105.128.0.0/11',
5619 'MC': '88.209.64.0/18',
5620 'MD': '37.246.0.0/16',
5621 'ME': '178.175.0.0/17',
5622 'MF': '74.112.232.0/21',
5623 'MG': '154.126.0.0/17',
5624 'MH': '117.103.88.0/21',
5625 'MK': '77.28.0.0/15',
5626 'ML': '154.118.128.0/18',
5627 'MM': '37.111.0.0/17',
5628 'MN': '49.0.128.0/17',
5629 'MO': '60.246.0.0/16',
5630 'MP': '202.88.64.0/20',
5631 'MQ': '109.203.224.0/19',
5632 'MR': '41.188.64.0/18',
5633 'MS': '208.90.112.0/22',
5634 'MT': '46.11.0.0/16',
5635 'MU': '105.16.0.0/12',
5636 'MV': '27.114.128.0/18',
53896ca5 5637 'MW': '102.70.0.0/15',
773f291d
S
5638 'MX': '187.192.0.0/11',
5639 'MY': '175.136.0.0/13',
5640 'MZ': '197.218.0.0/15',
5641 'NA': '41.182.0.0/16',
5642 'NC': '101.101.0.0/18',
5643 'NE': '197.214.0.0/18',
5644 'NF': '203.17.240.0/22',
5645 'NG': '105.112.0.0/12',
5646 'NI': '186.76.0.0/15',
5647 'NL': '145.96.0.0/11',
5648 'NO': '84.208.0.0/13',
5649 'NP': '36.252.0.0/15',
5650 'NR': '203.98.224.0/19',
5651 'NU': '49.156.48.0/22',
5652 'NZ': '49.224.0.0/14',
5653 'OM': '5.36.0.0/15',
5654 'PA': '186.72.0.0/15',
5655 'PE': '186.160.0.0/14',
5656 'PF': '123.50.64.0/18',
5657 'PG': '124.240.192.0/19',
5658 'PH': '49.144.0.0/13',
5659 'PK': '39.32.0.0/11',
5660 'PL': '83.0.0.0/11',
5661 'PM': '70.36.0.0/20',
5662 'PR': '66.50.0.0/16',
5663 'PS': '188.161.0.0/16',
5664 'PT': '85.240.0.0/13',
5665 'PW': '202.124.224.0/20',
5666 'PY': '181.120.0.0/14',
5667 'QA': '37.210.0.0/15',
53896ca5 5668 'RE': '102.35.0.0/16',
773f291d 5669 'RO': '79.112.0.0/13',
53896ca5 5670 'RS': '93.86.0.0/15',
773f291d 5671 'RU': '5.136.0.0/13',
53896ca5 5672 'RW': '41.186.0.0/16',
773f291d
S
5673 'SA': '188.48.0.0/13',
5674 'SB': '202.1.160.0/19',
5675 'SC': '154.192.0.0/11',
53896ca5 5676 'SD': '102.120.0.0/13',
773f291d 5677 'SE': '78.64.0.0/12',
53896ca5 5678 'SG': '8.128.0.0/10',
773f291d
S
5679 'SI': '188.196.0.0/14',
5680 'SK': '78.98.0.0/15',
53896ca5 5681 'SL': '102.143.0.0/17',
773f291d
S
5682 'SM': '89.186.32.0/19',
5683 'SN': '41.82.0.0/15',
53896ca5 5684 'SO': '154.115.192.0/18',
773f291d
S
5685 'SR': '186.179.128.0/17',
5686 'SS': '105.235.208.0/21',
5687 'ST': '197.159.160.0/19',
5688 'SV': '168.243.0.0/16',
5689 'SX': '190.102.0.0/20',
5690 'SY': '5.0.0.0/16',
5691 'SZ': '41.84.224.0/19',
5692 'TC': '65.255.48.0/20',
5693 'TD': '154.68.128.0/19',
5694 'TG': '196.168.0.0/14',
5695 'TH': '171.96.0.0/13',
5696 'TJ': '85.9.128.0/18',
5697 'TK': '27.96.24.0/21',
5698 'TL': '180.189.160.0/20',
5699 'TM': '95.85.96.0/19',
5700 'TN': '197.0.0.0/11',
5701 'TO': '175.176.144.0/21',
5702 'TR': '78.160.0.0/11',
5703 'TT': '186.44.0.0/15',
5704 'TV': '202.2.96.0/19',
5705 'TW': '120.96.0.0/11',
5706 'TZ': '156.156.0.0/14',
53896ca5
S
5707 'UA': '37.52.0.0/14',
5708 'UG': '102.80.0.0/13',
5709 'US': '6.0.0.0/8',
773f291d 5710 'UY': '167.56.0.0/13',
53896ca5 5711 'UZ': '84.54.64.0/18',
773f291d 5712 'VA': '212.77.0.0/19',
53896ca5 5713 'VC': '207.191.240.0/21',
773f291d 5714 'VE': '186.88.0.0/13',
53896ca5 5715 'VG': '66.81.192.0/20',
773f291d
S
5716 'VI': '146.226.0.0/16',
5717 'VN': '14.160.0.0/11',
5718 'VU': '202.80.32.0/20',
5719 'WF': '117.20.32.0/21',
5720 'WS': '202.4.32.0/19',
5721 'YE': '134.35.0.0/16',
5722 'YT': '41.242.116.0/22',
5723 'ZA': '41.0.0.0/11',
53896ca5
S
5724 'ZM': '102.144.0.0/13',
5725 'ZW': '102.177.192.0/18',
773f291d
S
5726 }
5727
5728 @classmethod
5f95927a
S
5729 def random_ipv4(cls, code_or_block):
5730 if len(code_or_block) == 2:
5731 block = cls._country_ip_map.get(code_or_block.upper())
5732 if not block:
5733 return None
5734 else:
5735 block = code_or_block
773f291d
S
5736 addr, preflen = block.split('/')
5737 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5738 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5739 return compat_str(socket.inet_ntoa(
4248dad9 5740 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
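# A minimal usage sketch (illustration only; assumes the enclosing class above is
# yt-dlp's GeoUtils): random_ipv4 accepts either a two-letter country code or an
# explicit CIDR block. For '64.96.0.0/15' (the 'KY' entry above), addr_min is
# 0x40600000 and addr_max 0x4061ffff, so every result lies in 64.96.0.0-64.97.255.255.
def _example_random_ipv4():  # hypothetical helper, not part of the module
    assert GeoUtils.random_ipv4('64.96.0.0/15').startswith(('64.96.', '64.97.'))
    assert GeoUtils.random_ipv4('ZZ') is None  # unknown country code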
773f291d
S
5741
5742
91410c9b 5743class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5744 def __init__(self, proxies=None):
5745 # Set default handlers
5746 for type in ('http', 'https'):
5747 setattr(self, '%s_open' % type,
5748 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5749 meth(r, proxy, type))
38e87f6c 5750 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5751
91410c9b 5752 def proxy_open(self, req, proxy, type):
2461f79d 5753 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5754 if req_proxy is not None:
5755 proxy = req_proxy
2461f79d
PH
5756 del req.headers['Ytdl-request-proxy']
5757
5758 if proxy == '__noproxy__':
5759 return None # No Proxy
51fb4995 5760 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5761 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5762 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
71aff188 5763 return None
91410c9b
PH
5764 return compat_urllib_request.ProxyHandler.proxy_open(
5765 self, req, proxy, type)
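# A hedged sketch (illustration only; everything except the handler itself is
# hypothetical) of how a per-request proxy is selected: the 'Ytdl-request-proxy'
# header overrides the handler's default mapping, and '__noproxy__' disables proxying.
def _example_per_request_proxy(url, proxy_url):  # not part of the module
    handler = PerRequestProxyHandler()  # falls back to environment proxies
    req = compat_urllib_request.Request(url)
    req.add_header('Ytdl-request-proxy', proxy_url)  # or '__noproxy__' to disable
    return compat_urllib_request.build_opener(handler).open(req)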
5bc880b9
YCH
5766
5767
0a5445dd
YCH
5768# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5769# released into Public Domain
5770# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5771
5772def long_to_bytes(n, blocksize=0):
5773 """long_to_bytes(n:long, blocksize:int) : string
5774 Convert a long integer to a byte string.
5775
5776 If optional blocksize is given and greater than zero, pad the front of the
5777 byte string with binary zeros so that the length is a multiple of
5778 blocksize.
5779 """
5780 # after much testing, this algorithm was deemed to be the fastest
5781 s = b''
5782 n = int(n)
5783 while n > 0:
5784 s = compat_struct_pack('>I', n & 0xffffffff) + s
5785 n = n >> 32
5786 # strip off leading zeros
5787 for i in range(len(s)):
5788 if s[i] != b'\000'[0]:
5789 break
5790 else:
5791 # only happens when n == 0
5792 s = b'\000'
5793 i = 0
5794 s = s[i:]
5795 # add back some pad bytes. this could be done more efficiently w.r.t. the
5796 # de-padding being done above, but sigh...
5797 if blocksize > 0 and len(s) % blocksize:
5798 s = (blocksize - len(s) % blocksize) * b'\000' + s
5799 return s
5800
5801
5802def bytes_to_long(s):
5803 """bytes_to_long(string) : long
5804 Convert a byte string to a long integer.
5805
5806 This is (essentially) the inverse of long_to_bytes().
5807 """
5808 acc = 0
5809 length = len(s)
5810 if length % 4:
5811 extra = (4 - length % 4)
5812 s = b'\000' * extra + s
5813 length = length + extra
5814 for i in range(0, length, 4):
5815 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5816 return acc
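# Illustrative round-trip (not part of the module): long_to_bytes and bytes_to_long
# are inverses, and a positive blocksize left-pads the result with zero bytes.
def _example_long_bytes_roundtrip():  # illustration only
    assert long_to_bytes(65537) == b'\x01\x00\x01'
    assert long_to_bytes(65537, blocksize=4) == b'\x00\x01\x00\x01'
    assert bytes_to_long(b'\x00\x01\x00\x01') == 65537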
5817
5818
5bc880b9
YCH
5819def ohdave_rsa_encrypt(data, exponent, modulus):
5820 '''
5821 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5822
5823 Input:
5824 data: data to encrypt, bytes-like object
5825 exponent, modulus: parameter e and N of RSA algorithm, both integer
5826 Output: hex string of encrypted data
5827
5828 Limitation: supports one block encryption only
5829 '''
5830
5831 payload = int(binascii.hexlify(data[::-1]), 16)
5832 encrypted = pow(payload, exponent, modulus)
5833 return '%x' % encrypted
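# Toy example (illustration only; deliberately tiny, insecure parameters): the input
# is reversed (treated as little-endian) before exponentiation and the result is a
# bare hex string, e.g. payload 2 with e=3, N=33 gives pow(2, 3, 33) == 8.
def _example_ohdave_rsa():  # not part of the module
    assert ohdave_rsa_encrypt(b'\x02', 3, 33) == '8'
    assert ohdave_rsa_encrypt(b'\x01\x00', 3, 33) == '1'  # reversed to b'\x00\x01' -> 1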
81bdc8fd
YCH
5834
5835
f48409c7
YCH
5836def pkcs1pad(data, length):
5837 """
5838 Padding input data with PKCS#1 scheme
5839
5840 @param {int[]} data input data
5841 @param {int} length target length
5842 @returns {int[]} padded data
5843 """
5844 if len(data) > length - 11:
5845 raise ValueError('Input data too long for PKCS#1 padding')
5846
5847 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # PKCS#1 v1.5 padding bytes must be non-zero
5848 return [0, 2] + pseudo_random + [0] + data
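# Illustrative check (not part of the module): the padded block is
# [0, 2, <random bytes>, 0, <data>] and always has exactly `length` entries,
# which is why the input may be at most length - 11 values long.
def _example_pkcs1pad():  # illustration only
    padded = pkcs1pad([1, 2, 3], 16)
    assert len(padded) == 16
    assert padded[:2] == [0, 2] and padded[-4:] == [0, 1, 2, 3]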
5849
5850
5eb6bdce 5851def encode_base_n(num, n, table=None):
59f898b7 5852 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
5853 if not table:
5854 table = FULL_TABLE[:n]
5855
5eb6bdce
YCH
5856 if n > len(table):
5857 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5858
5859 if num == 0:
5860 return table[0]
5861
81bdc8fd
YCH
5862 ret = ''
5863 while num:
5864 ret = table[num % n] + ret
5865 num = num // n
5866 return ret
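# Illustrative values (not part of the module): the default table is 0-9a-zA-Z,
# so bases up to 62 work without passing an explicit table.
def _example_encode_base_n():  # illustration only
    assert encode_base_n(255, 16) == 'ff'
    assert encode_base_n(61, 62) == 'Z'
    assert encode_base_n(62, 62) == '10'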
f52354a8
YCH
5867
5868
5869def decode_packed_codes(code):
06b3fe29 5870 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5871 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
5872 base = int(base)
5873 count = int(count)
5874 symbols = symbols.split('|')
5875 symbol_table = {}
5876
5877 while count:
5878 count -= 1
5eb6bdce 5879 base_n_count = encode_base_n(count, base)
f52354a8
YCH
5880 symbol_table[base_n_count] = symbols[count] or base_n_count
5881
5882 return re.sub(
5883 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5884 obfuscated_code)
e154c651 5885
5886
1ced2221
S
5887def caesar(s, alphabet, shift):
5888 if shift == 0:
5889 return s
5890 l = len(alphabet)
5891 return ''.join(
5892 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5893 for c in s)
5894
5895
5896def rot47(s):
5897 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
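# Illustrative values (not part of the module): caesar() only shifts characters
# present in the given alphabet; rot47() uses the printable ASCII range 33-126
# with shift 47, so applying it twice restores the input.
def _example_rot47():  # illustration only
    assert caesar('ab-z', 'abcdefghijklmnopqrstuvwxyz', 2) == 'cd-b'
    assert rot47('Hello') == 'w6==@'
    assert rot47(rot47('Hello')) == 'Hello'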
5898
5899
e154c651 5900def parse_m3u8_attributes(attrib):
5901 info = {}
5902 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5903 if val.startswith('"'):
5904 val = val[1:-1]
5905 info[key] = val
5906 return info
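# Illustrative value (not part of the module): quoted attribute values may contain
# commas and are returned with the surrounding quotes stripped.
def _example_parse_m3u8_attributes():  # illustration only
    attrs = parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
    assert attrs == {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}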
1143535d
YCH
5907
5908
5909def urshift(val, n):
5910 return val >> n if val >= 0 else (val + 0x100000000) >> n
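# Illustrative values (not part of the module): urshift() mimics JavaScript's
# unsigned right shift (>>>) for 32-bit values, which matters when porting
# obfuscated player code to Python.
def _example_urshift():  # illustration only
    assert urshift(16, 2) == 4
    assert urshift(-1, 28) == 15  # JS: -1 >>> 28 === 15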
d3f8e038
YCH
5911
5912
5913# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5914# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
5915def decode_png(png_data):
5916 # Reference: https://www.w3.org/TR/PNG/
5917 header = png_data[8:]
5918
5919 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5920 raise IOError('Not a valid PNG file.')
5921
5922 int_map = {1: '>B', 2: '>H', 4: '>I'}
5923 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5924
5925 chunks = []
5926
5927 while header:
5928 length = unpack_integer(header[:4])
5929 header = header[4:]
5930
5931 chunk_type = header[:4]
5932 header = header[4:]
5933
5934 chunk_data = header[:length]
5935 header = header[length:]
5936
5937 header = header[4:] # Skip CRC
5938
5939 chunks.append({
5940 'type': chunk_type,
5941 'length': length,
5942 'data': chunk_data
5943 })
5944
5945 ihdr = chunks[0]['data']
5946
5947 width = unpack_integer(ihdr[:4])
5948 height = unpack_integer(ihdr[4:8])
5949
5950 idat = b''
5951
5952 for chunk in chunks:
5953 if chunk['type'] == b'IDAT':
5954 idat += chunk['data']
5955
5956 if not idat:
5957 raise IOError('Unable to read PNG data.')
5958
5959 decompressed_data = bytearray(zlib.decompress(idat))
5960
5961 stride = width * 3
5962 pixels = []
5963
5964 def _get_pixel(idx):
5965 x = idx % stride
5966 y = idx // stride
5967 return pixels[y][x]
5968
5969 for y in range(height):
5970 basePos = y * (1 + stride)
5971 filter_type = decompressed_data[basePos]
5972
5973 current_row = []
5974
5975 pixels.append(current_row)
5976
5977 for x in range(stride):
5978 color = decompressed_data[1 + basePos + x]
5979 basex = y * stride + x
5980 left = 0
5981 up = 0
5982
5983 if x > 2:
5984 left = _get_pixel(basex - 3)
5985 if y > 0:
5986 up = _get_pixel(basex - stride)
5987
5988 if filter_type == 1: # Sub
5989 color = (color + left) & 0xff
5990 elif filter_type == 2: # Up
5991 color = (color + up) & 0xff
5992 elif filter_type == 3: # Average
5993 color = (color + ((left + up) >> 1)) & 0xff
5994 elif filter_type == 4: # Paeth
5995 a = left
5996 b = up
5997 c = 0
5998
5999 if x > 2 and y > 0:
6000 c = _get_pixel(basex - stride - 3)
6001
6002 p = a + b - c
6003
6004 pa = abs(p - a)
6005 pb = abs(p - b)
6006 pc = abs(p - c)
6007
6008 if pa <= pb and pa <= pc:
6009 color = (color + a) & 0xff
6010 elif pb <= pc:
6011 color = (color + b) & 0xff
6012 else:
6013 color = (color + c) & 0xff
6014
6015 current_row.append(color)
6016
6017 return width, height, pixels
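# Note and sketch (not part of the module): the decoder above assumes a
# non-interlaced 8-bit RGB image (3 bytes per pixel, no alpha); each entry of
# `pixels` is a row of width * 3 byte values.
def _example_png_pixel(pixels, px, py):  # hypothetical helper, illustration only
    return pixels[py][px * 3], pixels[py][px * 3 + 1], pixels[py][px * 3 + 2]  # (R, G, B)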
efa97bdc
YCH
6018
6019
6020def write_xattr(path, key, value):
6021 # This mess below finds the best xattr tool for the job
6022 try:
6023 # try the pyxattr module...
6024 import xattr
6025
53a7e3d2
YCH
6026 if hasattr(xattr, 'set'): # pyxattr
6027 # Unicode arguments are not supported in python-pyxattr until
6028 # version 0.5.0
067aa17e 6029 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6030 pyxattr_required_version = '0.5.0'
6031 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6032 # TODO: fallback to CLI tools
6033 raise XAttrUnavailableError(
6034 'python-pyxattr is detected but is too old. '
7a5c1cfe 6035 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6036 'Falling back to other xattr implementations' % (
6037 pyxattr_required_version, xattr.__version__))
6038
6039 setxattr = xattr.set
6040 else: # xattr
6041 setxattr = xattr.setxattr
efa97bdc
YCH
6042
6043 try:
53a7e3d2 6044 setxattr(path, key, value)
efa97bdc
YCH
6045 except EnvironmentError as e:
6046 raise XAttrMetadataError(e.errno, e.strerror)
6047
6048 except ImportError:
6049 if compat_os_name == 'nt':
6050 # Write xattrs to NTFS Alternate Data Streams:
6051 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6052 assert ':' not in key
6053 assert os.path.exists(path)
6054
6055 ads_fn = path + ':' + key
6056 try:
6057 with open(ads_fn, 'wb') as f:
6058 f.write(value)
6059 except EnvironmentError as e:
6060 raise XAttrMetadataError(e.errno, e.strerror)
6061 else:
6062 user_has_setfattr = check_executable('setfattr', ['--version'])
6063 user_has_xattr = check_executable('xattr', ['-h'])
6064
6065 if user_has_setfattr or user_has_xattr:
6066
6067 value = value.decode('utf-8')
6068 if user_has_setfattr:
6069 executable = 'setfattr'
6070 opts = ['-n', key, '-v', value]
6071 elif user_has_xattr:
6072 executable = 'xattr'
6073 opts = ['-w', key, value]
6074
3089bc74
S
6075 cmd = ([encodeFilename(executable, True)]
6076 + [encodeArgument(o) for o in opts]
6077 + [encodeFilename(path, True)])
efa97bdc
YCH
6078
6079 try:
6080 p = subprocess.Popen(
6081 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6082 except EnvironmentError as e:
6083 raise XAttrMetadataError(e.errno, e.strerror)
f5b1bca9 6084 stdout, stderr = process_communicate_or_kill(p)
efa97bdc
YCH
6085 stderr = stderr.decode('utf-8', 'replace')
6086 if p.returncode != 0:
6087 raise XAttrMetadataError(p.returncode, stderr)
6088
6089 else:
6090 # On Unix, but neither pyxattr nor the setfattr/xattr tools could be found.
6091 if sys.platform.startswith('linux'):
6092 raise XAttrUnavailableError(
6093 "Couldn't find a tool to set the xattrs. "
6094 "Install either the python 'pyxattr' or 'xattr' "
6095 "modules, or the GNU 'attr' package "
6096 "(which contains the 'setfattr' tool).")
6097 else:
6098 raise XAttrUnavailableError(
6099 "Couldn't find a tool to set the xattrs. "
6100 "Install either the python 'xattr' module, "
6101 "or the 'xattr' binary.")
0c265486
YCH
6102
6103
6104def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6105 start_date = datetime.date(1950, 1, 1)
6106 end_date = datetime.date(1995, 12, 31)
6107 offset = random.randint(0, (end_date - start_date).days)
6108 random_date = start_date + datetime.timedelta(offset)
0c265486 6109 return {
aa374bc7
AS
6110 year_field: str(random_date.year),
6111 month_field: str(random_date.month),
6112 day_field: str(random_date.day),
0c265486 6113 }
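# Illustrative check (not part of the module): the argument names become the keys,
# the values are decimal strings, and the year always falls in 1950-1995.
def _example_random_birthday():  # illustration only
    info = random_birthday('birth_year', 'birth_month', 'birth_day')
    assert set(info) == {'birth_year', 'birth_month', 'birth_day'}
    assert 1950 <= int(info['birth_year']) <= 1995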
732044af 6114
c76eb41b 6115
732044af 6116# Templates for internet shortcut files, which are plain text files.
6117DOT_URL_LINK_TEMPLATE = '''
6118[InternetShortcut]
6119URL=%(url)s
6120'''.lstrip()
6121
6122DOT_WEBLOC_LINK_TEMPLATE = '''
6123<?xml version="1.0" encoding="UTF-8"?>
6124<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6125<plist version="1.0">
6126<dict>
6127\t<key>URL</key>
6128\t<string>%(url)s</string>
6129</dict>
6130</plist>
6131'''.lstrip()
6132
6133DOT_DESKTOP_LINK_TEMPLATE = '''
6134[Desktop Entry]
6135Encoding=UTF-8
6136Name=%(filename)s
6137Type=Link
6138URL=%(url)s
6139Icon=text-html
6140'''.lstrip()
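# Illustrative usage (not part of the module; the URL is hypothetical): the templates
# are filled with plain %-formatting, e.g.
#     DOT_URL_LINK_TEMPLATE % {'url': 'https://example.com/watch?v=xyz'}
# yields "[InternetShortcut]\nURL=...\n" ready to be written out; the .webloc
# template works the same way, while the .desktop one also expects a 'filename' key.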
6141
6142
6143def iri_to_uri(iri):
6144 """
6145 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6146
6147 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6148 """
6149
6150 iri_parts = compat_urllib_parse_urlparse(iri)
6151
6152 if '[' in iri_parts.netloc:
6153 raise ValueError('IPv6 URIs are not yet supported.')
6154 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6155
6156 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6157
6158 net_location = ''
6159 if iri_parts.username:
6160 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6161 if iri_parts.password is not None:
6162 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6163 net_location += '@'
6164
6165 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6166 # The 'idna' encoding produces ASCII text.
6167 if iri_parts.port is not None and iri_parts.port != 80:
6168 net_location += ':' + str(iri_parts.port)
6169
6170 return compat_urllib_parse_urlunparse(
6171 (iri_parts.scheme,
6172 net_location,
6173
6174 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6175
6176 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6177 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6178
6179 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6180 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6181
6182 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6183
6184 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
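# Illustrative values (not part of the module): non-ASCII characters are
# percent-encoded as UTF-8, while an already ASCII-only URI passes through unchanged.
def _example_iri_to_uri():  # illustration only
    assert iri_to_uri('http://example.com/r\u00e9sum\u00e9') == 'http://example.com/r%C3%A9sum%C3%A9'
    assert iri_to_uri('http://example.com/plain?a=b') == 'http://example.com/plain?a=b'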
6185
6186
6187def to_high_limit_path(path):
6188 if sys.platform in ['win32', 'cygwin']:
6189 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6190 return r'\\?\ '.rstrip() + os.path.abspath(path)
6191
6192 return path
76d321f6 6193
c76eb41b 6194
b868936c 6195def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6196 if field is None:
6197 val = obj if obj is not None else default
6198 else:
6199 val = obj.get(field, default)
76d321f6 6200 if func and val not in ignore:
6201 val = func(val)
6202 return template % val if val not in ignore else default
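# Illustrative values (not part of the module): values listed in `ignore` collapse
# to `default`; everything else is passed through `func` (if any) and the template.
def _example_format_field():  # illustration only
    assert format_field({'height': 1080}, 'height', '%sp') == '1080p'
    assert format_field({'height': None}, 'height', '%sp') == ''
    assert format_field(42, template='[%s]') == '[42]'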
00dd0cd5 6203
6204
6205def clean_podcast_url(url):
6206 return re.sub(r'''(?x)
6207 (?:
6208 (?:
6209 chtbl\.com/track|
6210 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6211 play\.podtrac\.com
6212 )/[^/]+|
6213 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6214 flex\.acast\.com|
6215 pd(?:
6216 cn\.co| # https://podcorn.com/analytics-prefix/
6217 st\.fm # https://podsights.com/docs/
6218 )/e
6219 )/''', '', url)
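# Illustrative transformation (not part of the module; the URL is hypothetical):
#     clean_podcast_url('https://chtbl.com/track/XXXX/traffic.example.com/ep1.mp3')
#         -> 'https://traffic.example.com/ep1.mp3'
# i.e. the tracking prefix is stripped, leaving the direct media URL.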
ffcb8191
THD
6220
6221
6222_HEX_TABLE = '0123456789abcdef'
6223
6224
6225def random_uuidv4():
6226 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6227
6228
6229def make_dir(path, to_screen=None):
6230 try:
6231 dn = os.path.dirname(path)
6232 if dn and not os.path.exists(dn):
6233 os.makedirs(dn)
6234 return True
6235 except (OSError, IOError) as err:
6236 if callable(to_screen):
6237 to_screen('unable to create directory ' + error_to_compat_str(err))
6238 return False
f74980cb 6239
6240
6241def get_executable_path():
c552ae88 6242 from zipimport import zipimporter
6243 if hasattr(sys, 'frozen'): # Running from PyInstaller
6244 path = os.path.dirname(sys.executable)
6245 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6246 path = os.path.join(os.path.dirname(__file__), '../..')
6247 else:
6248 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6249 return os.path.abspath(path)
6250
6251
2f567473 6252def load_plugins(name, suffix, namespace):
f74980cb 6253 plugin_info = [None]
6254 classes = []
6255 try:
6256 plugin_info = imp.find_module(
6257 name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6258 plugins = imp.load_module(name, *plugin_info)
6259 for name in dir(plugins):
2f567473 6260 if name in namespace:
6261 continue
6262 if not name.endswith(suffix):
f74980cb 6263 continue
6264 klass = getattr(plugins, name)
6265 classes.append(klass)
6266 namespace[name] = klass
6267 except ImportError:
6268 pass
6269 finally:
6270 if plugin_info[0] is not None:
6271 plugin_info[0].close()
6272 return classes
06167fbb 6273
6274
325ebc17 6275def traverse_obj(
352d63fd 6276 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6277 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6278 ''' Traverse nested list/dict/tuple
8f334380 6279 @param path_list A list of paths which are checked one by one.
6280 Each path is a list of keys where each key is a string,
6281 a tuple of strings or "...". When a tuple is given,
6282 all the keys given in the tuple are traversed, and
6283 "..." traverses all the keys in the object
325ebc17 6284 @param default Default value to return
352d63fd 6285 @param expected_type Only accept final value of this type (Can also be any callable)
6286 @param get_all Return all the values obtained from a path or only the first one
324ad820 6287 @param casesense Whether to consider dictionary keys as case sensitive
6288 @param is_user_input Whether the keys are generated from user input. If True,
6289 strings are converted to int/slice if necessary
6290 @param traverse_string Whether to traverse inside strings. If True, any
6291 non-compatible object will also be converted into a string
8f334380 6292 # TODO: Write tests
324ad820 6293 '''
325ebc17 6294 if not casesense:
dbf5416a 6295 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6296 path_list = (map(_lower, variadic(path)) for path in path_list)
6297
6298 def _traverse_obj(obj, path, _current_depth=0):
6299 nonlocal depth
575e17a1 6300 if obj is None:
6301 return None
8f334380 6302 path = tuple(variadic(path))
6303 for i, key in enumerate(path):
6304 if isinstance(key, (list, tuple)):
6305 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6306 key = ...
6307 if key is ...:
6308 obj = (obj.values() if isinstance(obj, dict)
6309 else obj if isinstance(obj, (list, tuple, LazyList))
6310 else str(obj) if traverse_string else [])
6311 _current_depth += 1
6312 depth = max(depth, _current_depth)
6313 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
575e17a1 6314 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6315 obj = (obj.get(key) if casesense or (key in obj)
6316 else next((v for k, v in obj.items() if _lower(k) == key), None))
6317 else:
6318 if is_user_input:
6319 key = (int_or_none(key) if ':' not in key
6320 else slice(*map(int_or_none, key.split(':'))))
8f334380 6321 if key == slice(None):
575e17a1 6322 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6323 if not isinstance(key, (int, slice)):
9fea350f 6324 return None
8f334380 6325 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6326 if not traverse_string:
6327 return None
6328 obj = str(obj)
6329 try:
6330 obj = obj[key]
6331 except IndexError:
324ad820 6332 return None
325ebc17 6333 return obj
6334
352d63fd 6335 if isinstance(expected_type, type):
6336 type_test = lambda val: val if isinstance(val, expected_type) else None
6337 elif expected_type is not None:
6338 type_test = expected_type
6339 else:
6340 type_test = lambda val: val
6341
8f334380 6342 for path in path_list:
6343 depth = 0
6344 val = _traverse_obj(obj, path)
325ebc17 6345 if val is not None:
8f334380 6346 if depth:
6347 for _ in range(depth - 1):
6586bca9 6348 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6349 val = [v for v in map(type_test, val) if v is not None]
8f334380 6350 if val:
352d63fd 6351 return val if get_all else val[0]
6352 else:
6353 val = type_test(val)
6354 if val is not None:
8f334380 6355 return val
325ebc17 6356 return default
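# Illustrative paths (not part of the module): '...' branches into every element
# at that level, and the first path in path_list that yields a value wins.
def _example_traverse_obj():  # illustration only
    d = {'items': [{'id': 1}, {'id': 2}, {}]}
    assert traverse_obj(d, ('items', 0, 'id')) == 1
    assert traverse_obj(d, ('items', ..., 'id')) == [1, 2]
    assert traverse_obj(d, ('missing',), ('items', 1, 'id')) == 2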
324ad820 6357
6358
6359def traverse_dict(dictn, keys, casesense=True):
6360 ''' For backward compatibility. Do not use '''
6361 return traverse_obj(dictn, keys, casesense=casesense,
6362 is_user_input=True, traverse_string=True)
6606817a 6363
6364
c634ad2a 6365def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6366 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
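# Illustrative values (not part of the module): strings and bytes count as scalars
# here, so they are wrapped into a tuple rather than iterated character by character.
def _example_variadic():  # illustration only
    assert variadic('spam') == ('spam',)
    assert variadic(['spam', 'eggs']) == ['spam', 'eggs']
    assert variadic(None) == (None,)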