#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import imp
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


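# Illustrative use of JSON_LD_RE (hypothetical `webpage` HTML string, not from
# this file): extract embedded JSON-LD metadata from a downloaded page.
#
#   mobj = re.search(JSON_LD_RE, webpage)
#   if mobj:
#       json_ld = json.loads(mobj.group('json_ld'))
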
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


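# Illustrative call (hypothetical path): the object is serialized to a
# NamedTemporaryFile next to the target and then os.rename()d into place.
#
#   write_json_file({'id': 'abc123', 'title': 'Example'}, 'example.info.json')
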
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


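# Illustrative usage (hypothetical SMIL namespace mapping): pre-bind the prefix
# so ElementTree queries can use it.
#
#   _x = lambda p: xpath_with_ns(p, {'smil': 'http://www.w3.org/2001/SMIL20/Language'})
#   video_node = doc.find(_x('.//smil:video'))
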
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


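# Illustrative usage (hypothetical element/attribute names); `doc` is an
# ElementTree element:
#
#   title = xpath_text(doc, './/title', 'title', fatal=True)
#   thumbnail = xpath_attr(doc, './/thumbnail', 'url', default=None)
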
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist


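# Illustrative (hypothetical markup):
#
#   get_element_by_class('title', '<div class="title main">Foo</div>')  # -> 'Foo'
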
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()


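# Illustrative:
#
#   clean_html('<p>Hello &amp; goodbye</p>')  # -> 'Hello & goodbye'
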
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


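# Illustrative (hypothetical date string):
#
#   timeconvert('Wed, 02 Oct 2002 13:00:00 GMT')  # -> 1033563600
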
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result


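# Illustrative:
#
#   sanitize_filename('New World record at 0:12:34')  # -> 'New World record at 0_12_34'
#   sanitize_filename('A/B: C?', restricted=True)     # -> 'A_B_-_C'
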
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url


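# Illustrative (hypothetical URLs):
#
#   sanitize_url('//example.com/video')   # -> 'http://example.com/video'
#   sanitize_url('httpss://example.com')  # -> 'https://example.com'
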
def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')


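# Illustrative (hypothetical credentials):
#
#   extract_basic_auth('https://user:pass@example.com/feed')
#   # -> ('https://example.com/feed', 'Basic dXNlcjpwYXNz')
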
def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


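# Illustrative:
#
#   orderedSet([1, 2, 1, 3, 2])  # -> [1, 2, 3]
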
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


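# Illustrative:
#
#   unescapeHTML('Ben &amp; Jerry&#39;s')  # -> "Ben & Jerry's"
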
cdb19aa4 2247def escapeHTML(text):
2248 return (
2249 text
2250 .replace('&', '&amp;')
2251 .replace('<', '&lt;')
2252 .replace('>', '&gt;')
2253 .replace('"', '&quot;')
2254 .replace("'", '&#39;')
2255 )
2256
2257
f5b1bca9 2258def process_communicate_or_kill(p, *args, **kwargs):
2259 try:
2260 return p.communicate(*args, **kwargs)
2261 except BaseException: # Including KeyboardInterrupt
2262 p.kill()
2263 p.wait()
2264 raise
2265
2266
aa49acd1
S
2267def get_subprocess_encoding():
2268 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2269 # For subprocess calls, encode with locale encoding
2270 # Refer to http://stackoverflow.com/a/9951851/35070
2271 encoding = preferredencoding()
2272 else:
2273 encoding = sys.getfilesystemencoding()
2274 if encoding is None:
2275 encoding = 'utf-8'
2276 return encoding
2277
2278
8bf48f23 2279def encodeFilename(s, for_subprocess=False):
59ae15a5
PH
2280 """
2281 @param s The name of the file
2282 """
d77c3dfd 2283
8bf48f23 2284 assert type(s) == compat_str
d77c3dfd 2285
59ae15a5
PH
2286 # Python 3 has a Unicode API
2287 if sys.version_info >= (3, 0):
2288 return s
0f00efed 2289
aa49acd1
S
2290 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2291 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2292 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2293 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2294 return s
2295
8ee239e9
YCH
2296 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2297 if sys.platform.startswith('java'):
2298 return s
2299
aa49acd1
S
2300 return s.encode(get_subprocess_encoding(), 'ignore')
2301
2302
2303def decodeFilename(b, for_subprocess=False):
2304
2305 if sys.version_info >= (3, 0):
2306 return b
2307
2308 if not isinstance(b, bytes):
2309 return b
2310
2311 return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2312
f07b74fc
PH
2313
2314def encodeArgument(s):
2315 if not isinstance(s, compat_str):
2316 # Legacy code that uses byte strings
2317 # Uncomment the following line after fixing all post processors
7af808a5 2318 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2319 s = s.decode('ascii')
2320 return encodeFilename(s, True)
2321
2322
aa49acd1
S
2323def decodeArgument(b):
2324 return decodeFilename(b, True)
2325
2326
8271226a
PH
2327def decodeOption(optval):
2328 if optval is None:
2329 return optval
2330 if isinstance(optval, bytes):
2331 optval = optval.decode(preferredencoding())
2332
2333 assert isinstance(optval, compat_str)
2334 return optval
1c256f70 2335
5f6a1245 2336
cdb19aa4 2337def formatSeconds(secs, delim=':', msec=False):
4539dd30 2338 if secs > 3600:
cdb19aa4 2339 ret = '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
4539dd30 2340 elif secs > 60:
cdb19aa4 2341 ret = '%d%s%02d' % (secs // 60, delim, secs % 60)
4539dd30 2342 else:
cdb19aa4 2343 ret = '%d' % secs
2344 return '%s.%03d' % (ret, (secs % 1) * 1000) if msec else ret
4539dd30 2345
a0ddb8a2 2346
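# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). formatSeconds() only emits the fields that are needed.
def _formatSeconds_examples():
    assert formatSeconds(3661) == '1:01:01'
    assert formatSeconds(75) == '1:15'
    assert formatSeconds(45) == '45'
    assert formatSeconds(90, delim='_') == '1_30'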
be4a824d
PH
2347def make_HTTPS_handler(params, **kwargs):
2348 opts_no_check_certificate = params.get('nocheckcertificate', False)
0db261ba 2349 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
be5f2c19 2350 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
0db261ba 2351 if opts_no_check_certificate:
be5f2c19 2352 context.check_hostname = False
0db261ba 2353 context.verify_mode = ssl.CERT_NONE
a2366922 2354 try:
be4a824d 2355 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
a2366922
PH
2356 except TypeError:
2357 # Python 2.7.8
2358 # (create_default_context present but HTTPSHandler has no context=)
2359 pass
2360
2361 if sys.version_info < (3, 2):
d7932313 2362 return YoutubeDLHTTPSHandler(params, **kwargs)
aa37e3d4 2363 else: # Python < 3.4
d7932313 2364 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
ea6d901e 2365 context.verify_mode = (ssl.CERT_NONE
dca08720 2366 if opts_no_check_certificate
ea6d901e 2367 else ssl.CERT_REQUIRED)
303b479e 2368 context.set_default_verify_paths()
be4a824d 2369 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2370
732ea2f0 2371
5873d4cc 2372def bug_reports_message(before=';'):
08f2a92c 2373 if ytdl_is_updateable():
7a5c1cfe 2374 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2375 else:
7a5c1cfe 2376 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2377 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2378 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2379 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2380
2381 before = before.rstrip()
2382 if not before or before.endswith(('.', '!', '?')):
2383 msg = msg[0].title() + msg[1:]
2384
2385 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2386
2387
bf5b9d85
PM
2388class YoutubeDLError(Exception):
2389 """Base exception for YoutubeDL errors."""
2390 pass
2391
2392
3158150c 2393network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2394if hasattr(ssl, 'CertificateError'):
2395 network_exceptions.append(ssl.CertificateError)
2396network_exceptions = tuple(network_exceptions)
2397
2398
bf5b9d85 2399class ExtractorError(YoutubeDLError):
1c256f70 2400 """Error during info extraction."""
5f6a1245 2401
d11271dd 2402 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
9a82b238 2403 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2404 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238
PH
2405 """
2406
3158150c 2407 if sys.exc_info()[0] in network_exceptions:
9a82b238 2408 expected = True
d11271dd
PH
2409 if video_id is not None:
2410 msg = video_id + ': ' + msg
410f3e73 2411 if cause:
28e614de 2412 msg += ' (caused by %r)' % cause
9a82b238 2413 if not expected:
08f2a92c 2414 msg += bug_reports_message()
1c256f70 2415 super(ExtractorError, self).__init__(msg)
d5979c5d 2416
1c256f70 2417 self.traceback = tb
8cc83b8d 2418 self.exc_info = sys.exc_info() # preserve original exception
2eabb802 2419 self.cause = cause
d11271dd 2420 self.video_id = video_id
1c256f70 2421
01951dda
PH
2422 def format_traceback(self):
2423 if self.traceback is None:
2424 return None
28e614de 2425 return ''.join(traceback.format_tb(self.traceback))
01951dda 2426
1c256f70 2427
416c7fcb
PH
2428class UnsupportedError(ExtractorError):
2429 def __init__(self, url):
2430 super(UnsupportedError, self).__init__(
2431 'Unsupported URL: %s' % url, expected=True)
2432 self.url = url
2433
2434
55b3e45b
JMF
2435class RegexNotFoundError(ExtractorError):
2436 """Error when a regex didn't match"""
2437 pass
2438
2439
773f291d
S
2440class GeoRestrictedError(ExtractorError):
2441 """Geographic restriction Error exception.
2442
2443 This exception may be thrown when a video is not available from your
2444 geographic location due to geographic restrictions imposed by a website.
2445 """
b6e0c7d2 2446
773f291d
S
2447 def __init__(self, msg, countries=None):
2448 super(GeoRestrictedError, self).__init__(msg, expected=True)
2449 self.msg = msg
2450 self.countries = countries
2451
2452
bf5b9d85 2453class DownloadError(YoutubeDLError):
59ae15a5 2454 """Download Error exception.
d77c3dfd 2455
59ae15a5
PH
2456 This exception may be thrown by FileDownloader objects if they are not
2457 configured to continue on errors. They will contain the appropriate
2458 error message.
2459 """
5f6a1245 2460
8cc83b8d
FV
2461 def __init__(self, msg, exc_info=None):
2462 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2463 super(DownloadError, self).__init__(msg)
2464 self.exc_info = exc_info
d77c3dfd
FV
2465
2466
498f5606 2467class EntryNotInPlaylist(YoutubeDLError):
2468 """Entry not in playlist exception.
2469
2470 This exception will be thrown by YoutubeDL when a requested entry
2471 is not found in the playlist info_dict
2472 """
2473 pass
2474
2475
bf5b9d85 2476class SameFileError(YoutubeDLError):
59ae15a5 2477 """Same File exception.
d77c3dfd 2478
59ae15a5
PH
2479 This exception will be thrown by FileDownloader objects if they detect
2480 multiple files would have to be downloaded to the same file on disk.
2481 """
2482 pass
d77c3dfd
FV
2483
2484
bf5b9d85 2485class PostProcessingError(YoutubeDLError):
59ae15a5 2486 """Post Processing exception.
d77c3dfd 2487
59ae15a5
PH
2488 This exception may be raised by PostProcessor's .run() method to
2489 indicate an error in the postprocessing task.
2490 """
5f6a1245 2491
7851b379 2492 def __init__(self, msg):
bf5b9d85 2493 super(PostProcessingError, self).__init__(msg)
7851b379 2494 self.msg = msg
d77c3dfd 2495
5f6a1245 2496
8b0d7497 2497class ExistingVideoReached(YoutubeDLError):
2498 """ --max-downloads limit has been reached. """
2499 pass
2500
2501
2502class RejectedVideoReached(YoutubeDLError):
2503 """ --max-downloads limit has been reached. """
2504 pass
2505
2506
bf5b9d85 2507class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2508 """ --max-downloads limit has been reached. """
2509 pass
d77c3dfd
FV
2510
2511
bf5b9d85 2512class UnavailableVideoError(YoutubeDLError):
59ae15a5 2513 """Unavailable Format exception.
d77c3dfd 2514
59ae15a5
PH
2515 This exception will be thrown when a video is requested
2516 in a format that is not available for that video.
2517 """
2518 pass
d77c3dfd
FV
2519
2520
bf5b9d85 2521class ContentTooShortError(YoutubeDLError):
59ae15a5 2522 """Content Too Short exception.
d77c3dfd 2523
59ae15a5
PH
2524 This exception may be raised by FileDownloader objects when a file they
2525 download is too small for what the server announced first, indicating
2526 the connection was probably interrupted.
2527 """
d77c3dfd 2528
59ae15a5 2529 def __init__(self, downloaded, expected):
bf5b9d85
PM
2530 super(ContentTooShortError, self).__init__(
2531 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2532 )
2c7ed247 2533 # Both in bytes
59ae15a5
PH
2534 self.downloaded = downloaded
2535 self.expected = expected
d77c3dfd 2536
5f6a1245 2537
bf5b9d85 2538class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2539 def __init__(self, code=None, msg='Unknown error'):
2540 super(XAttrMetadataError, self).__init__(msg)
2541 self.code = code
bd264412 2542 self.msg = msg
efa97bdc
YCH
2543
2544 # Parsing code and msg
3089bc74 2545 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2546 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2547 self.reason = 'NO_SPACE'
2548 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2549 self.reason = 'VALUE_TOO_LONG'
2550 else:
2551 self.reason = 'NOT_SUPPORTED'
2552
2553
bf5b9d85 2554class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2555 pass
2556
2557
c5a59d93 2558def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2559 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2560 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2561 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2562 if sys.version_info < (3, 0):
65220c3b
S
2563 kwargs['strict'] = True
2564 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2565 source_address = ydl_handler._params.get('source_address')
8959018a 2566
be4a824d 2567 if source_address is not None:
8959018a
AU
2568 # This is to work around _create_connection() from socket where it will try all
2569 # address data from getaddrinfo() including IPv6. This filters the result from
2570 # getaddrinfo() based on the source_address value.
2571 # This is based on the cpython socket.create_connection() function.
2572 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2573 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2574 host, port = address
2575 err = None
2576 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2577 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2578 ip_addrs = [addr for addr in addrs if addr[0] == af]
2579 if addrs and not ip_addrs:
2580 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2581 raise socket.error(
2582 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2583 % (ip_version, source_address[0]))
8959018a
AU
2584 for res in ip_addrs:
2585 af, socktype, proto, canonname, sa = res
2586 sock = None
2587 try:
2588 sock = socket.socket(af, socktype, proto)
2589 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2590 sock.settimeout(timeout)
2591 sock.bind(source_address)
2592 sock.connect(sa)
2593 err = None # Explicitly break reference cycle
2594 return sock
2595 except socket.error as _:
2596 err = _
2597 if sock is not None:
2598 sock.close()
2599 if err is not None:
2600 raise err
2601 else:
9e21e6d9
S
2602 raise socket.error('getaddrinfo returns an empty list')
2603 if hasattr(hc, '_create_connection'):
2604 hc._create_connection = _create_connection
be4a824d
PH
2605 sa = (source_address, 0)
2606 if hasattr(hc, 'source_address'): # Python 2.7+
2607 hc.source_address = sa
2608 else: # Python 2.6
2609 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2610 sock = _create_connection(
be4a824d
PH
2611 (self.host, self.port), self.timeout, sa)
2612 if is_https:
d7932313
PH
2613 self.sock = ssl.wrap_socket(
2614 sock, self.key_file, self.cert_file,
2615 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2616 else:
2617 self.sock = sock
2618 hc.connect = functools.partial(_hc_connect, hc)
2619
2620 return hc
2621
2622
87f0e62d 2623def handle_youtubedl_headers(headers):
992fc9d6
YCH
2624 filtered_headers = headers
2625
2626 if 'Youtubedl-no-compression' in filtered_headers:
2627 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2628 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2629
992fc9d6 2630 return filtered_headers
87f0e62d
YCH
2631
2632
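# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). The internal 'Youtubedl-no-compression' marker strips any
# Accept-Encoding header and is itself removed before the real request is made.
def _handle_youtubedl_headers_example():
    headers = {
        'User-Agent': 'UA',
        'Accept-Encoding': 'gzip, deflate',
        'Youtubedl-no-compression': '1',
    }
    assert handle_youtubedl_headers(headers) == {'User-Agent': 'UA'}
    # Headers without the marker are passed through untouched
    assert handle_youtubedl_headers({'User-Agent': 'UA'}) == {'User-Agent': 'UA'}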
acebc9cd 2633class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2634 """Handler for HTTP requests and responses.
2635
2636 This class, when installed with an OpenerDirector, automatically adds
2637 the standard headers to every HTTP request and handles gzipped and
2638 deflated responses from web servers. If compression is to be avoided in
2639 a particular request, the original request in the program code only has
0424ec30 2640 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2641 removed before making the real request.
2642
2643 Part of this code was copied from:
2644
2645 http://techknack.net/python-urllib2-handlers/
2646
2647 Andrew Rowls, the author of that code, agreed to release it to the
2648 public domain.
2649 """
2650
be4a824d
PH
2651 def __init__(self, params, *args, **kwargs):
2652 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2653 self._params = params
2654
2655 def http_open(self, req):
71aff188
YCH
2656 conn_class = compat_http_client.HTTPConnection
2657
2658 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2659 if socks_proxy:
2660 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2661 del req.headers['Ytdl-socks-proxy']
2662
be4a824d 2663 return self.do_open(functools.partial(
71aff188 2664 _create_http_connection, self, conn_class, False),
be4a824d
PH
2665 req)
2666
59ae15a5
PH
2667 @staticmethod
2668 def deflate(data):
fc2119f2 2669 if not data:
2670 return data
59ae15a5
PH
2671 try:
2672 return zlib.decompress(data, -zlib.MAX_WBITS)
2673 except zlib.error:
2674 return zlib.decompress(data)
2675
acebc9cd 2676 def http_request(self, req):
51f267d9
S
2677 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2678 # always respected by websites, some tend to give out URLs with non percent-encoded
2679 # non-ASCII characters (see telemb.py, ard.py [#3412])
2680 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2681 # To work around aforementioned issue we will replace request's original URL with
2682 # percent-encoded one
2683 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2684 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2685 url = req.get_full_url()
2686 url_escaped = escape_url(url)
2687
2688 # Substitute URL if any change after escaping
2689 if url != url_escaped:
15d260eb 2690 req = update_Request(req, url=url_escaped)
51f267d9 2691
33ac271b 2692 for h, v in std_headers.items():
3d5f7a39
JK
2693 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2694 # The dict keys are capitalized because of this bug by urllib
2695 if h.capitalize() not in req.headers:
33ac271b 2696 req.add_header(h, v)
87f0e62d
YCH
2697
2698 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2699
2700 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2701 # Python 2.6 is brain-dead when it comes to fragments
2702 req._Request__original = req._Request__original.partition('#')[0]
2703 req._Request__r_type = req._Request__r_type.partition('#')[0]
2704
59ae15a5
PH
2705 return req
2706
acebc9cd 2707 def http_response(self, req, resp):
59ae15a5
PH
2708 old_resp = resp
2709 # gzip
2710 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2711 content = resp.read()
2712 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2713 try:
2714 uncompressed = io.BytesIO(gz.read())
2715 except IOError as original_ioerror:
2716 # There may be junk at the end of the file
2717 # See http://stackoverflow.com/q/4928560/35070 for details
2718 for i in range(1, 1024):
2719 try:
2720 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2721 uncompressed = io.BytesIO(gz.read())
2722 except IOError:
2723 continue
2724 break
2725 else:
2726 raise original_ioerror
b407d853 2727 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2728 resp.msg = old_resp.msg
c047270c 2729 del resp.headers['Content-encoding']
59ae15a5
PH
2730 # deflate
2731 if resp.headers.get('Content-encoding', '') == 'deflate':
2732 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2733 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2734 resp.msg = old_resp.msg
c047270c 2735 del resp.headers['Content-encoding']
ad729172 2736 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2737 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2738 if 300 <= resp.code < 400:
2739 location = resp.headers.get('Location')
2740 if location:
2741 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2742 if sys.version_info >= (3, 0):
2743 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2744 else:
2745 location = location.decode('utf-8')
5a4d9ddb
S
2746 location_escaped = escape_url(location)
2747 if location != location_escaped:
2748 del resp.headers['Location']
9a4aec8b
YCH
2749 if sys.version_info < (3, 0):
2750 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2751 resp.headers['Location'] = location_escaped
59ae15a5 2752 return resp
0f8d03f8 2753
acebc9cd
PH
2754 https_request = http_request
2755 https_response = http_response
bf50b038 2756
5de90176 2757
71aff188
YCH
2758def make_socks_conn_class(base_class, socks_proxy):
2759 assert issubclass(base_class, (
2760 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2761
2762 url_components = compat_urlparse.urlparse(socks_proxy)
2763 if url_components.scheme.lower() == 'socks5':
2764 socks_type = ProxyType.SOCKS5
2765 elif url_components.scheme.lower() in ('socks', 'socks4'):
2766 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2767 elif url_components.scheme.lower() == 'socks4a':
2768 socks_type = ProxyType.SOCKS4A
71aff188 2769
cdd94c2e
YCH
2770 def unquote_if_non_empty(s):
2771 if not s:
2772 return s
2773 return compat_urllib_parse_unquote_plus(s)
2774
71aff188
YCH
2775 proxy_args = (
2776 socks_type,
2777 url_components.hostname, url_components.port or 1080,
2778 True, # Remote DNS
cdd94c2e
YCH
2779 unquote_if_non_empty(url_components.username),
2780 unquote_if_non_empty(url_components.password),
71aff188
YCH
2781 )
2782
2783 class SocksConnection(base_class):
2784 def connect(self):
2785 self.sock = sockssocket()
2786 self.sock.setproxy(*proxy_args)
2787 if type(self.timeout) in (int, float):
2788 self.sock.settimeout(self.timeout)
2789 self.sock.connect((self.host, self.port))
2790
2791 if isinstance(self, compat_http_client.HTTPSConnection):
2792 if hasattr(self, '_context'): # Python > 2.6
2793 self.sock = self._context.wrap_socket(
2794 self.sock, server_hostname=self.host)
2795 else:
2796 self.sock = ssl.wrap_socket(self.sock)
2797
2798 return SocksConnection
2799
2800
be4a824d
PH
2801class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2802 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2803 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2804 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2805 self._params = params
2806
2807 def https_open(self, req):
4f264c02 2808 kwargs = {}
71aff188
YCH
2809 conn_class = self._https_conn_class
2810
4f264c02
JMF
2811 if hasattr(self, '_context'): # python > 2.6
2812 kwargs['context'] = self._context
2813 if hasattr(self, '_check_hostname'): # python 3.x
2814 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2815
2816 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2817 if socks_proxy:
2818 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2819 del req.headers['Ytdl-socks-proxy']
2820
be4a824d 2821 return self.do_open(functools.partial(
71aff188 2822 _create_http_connection, self, conn_class, True),
4f264c02 2823 req, **kwargs)
be4a824d
PH
2824
2825
1bab3437 2826class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2827 """
2828 See [1] for cookie file format.
2829
2830 1. https://curl.haxx.se/docs/http-cookies.html
2831 """
e7e62441 2832 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2833 _ENTRY_LEN = 7
2834 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2835# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2836
2837'''
2838 _CookieFileEntry = collections.namedtuple(
2839 'CookieFileEntry',
2840 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2841
1bab3437 2842 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2843 """
2844 Save cookies to a file.
2845
2846 Most of the code is taken from CPython 3.8 and slightly adapted
2847 to support cookie files with UTF-8 in both python 2 and 3.
2848 """
2849 if filename is None:
2850 if self.filename is not None:
2851 filename = self.filename
2852 else:
2853 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2854
1bab3437
S
2855 # Store session cookies with `expires` set to 0 instead of an empty
2856 # string
2857 for cookie in self:
2858 if cookie.expires is None:
2859 cookie.expires = 0
c380cc28
S
2860
2861 with io.open(filename, 'w', encoding='utf-8') as f:
2862 f.write(self._HEADER)
2863 now = time.time()
2864 for cookie in self:
2865 if not ignore_discard and cookie.discard:
2866 continue
2867 if not ignore_expires and cookie.is_expired(now):
2868 continue
2869 if cookie.secure:
2870 secure = 'TRUE'
2871 else:
2872 secure = 'FALSE'
2873 if cookie.domain.startswith('.'):
2874 initial_dot = 'TRUE'
2875 else:
2876 initial_dot = 'FALSE'
2877 if cookie.expires is not None:
2878 expires = compat_str(cookie.expires)
2879 else:
2880 expires = ''
2881 if cookie.value is None:
2882 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2883 # with no name, whereas http.cookiejar regards it as a
2884 # cookie with no value.
2885 name = ''
2886 value = cookie.name
2887 else:
2888 name = cookie.name
2889 value = cookie.value
2890 f.write(
2891 '\t'.join([cookie.domain, initial_dot, cookie.path,
2892 secure, expires, name, value]) + '\n')
1bab3437
S
2893
2894 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2895 """Load cookies from a file."""
2896 if filename is None:
2897 if self.filename is not None:
2898 filename = self.filename
2899 else:
2900 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2901
c380cc28
S
2902 def prepare_line(line):
2903 if line.startswith(self._HTTPONLY_PREFIX):
2904 line = line[len(self._HTTPONLY_PREFIX):]
2905 # comments and empty lines are fine
2906 if line.startswith('#') or not line.strip():
2907 return line
2908 cookie_list = line.split('\t')
2909 if len(cookie_list) != self._ENTRY_LEN:
2910 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2911 cookie = self._CookieFileEntry(*cookie_list)
2912 if cookie.expires_at and not cookie.expires_at.isdigit():
2913 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2914 return line
2915
e7e62441 2916 cf = io.StringIO()
c380cc28 2917 with io.open(filename, encoding='utf-8') as f:
e7e62441 2918 for line in f:
c380cc28
S
2919 try:
2920 cf.write(prepare_line(line))
2921 except compat_cookiejar.LoadError as e:
2922 write_string(
2923 'WARNING: skipping cookie file entry due to %s: %r\n'
2924 % (e, line), sys.stderr)
2925 continue
e7e62441 2926 cf.seek(0)
2927 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2928 # Session cookies are denoted by either `expires` field set to
2929 # an empty string or 0. MozillaCookieJar only recognizes the former
2930 # (see [1]). So we need to force the latter to be recognized as session
2931 # cookies on our own.
2932 # Session cookies may be important for cookies-based authentication,
2933 # e.g. usually, when user does not check 'Remember me' check box while
2934 # logging in on a site, some important cookies are stored as session
2935 # cookies so that not recognizing them will result in failed login.
2936 # 1. https://bugs.python.org/issue17164
2937 for cookie in self:
2938 # Treat `expires=0` cookies as session cookies
2939 if cookie.expires == 0:
2940 cookie.expires = None
2941 cookie.discard = True
2942
2943
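# Illustrative note (not part of the original module): an entry written by
# YoutubeDLCookieJar.save() above is a single line of the seven tab-separated
# _CookieFileEntry fields; the concrete values here are made up:
#
#   .example.com  TRUE  /  FALSE  0  session_id  abcdef123456
#   (the fields are separated by real tab characters in the file)
#
# An `expires_at` of 0 (or an empty string) denotes a session cookie; load()
# turns such entries back into cookies with expires=None and discard=True.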
a6420bf5
S
2944class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2945 def __init__(self, cookiejar=None):
2946 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2947
2948 def http_response(self, request, response):
2949 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2950 # characters in Set-Cookie HTTP header of last response (see
067aa17e 2951 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
2952 # In order to at least prevent crashing we will percent encode Set-Cookie
2953 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
2954 # if sys.version_info < (3, 0) and response.headers:
2955 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2956 # set_cookie = response.headers.get(set_cookie_header)
2957 # if set_cookie:
2958 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2959 # if set_cookie != set_cookie_escaped:
2960 # del response.headers[set_cookie_header]
2961 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
2962 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2963
f5fa042c 2964 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
2965 https_response = http_response
2966
2967
fca6dba8 2968class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 2969 """YoutubeDL redirect handler
2970
2971 The code is based on HTTPRedirectHandler implementation from CPython [1].
2972
2973 This redirect handler solves two issues:
2974 - ensures redirect URL is always unicode under python 2
2975 - introduces support for experimental HTTP response status code
2976 308 Permanent Redirect [2] used by some sites [3]
2977
2978 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2979 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2980 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2981 """
2982
2983 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2984
2985 def redirect_request(self, req, fp, code, msg, headers, newurl):
2986 """Return a Request or None in response to a redirect.
2987
2988 This is called by the http_error_30x methods when a
2989 redirection response is received. If a redirection should
2990 take place, return a new Request to allow http_error_30x to
2991 perform the redirect. Otherwise, raise HTTPError if no-one
2992 else should try to handle this url. Return None if you can't
2993 but another Handler might.
2994 """
2995 m = req.get_method()
2996 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
2997 or code in (301, 302, 303) and m == "POST")):
2998 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
2999 # Strictly (according to RFC 2616), 301 or 302 in response to
3000 # a POST MUST NOT cause a redirection without confirmation
3001 # from the user (of urllib.request, in this case). In practice,
3002 # essentially all clients do redirect in this case, so we do
3003 # the same.
3004
3005 # On python 2 urlh.geturl() may sometimes return redirect URL
3006 # as byte string instead of unicode. This workaround allows
3007 # to force it always return unicode.
3008 if sys.version_info[0] < 3:
3009 newurl = compat_str(newurl)
3010
3011 # Be conciliant with URIs containing a space. This is mainly
3012 # redundant with the more complete encoding done in http_error_302(),
3013 # but it is kept for compatibility with other callers.
3014 newurl = newurl.replace(' ', '%20')
3015
3016 CONTENT_HEADERS = ("content-length", "content-type")
3017 # NB: don't use dict comprehension for python 2.6 compatibility
3018 newheaders = dict((k, v) for k, v in req.headers.items()
3019 if k.lower() not in CONTENT_HEADERS)
3020 return compat_urllib_request.Request(
3021 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3022 unverifiable=True)
fca6dba8
S
3023
3024
46f59e89
S
3025def extract_timezone(date_str):
3026 m = re.search(
3027 r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
3028 date_str)
3029 if not m:
3030 timezone = datetime.timedelta()
3031 else:
3032 date_str = date_str[:-len(m.group('tz'))]
3033 if not m.group('sign'):
3034 timezone = datetime.timedelta()
3035 else:
3036 sign = 1 if m.group('sign') == '+' else -1
3037 timezone = datetime.timedelta(
3038 hours=sign * int(m.group('hours')),
3039 minutes=sign * int(m.group('minutes')))
3040 return timezone, date_str
3041
3042
08b38d54 3043def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3044 """ Return a UNIX timestamp from the given date """
3045
3046 if date_str is None:
3047 return None
3048
52c3a6e4
S
3049 date_str = re.sub(r'\.[0-9]+', '', date_str)
3050
08b38d54 3051 if timezone is None:
46f59e89
S
3052 timezone, date_str = extract_timezone(date_str)
3053
52c3a6e4
S
3054 try:
3055 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3056 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3057 return calendar.timegm(dt.timetuple())
3058 except ValueError:
3059 pass
912b38b4
PH
3060
3061
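# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). parse_iso8601() returns a UTC UNIX timestamp, applying any
# offset extracted by extract_timezone().
def _parse_iso8601_examples():
    assert parse_iso8601('2021-01-01T00:00:00Z') == 1609459200
    assert parse_iso8601('2021-01-01T05:30:00+05:30') == 1609459200
    assert extract_timezone('2021-01-01T12:00:00+05:30') == (
        datetime.timedelta(hours=5, minutes=30), '2021-01-01T12:00:00')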
46f59e89
S
3062def date_formats(day_first=True):
3063 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3064
3065
42bdd9d0 3066def unified_strdate(date_str, day_first=True):
bf50b038 3067 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3068
3069 if date_str is None:
3070 return None
bf50b038 3071 upload_date = None
5f6a1245 3072 # Replace commas
026fcc04 3073 date_str = date_str.replace(',', ' ')
42bdd9d0 3074 # Remove AM/PM + timezone
9bb8e0a3 3075 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3076 _, date_str = extract_timezone(date_str)
42bdd9d0 3077
46f59e89 3078 for expression in date_formats(day_first):
bf50b038
JMF
3079 try:
3080 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3081 except ValueError:
bf50b038 3082 pass
42393ce2
PH
3083 if upload_date is None:
3084 timetuple = email.utils.parsedate_tz(date_str)
3085 if timetuple:
c6b9cf05
S
3086 try:
3087 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3088 except ValueError:
3089 pass
6a750402
JMF
3090 if upload_date is not None:
3091 return compat_str(upload_date)
bf50b038 3092
5f6a1245 3093
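# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical and the expected values assume the DATE_FORMATS* tables,
# defined elsewhere in this module, cover these layouts).
def _unified_strdate_examples():
    assert unified_strdate('December 21, 2010') == '20101221'
    assert unified_strdate('2010-12-21') == '20101221'
    assert unified_strdate('8/7/2009') == '20090708'  # day first by default
    assert unified_strdate('not a date') is None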
46f59e89
S
3094def unified_timestamp(date_str, day_first=True):
3095 if date_str is None:
3096 return None
3097
2ae2ffda 3098 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3099
7dc2a74e 3100 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3101 timezone, date_str = extract_timezone(date_str)
3102
3103 # Remove AM/PM + timezone
3104 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3105
deef3195
S
3106 # Remove unrecognized timezones from ISO 8601-like timestamps
3107 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3108 if m:
3109 date_str = date_str[:-len(m.group('tz'))]
3110
f226880c
PH
3111 # Python only supports microseconds, so remove nanoseconds
3112 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3113 if m:
3114 date_str = m.group(1)
3115
46f59e89
S
3116 for expression in date_formats(day_first):
3117 try:
7dc2a74e 3118 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3119 return calendar.timegm(dt.timetuple())
3120 except ValueError:
3121 pass
3122 timetuple = email.utils.parsedate_tz(date_str)
3123 if timetuple:
7dc2a74e 3124 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3125
3126
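# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical and the expected values assume DATE_FORMATS, defined
# elsewhere in this module, covers these layouts).
def _unified_timestamp_examples():
    assert unified_timestamp('2021-01-01 12:00:00') == 1609502400
    # AM/PM markers and trailing timezone names are stripped before parsing
    assert unified_timestamp('December 15, 2017 at 7:49 am') == 1513324140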
28e614de 3127def determine_ext(url, default_ext='unknown_video'):
85750f89 3128 if url is None or '.' not in url:
f4776371 3129 return default_ext
9cb9a5df 3130 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3131 if re.match(r'^[A-Za-z0-9]+$', guess):
3132 return guess
a7aaa398
S
3133 # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
3134 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3135 return guess.rstrip('/')
73e79f2a 3136 else:
cbdbb766 3137 return default_ext
73e79f2a 3138
5f6a1245 3139
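# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). The second case assumes 'mp4' is listed in
# KNOWN_EXTENSIONS (defined elsewhere in this module).
def _determine_ext_examples():
    assert determine_ext('http://example.com/video.mp4') == 'mp4'
    assert determine_ext('http://example.com/video.mp4/?download') == 'mp4'
    assert determine_ext('http://example.com/page') == 'unknown_video'
    assert determine_ext(None) == 'unknown_video'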
824fa511
S
3140def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3141 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3142
5f6a1245 3143
9e62f283 3144def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3145 """
3146 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3147 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3148
3149 format: string date format used to return datetime object from
3150 precision: round the time portion of a datetime object.
3151 auto|microsecond|second|minute|hour|day.
3152 auto: round to the unit provided in date_str (if applicable).
3153 """
3154 auto_precision = False
3155 if precision == 'auto':
3156 auto_precision = True
3157 precision = 'microsecond'
3158 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3159 if date_str in ('now', 'today'):
37254abc 3160 return today
f8795e10
PH
3161 if date_str == 'yesterday':
3162 return today - datetime.timedelta(days=1)
9e62f283 3163 match = re.match(
3164 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3165 date_str)
37254abc 3166 if match is not None:
9e62f283 3167 start_time = datetime_from_str(match.group('start'), precision, format)
3168 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3169 unit = match.group('unit')
9e62f283 3170 if unit == 'month' or unit == 'year':
3171 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3172 unit = 'day'
9e62f283 3173 else:
3174 if unit == 'week':
3175 unit = 'day'
3176 time *= 7
3177 delta = datetime.timedelta(**{unit + 's': time})
3178 new_date = start_time + delta
3179 if auto_precision:
3180 return datetime_round(new_date, unit)
3181 return new_date
3182
3183 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3184
3185
3186def date_from_str(date_str, format='%Y%m%d'):
3187 """
3188 Return a datetime object from a string in the format YYYYMMDD or
3189 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3190
3191 format: string date format used to return datetime object from
3192 """
3193 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3194
3195
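# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). Relative dates are computed from the current time, so the
# first two asserts can in principle race across midnight.
def _date_from_str_examples():
    assert date_from_str('now') == datetime.date.today()
    assert date_from_str('now-1day') == datetime.date.today() - datetime.timedelta(days=1)
    assert date_from_str('20210101') == datetime.date(2021, 1, 1)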
3196def datetime_add_months(dt, months):
3197 """Increment/Decrement a datetime object by months."""
3198 month = dt.month + months - 1
3199 year = dt.year + month // 12
3200 month = month % 12 + 1
3201 day = min(dt.day, calendar.monthrange(year, month)[1])
3202 return dt.replace(year, month, day)
3203
3204
3205def datetime_round(dt, precision='day'):
3206 """
3207 Round a datetime object's time to a specific precision
3208 """
3209 if precision == 'microsecond':
3210 return dt
3211
3212 unit_seconds = {
3213 'day': 86400,
3214 'hour': 3600,
3215 'minute': 60,
3216 'second': 1,
3217 }
3218 roundto = lambda x, n: ((x + n / 2) // n) * n
3219 timestamp = calendar.timegm(dt.timetuple())
3220 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3221
3222
e63fc1be 3223def hyphenate_date(date_str):
3224 """
3225 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3226 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3227 if match is not None:
3228 return '-'.join(match.groups())
3229 else:
3230 return date_str
3231
5f6a1245 3232
bd558525
JMF
3233class DateRange(object):
3234 """Represents a time interval between two dates"""
5f6a1245 3235
bd558525
JMF
3236 def __init__(self, start=None, end=None):
3237 """start and end must be strings in the format accepted by date"""
3238 if start is not None:
3239 self.start = date_from_str(start)
3240 else:
3241 self.start = datetime.datetime.min.date()
3242 if end is not None:
3243 self.end = date_from_str(end)
3244 else:
3245 self.end = datetime.datetime.max.date()
37254abc 3246 if self.start > self.end:
bd558525 3247 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3248
bd558525
JMF
3249 @classmethod
3250 def day(cls, day):
3251 """Returns a range that only contains the given day"""
5f6a1245
JW
3252 return cls(day, day)
3253
bd558525
JMF
3254 def __contains__(self, date):
3255 """Check if the date is in the range"""
37254abc
JMF
3256 if not isinstance(date, datetime.date):
3257 date = date_from_str(date)
3258 return self.start <= date <= self.end
5f6a1245 3259
bd558525 3260 def __str__(self):
5f6a1245 3261 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3262
3263
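# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). DateRange accepts the same strings as date_from_str() and
# supports open-ended ranges.
def _DateRange_examples():
    assert '20200105' in DateRange('20200101', '20200131')
    assert '20200201' not in DateRange('20200101', '20200131')
    assert '20500101' in DateRange(start='20200101')  # open-ended end date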
3264def platform_name():
3265 """ Returns the platform name as a compat_str """
3266 res = platform.platform()
3267 if isinstance(res, bytes):
3268 res = res.decode(preferredencoding())
3269
3270 assert isinstance(res, compat_str)
3271 return res
c257baff
PH
3272
3273
b58ddb32
PH
3274def _windows_write_string(s, out):
3275 """ Returns True if the string was written using special methods,
3276 False if it has yet to be written out."""
3277 # Adapted from http://stackoverflow.com/a/3259271/35070
3278
3279 import ctypes
3280 import ctypes.wintypes
3281
3282 WIN_OUTPUT_IDS = {
3283 1: -11,
3284 2: -12,
3285 }
3286
a383a98a
PH
3287 try:
3288 fileno = out.fileno()
3289 except AttributeError:
3290 # If the output stream doesn't have a fileno, it's virtual
3291 return False
aa42e873
PH
3292 except io.UnsupportedOperation:
3293 # Some strange Windows pseudo files?
3294 return False
b58ddb32
PH
3295 if fileno not in WIN_OUTPUT_IDS:
3296 return False
3297
d7cd9a9e 3298 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3299 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3300 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3301 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3302
d7cd9a9e 3303 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3304 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3305 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3306 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3307 written = ctypes.wintypes.DWORD(0)
3308
d7cd9a9e 3309 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3310 FILE_TYPE_CHAR = 0x0002
3311 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3312 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3313 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3314 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3315 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3316 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3317
3318 def not_a_console(handle):
3319 if handle == INVALID_HANDLE_VALUE or handle is None:
3320 return True
3089bc74
S
3321 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3322 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3323
3324 if not_a_console(h):
3325 return False
3326
d1b9c912
PH
3327 def next_nonbmp_pos(s):
3328 try:
3329 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3330 except StopIteration:
3331 return len(s)
3332
3333 while s:
3334 count = min(next_nonbmp_pos(s), 1024)
3335
b58ddb32 3336 ret = WriteConsoleW(
d1b9c912 3337 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3338 if ret == 0:
3339 raise OSError('Failed to write string')
d1b9c912
PH
3340 if not count: # We just wrote a non-BMP character
3341 assert written.value == 2
3342 s = s[1:]
3343 else:
3344 assert written.value > 0
3345 s = s[written.value:]
b58ddb32
PH
3346 return True
3347
3348
734f90bb 3349def write_string(s, out=None, encoding=None):
7459e3a2
PH
3350 if out is None:
3351 out = sys.stderr
8bf48f23 3352 assert type(s) == compat_str
7459e3a2 3353
b58ddb32
PH
3354 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3355 if _windows_write_string(s, out):
3356 return
3357
3089bc74
S
3358 if ('b' in getattr(out, 'mode', '')
3359 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3360 byt = s.encode(encoding or preferredencoding(), 'ignore')
3361 out.write(byt)
3362 elif hasattr(out, 'buffer'):
3363 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3364 byt = s.encode(enc, 'ignore')
3365 out.buffer.write(byt)
3366 else:
8bf48f23 3367 out.write(s)
7459e3a2
PH
3368 out.flush()
3369
3370
48ea9cea
PH
3371def bytes_to_intlist(bs):
3372 if not bs:
3373 return []
3374 if isinstance(bs[0], int): # Python 3
3375 return list(bs)
3376 else:
3377 return [ord(c) for c in bs]
3378
c257baff 3379
cba892fa 3380def intlist_to_bytes(xs):
3381 if not xs:
3382 return b''
edaa23f8 3383 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3384
3385
c1c9a79c
PH
3386# Cross-platform file locking
3387if sys.platform == 'win32':
3388 import ctypes.wintypes
3389 import msvcrt
3390
3391 class OVERLAPPED(ctypes.Structure):
3392 _fields_ = [
3393 ('Internal', ctypes.wintypes.LPVOID),
3394 ('InternalHigh', ctypes.wintypes.LPVOID),
3395 ('Offset', ctypes.wintypes.DWORD),
3396 ('OffsetHigh', ctypes.wintypes.DWORD),
3397 ('hEvent', ctypes.wintypes.HANDLE),
3398 ]
3399
3400 kernel32 = ctypes.windll.kernel32
3401 LockFileEx = kernel32.LockFileEx
3402 LockFileEx.argtypes = [
3403 ctypes.wintypes.HANDLE, # hFile
3404 ctypes.wintypes.DWORD, # dwFlags
3405 ctypes.wintypes.DWORD, # dwReserved
3406 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3407 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3408 ctypes.POINTER(OVERLAPPED) # Overlapped
3409 ]
3410 LockFileEx.restype = ctypes.wintypes.BOOL
3411 UnlockFileEx = kernel32.UnlockFileEx
3412 UnlockFileEx.argtypes = [
3413 ctypes.wintypes.HANDLE, # hFile
3414 ctypes.wintypes.DWORD, # dwReserved
3415 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3416 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3417 ctypes.POINTER(OVERLAPPED) # Overlapped
3418 ]
3419 UnlockFileEx.restype = ctypes.wintypes.BOOL
3420 whole_low = 0xffffffff
3421 whole_high = 0x7fffffff
3422
3423 def _lock_file(f, exclusive):
3424 overlapped = OVERLAPPED()
3425 overlapped.Offset = 0
3426 overlapped.OffsetHigh = 0
3427 overlapped.hEvent = 0
3428 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3429 handle = msvcrt.get_osfhandle(f.fileno())
3430 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3431 whole_low, whole_high, f._lock_file_overlapped_p):
3432 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3433
3434 def _unlock_file(f):
3435 assert f._lock_file_overlapped_p
3436 handle = msvcrt.get_osfhandle(f.fileno())
3437 if not UnlockFileEx(handle, 0,
3438 whole_low, whole_high, f._lock_file_overlapped_p):
3439 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3440
3441else:
399a76e6
YCH
3442 # Some platforms, such as Jython, are missing fcntl
3443 try:
3444 import fcntl
c1c9a79c 3445
399a76e6
YCH
3446 def _lock_file(f, exclusive):
3447 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3448
399a76e6
YCH
3449 def _unlock_file(f):
3450 fcntl.flock(f, fcntl.LOCK_UN)
3451 except ImportError:
3452 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3453
3454 def _lock_file(f, exclusive):
3455 raise IOError(UNSUPPORTED_MSG)
3456
3457 def _unlock_file(f):
3458 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3459
3460
3461class locked_file(object):
3462 def __init__(self, filename, mode, encoding=None):
3463 assert mode in ['r', 'a', 'w']
3464 self.f = io.open(filename, mode, encoding=encoding)
3465 self.mode = mode
3466
3467 def __enter__(self):
3468 exclusive = self.mode != 'r'
3469 try:
3470 _lock_file(self.f, exclusive)
3471 except IOError:
3472 self.f.close()
3473 raise
3474 return self
3475
3476 def __exit__(self, etype, value, traceback):
3477 try:
3478 _unlock_file(self.f)
3479 finally:
3480 self.f.close()
3481
3482 def __iter__(self):
3483 return iter(self.f)
3484
3485 def write(self, *args):
3486 return self.f.write(*args)
3487
3488 def read(self, *args):
3489 return self.f.read(*args)
4eb7f1d1
JMF
3490
3491
4644ac55
S
3492def get_filesystem_encoding():
3493 encoding = sys.getfilesystemencoding()
3494 return encoding if encoding is not None else 'utf-8'
3495
3496
4eb7f1d1 3497def shell_quote(args):
a6a173c2 3498 quoted_args = []
4644ac55 3499 encoding = get_filesystem_encoding()
a6a173c2
JMF
3500 for a in args:
3501 if isinstance(a, bytes):
3502 # We may get a filename encoded with 'encodeFilename'
3503 a = a.decode(encoding)
aefce8e6 3504 quoted_args.append(compat_shlex_quote(a))
28e614de 3505 return ' '.join(quoted_args)
9d4660ca
PH
3506
3507
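# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). The exact quoting is whatever compat_shlex_quote (POSIX
# shlex/pipes quoting) produces; arguments containing spaces get single-quoted.
def _shell_quote_example():
    assert shell_quote(['ffmpeg', '-i', 'my file.mp4']) == "ffmpeg -i 'my file.mp4'"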
3508def smuggle_url(url, data):
3509 """ Pass additional data in a URL for internal use. """
3510
81953d1a
RA
3511 url, idata = unsmuggle_url(url, {})
3512 data.update(idata)
15707c7e 3513 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3514 {'__youtubedl_smuggle': json.dumps(data)})
3515 return url + '#' + sdata
9d4660ca
PH
3516
3517
79f82953 3518def unsmuggle_url(smug_url, default=None):
83e865a3 3519 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3520 return smug_url, default
28e614de
PH
3521 url, _, sdata = smug_url.rpartition('#')
3522 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3523 data = json.loads(jsond)
3524 return url, data
02dbf93f
PH
3525
3526
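# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). smuggle_url()/unsmuggle_url() round-trip arbitrary
# JSON-serializable data through the URL fragment.
def _smuggle_url_example():
    smugged = smuggle_url('http://example.com/video', {'referer': 'http://example.com/'})
    assert unsmuggle_url(smugged) == ('http://example.com/video', {'referer': 'http://example.com/'})
    # URLs without smuggled data come back unchanged, with the given default
    assert unsmuggle_url('http://example.com/video', {}) == ('http://example.com/video', {})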
02dbf93f
PH
3527def format_bytes(bytes):
3528 if bytes is None:
28e614de 3529 return 'N/A'
02dbf93f
PH
3530 if type(bytes) is str:
3531 bytes = float(bytes)
3532 if bytes == 0.0:
3533 exponent = 0
3534 else:
3535 exponent = int(math.log(bytes, 1024.0))
28e614de 3536 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3537 converted = float(bytes) / float(1024 ** exponent)
28e614de 3538 return '%.2f%s' % (converted, suffix)
f53c966a 3539
1c088fa8 3540
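# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). format_bytes() uses binary (1024-based) suffixes.
def _format_bytes_examples():
    assert format_bytes(None) == 'N/A'
    assert format_bytes(1024) == '1.00KiB'
    assert format_bytes(1536) == '1.50KiB'
    assert format_bytes(3 * 1024 ** 3) == '3.00GiB'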
fb47597b
S
3541def lookup_unit_table(unit_table, s):
3542 units_re = '|'.join(re.escape(u) for u in unit_table)
3543 m = re.match(
782b1b5b 3544 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3545 if not m:
3546 return None
3547 num_str = m.group('num').replace(',', '.')
3548 mult = unit_table[m.group('unit')]
3549 return int(float(num_str) * mult)
3550
3551
be64b5b0
PH
3552def parse_filesize(s):
3553 if s is None:
3554 return None
3555
dfb1b146 3556 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3557 # but we support those too
3558 _UNIT_TABLE = {
3559 'B': 1,
3560 'b': 1,
70852b47 3561 'bytes': 1,
be64b5b0
PH
3562 'KiB': 1024,
3563 'KB': 1000,
3564 'kB': 1024,
3565 'Kb': 1000,
13585d76 3566 'kb': 1000,
70852b47
YCH
3567 'kilobytes': 1000,
3568 'kibibytes': 1024,
be64b5b0
PH
3569 'MiB': 1024 ** 2,
3570 'MB': 1000 ** 2,
3571 'mB': 1024 ** 2,
3572 'Mb': 1000 ** 2,
13585d76 3573 'mb': 1000 ** 2,
70852b47
YCH
3574 'megabytes': 1000 ** 2,
3575 'mebibytes': 1024 ** 2,
be64b5b0
PH
3576 'GiB': 1024 ** 3,
3577 'GB': 1000 ** 3,
3578 'gB': 1024 ** 3,
3579 'Gb': 1000 ** 3,
13585d76 3580 'gb': 1000 ** 3,
70852b47
YCH
3581 'gigabytes': 1000 ** 3,
3582 'gibibytes': 1024 ** 3,
be64b5b0
PH
3583 'TiB': 1024 ** 4,
3584 'TB': 1000 ** 4,
3585 'tB': 1024 ** 4,
3586 'Tb': 1000 ** 4,
13585d76 3587 'tb': 1000 ** 4,
70852b47
YCH
3588 'terabytes': 1000 ** 4,
3589 'tebibytes': 1024 ** 4,
be64b5b0
PH
3590 'PiB': 1024 ** 5,
3591 'PB': 1000 ** 5,
3592 'pB': 1024 ** 5,
3593 'Pb': 1000 ** 5,
13585d76 3594 'pb': 1000 ** 5,
70852b47
YCH
3595 'petabytes': 1000 ** 5,
3596 'pebibytes': 1024 ** 5,
be64b5b0
PH
3597 'EiB': 1024 ** 6,
3598 'EB': 1000 ** 6,
3599 'eB': 1024 ** 6,
3600 'Eb': 1000 ** 6,
13585d76 3601 'eb': 1000 ** 6,
70852b47
YCH
3602 'exabytes': 1000 ** 6,
3603 'exbibytes': 1024 ** 6,
be64b5b0
PH
3604 'ZiB': 1024 ** 7,
3605 'ZB': 1000 ** 7,
3606 'zB': 1024 ** 7,
3607 'Zb': 1000 ** 7,
13585d76 3608 'zb': 1000 ** 7,
70852b47
YCH
3609 'zettabytes': 1000 ** 7,
3610 'zebibytes': 1024 ** 7,
be64b5b0
PH
3611 'YiB': 1024 ** 8,
3612 'YB': 1000 ** 8,
3613 'yB': 1024 ** 8,
3614 'Yb': 1000 ** 8,
13585d76 3615 'yb': 1000 ** 8,
70852b47
YCH
3616 'yottabytes': 1000 ** 8,
3617 'yobibytes': 1024 ** 8,
be64b5b0
PH
3618 }
3619
fb47597b
S
3620 return lookup_unit_table(_UNIT_TABLE, s)
3621
3622
3623def parse_count(s):
3624 if s is None:
be64b5b0
PH
3625 return None
3626
fb47597b
S
3627 s = s.strip()
3628
3629 if re.match(r'^[\d,.]+$', s):
3630 return str_to_int(s)
3631
3632 _UNIT_TABLE = {
3633 'k': 1000,
3634 'K': 1000,
3635 'm': 1000 ** 2,
3636 'M': 1000 ** 2,
3637 'kk': 1000 ** 2,
3638 'KK': 1000 ** 2,
3639 }
be64b5b0 3640
fb47597b 3641 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3642
2f7ae819 3643
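# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). parse_filesize() distinguishes decimal (kB/MB/GB) from
# binary (KiB/MiB/GiB) units; parse_count() handles k/M style view counters.
def _parse_size_examples():
    assert parse_filesize('1.5 GiB') == 1610612736
    assert parse_filesize('500 MB') == 500000000
    assert parse_filesize(None) is None
    assert parse_count('1.2M') == 1200000
    assert parse_count('123,456') == 123456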
b871d7e9
S
3644def parse_resolution(s):
3645 if s is None:
3646 return {}
3647
3648 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3649 if mobj:
3650 return {
3651 'width': int(mobj.group('w')),
3652 'height': int(mobj.group('h')),
3653 }
3654
3655 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3656 if mobj:
3657 return {'height': int(mobj.group(1))}
3658
3659 mobj = re.search(r'\b([48])[kK]\b', s)
3660 if mobj:
3661 return {'height': int(mobj.group(1)) * 540}
3662
3663 return {}
3664
3665
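# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). parse_resolution() understands WxH, NNNp/i and 4k/8k forms.
def _parse_resolution_examples():
    assert parse_resolution('1920x1080') == {'width': 1920, 'height': 1080}
    assert parse_resolution('720p') == {'height': 720}
    assert parse_resolution('4k') == {'height': 2160}
    assert parse_resolution('no resolution here') == {}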
0dc41787
S
3666def parse_bitrate(s):
3667 if not isinstance(s, compat_str):
3668 return
3669 mobj = re.search(r'\b(\d+)\s*kbps', s)
3670 if mobj:
3671 return int(mobj.group(1))
3672
3673
a942d6cb 3674def month_by_name(name, lang='en'):
caefb1de
PH
3675 """ Return the number of a month by (locale-independently) English name """
3676
f6717dec 3677 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3678
caefb1de 3679 try:
f6717dec 3680 return month_names.index(name) + 1
7105440c
YCH
3681 except ValueError:
3682 return None
3683
3684
3685def month_by_abbreviation(abbrev):
3686 """ Return the number of a month by (locale-independently) English
3687 abbreviations """
3688
3689 try:
3690 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3691 except ValueError:
3692 return None
18258362
JMF
3693
3694
5aafe895 3695def fix_xml_ampersands(xml_str):
18258362 3696 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3697 return re.sub(
3698 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3699 '&amp;',
5aafe895 3700 xml_str)
e3946f98
PH
3701
3702
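# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). Only bare ampersands are escaped; existing entities and
# character references are left alone.
def _fix_xml_ampersands_example():
    assert fix_xml_ampersands('Tom & Jerry &amp; &#65;') == 'Tom &amp; Jerry &amp; &#65;'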
3703def setproctitle(title):
8bf48f23 3704 assert isinstance(title, compat_str)
c1c05c67
YCH
3705
3706 # ctypes in Jython is not complete
3707 # http://bugs.jython.org/issue2148
3708 if sys.platform.startswith('java'):
3709 return
3710
e3946f98 3711 try:
611c1dd9 3712 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3713 except OSError:
3714 return
2f49bcd6
RC
3715 except TypeError:
3716 # LoadLibrary in Windows Python 2.7.13 only expects
3717 # a bytestring, but since unicode_literals turns
3718 # every string into a unicode string, it fails.
3719 return
6eefe533
PH
3720 title_bytes = title.encode('utf-8')
3721 buf = ctypes.create_string_buffer(len(title_bytes))
3722 buf.value = title_bytes
e3946f98 3723 try:
6eefe533 3724 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3725 except AttributeError:
3726 return # Strange libc, just skip this
d7dda168
PH
3727
3728
3729def remove_start(s, start):
46bc9b7d 3730 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3731
3732
2b9faf55 3733def remove_end(s, end):
46bc9b7d 3734 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3735
3736
31b2051e
S
3737def remove_quotes(s):
3738 if s is None or len(s) < 2:
3739 return s
3740 for quote in ('"', "'", ):
3741 if s[0] == quote and s[-1] == quote:
3742 return s[1:-1]
3743 return s
3744
3745
b6e0c7d2
U
3746def get_domain(url):
3747 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3748 return domain.group('domain') if domain else None
3749
3750
29eb5174 3751def url_basename(url):
9b8aaeed 3752 path = compat_urlparse.urlparse(url).path
28e614de 3753 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3754
3755
02dc0a36
S
3756def base_url(url):
3757 return re.match(r'https?://[^?#&]+/', url).group()
3758
3759
e34c3361 3760def urljoin(base, path):
4b5de77b
S
3761 if isinstance(path, bytes):
3762 path = path.decode('utf-8')
e34c3361
S
3763 if not isinstance(path, compat_str) or not path:
3764 return None
fad4ceb5 3765 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3766 return path
4b5de77b
S
3767 if isinstance(base, bytes):
3768 base = base.decode('utf-8')
3769 if not isinstance(base, compat_str) or not re.match(
3770 r'^(?:https?:)?//', base):
e34c3361
S
3771 return None
3772 return compat_urlparse.urljoin(base, path)
3773
3774
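# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). Quick reference for the URL helpers above.
def _url_helpers_examples():
    assert url_basename('https://example.com/path/to/video.mp4?x=1') == 'video.mp4'
    assert base_url('https://example.com/path/to/video.mp4') == 'https://example.com/path/to/'
    assert urljoin('https://example.com/path/', 'video.mp4') == 'https://example.com/path/video.mp4'
    assert urljoin('https://example.com/a/', '/b/c.txt') == 'https://example.com/b/c.txt'
    assert urljoin(None, 'video.mp4') is None
    assert get_domain('https://www.youtube.com/watch?v=abc') == 'youtube.com'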
aa94a6d3
PH
3775class HEADRequest(compat_urllib_request.Request):
3776 def get_method(self):
611c1dd9 3777 return 'HEAD'
7217e148
PH
3778
3779
95cf60e8
S
3780class PUTRequest(compat_urllib_request.Request):
3781 def get_method(self):
3782 return 'PUT'
3783
3784
9732d77e 3785def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3786 if get_attr:
3787 if v is not None:
3788 v = getattr(v, get_attr, None)
9572013d
PH
3789 if v == '':
3790 v = None
1812afb7
S
3791 if v is None:
3792 return default
3793 try:
3794 return int(v) * invscale // scale
5e1271c5 3795 except (ValueError, TypeError):
af98f8ff 3796 return default
9732d77e 3797
9572013d 3798
40a90862
JMF
3799def str_or_none(v, default=None):
3800 return default if v is None else compat_str(v)
3801
9732d77e
PH
3802
3803def str_to_int(int_str):
48d4681e 3804 """ A more relaxed version of int_or_none """
42db58ec 3805 if isinstance(int_str, compat_integer_types):
348c6bf1 3806 return int_str
42db58ec
S
3807 elif isinstance(int_str, compat_str):
3808 int_str = re.sub(r'[,\.\+]', '', int_str)
3809 return int_or_none(int_str)
608d11f5
PH
3810
3811
9732d77e 3812def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3813 if v is None:
3814 return default
3815 try:
3816 return float(v) * invscale / scale
5e1271c5 3817 except (ValueError, TypeError):
caf80631 3818 return default
43f775e4
PH
3819
3820
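# Illustrative usage sketch (not part of the original module; the helper name
# is hypothetical). These helpers never raise on bad input; they fall back to
# the given default instead.
def _numeric_helpers_examples():
    assert int_or_none('42') == 42
    assert int_or_none('') is None
    assert int_or_none('1500', scale=1000) == 1
    assert int_or_none(None, default=0) == 0
    assert str_to_int('123,456') == 123456
    assert float_or_none('0.5') == 0.5
    assert float_or_none('abc', default=1.0) == 1.0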
c7e327c4
S
3821def bool_or_none(v, default=None):
3822 return v if isinstance(v, bool) else default
3823
3824
53cd37ba
S
3825def strip_or_none(v, default=None):
3826 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3827
3828
af03000a
S
3829def url_or_none(url):
3830 if not url or not isinstance(url, compat_str):
3831 return None
3832 url = url.strip()
29f7c58a 3833 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3834
3835
e29663c6 3836def strftime_or_none(timestamp, date_format, default=None):
3837 datetime_object = None
3838 try:
3839 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3840 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3841 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3842 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3843 return datetime_object.strftime(date_format)
3844 except (ValueError, TypeError, AttributeError):
3845 return default
3846
3847
608d11f5 3848def parse_duration(s):
8f9312c3 3849 if not isinstance(s, compat_basestring):
608d11f5
PH
3850 return None
3851
ca7b3246
S
3852 s = s.strip()
3853
acaff495 3854 days, hours, mins, secs, ms = [None] * 5
15846398 3855 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3856 if m:
3857 days, hours, mins, secs, ms = m.groups()
3858 else:
3859 m = re.match(
056653bb
S
3860 r'''(?ix)(?:P?
3861 (?:
3862 [0-9]+\s*y(?:ears?)?\s*
3863 )?
3864 (?:
3865 [0-9]+\s*m(?:onths?)?\s*
3866 )?
3867 (?:
3868 [0-9]+\s*w(?:eeks?)?\s*
3869 )?
8f4b58d7 3870 (?:
acaff495 3871 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3872 )?
056653bb 3873 T)?
acaff495 3874 (?:
3875 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3876 )?
3877 (?:
3878 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3879 )?
3880 (?:
3881 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3882 )?Z?$''', s)
acaff495 3883 if m:
3884 days, hours, mins, secs, ms = m.groups()
3885 else:
15846398 3886 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3887 if m:
3888 hours, mins = m.groups()
3889 else:
3890 return None
3891
3892 duration = 0
3893 if secs:
3894 duration += float(secs)
3895 if mins:
3896 duration += float(mins) * 60
3897 if hours:
3898 duration += float(hours) * 60 * 60
3899 if days:
3900 duration += float(days) * 24 * 60 * 60
3901 if ms:
3902 duration += float(ms)
3903 return duration
91d7d0b3
JMF
3904
3905
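# Illustrative usage sketch for parse_duration (not part of the original module):
# >>> parse_duration('1:23:45')
# 5025.0
# >>> parse_duration('PT1H30M')    # ISO 8601 style durations are also understood
# 5400.0
# >>> parse_duration('23.5s')
# 23.5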
e65e4c88 3906def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3907 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3908 return (
3909 '{0}.{1}{2}'.format(name, ext, real_ext)
3910 if not expected_real_ext or real_ext[1:] == expected_real_ext
3911 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3912
3913
b3ed15b7
S
3914def replace_extension(filename, ext, expected_real_ext=None):
3915 name, real_ext = os.path.splitext(filename)
3916 return '{0}.{1}'.format(
3917 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3918 ext)
3919
3920
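# Illustrative usage sketch for the extension helpers (filenames are made up):
# >>> prepend_extension('video.mp4', 'temp')
# 'video.temp.mp4'
# >>> prepend_extension('video.mp4', 'temp', expected_real_ext='webm')   # mismatch: append to the full name
# 'video.mp4.temp'
# >>> replace_extension('video.mp4', 'mkv')
# 'video.mkv'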
d70ad093
PH
3921def check_executable(exe, args=[]):
3922 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3923 args can be a list of arguments for a short output (like -version) """
3924 try:
f5b1bca9 3925 process_communicate_or_kill(subprocess.Popen(
3926 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
d70ad093
PH
3927 except OSError:
3928 return False
3929 return exe
b7ab0590
PH
3930
3931
95807118 3932def get_exe_version(exe, args=['--version'],
cae97f65 3933 version_re=None, unrecognized='present'):
95807118
PH
3934 """ Returns the version of the specified executable,
3935 or False if the executable is not present """
3936 try:
b64d04c1 3937 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 3938 # SIGTTOU if yt-dlp is run in the background.
067aa17e 3939 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
f5b1bca9 3940 out, _ = process_communicate_or_kill(subprocess.Popen(
54116803 3941 [encodeArgument(exe)] + args,
00ca7552 3942 stdin=subprocess.PIPE,
f5b1bca9 3943 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
95807118
PH
3944 except OSError:
3945 return False
cae97f65
PH
3946 if isinstance(out, bytes): # Python 2.x
3947 out = out.decode('ascii', 'ignore')
3948 return detect_exe_version(out, version_re, unrecognized)
3949
3950
3951def detect_exe_version(output, version_re=None, unrecognized='present'):
3952 assert isinstance(output, compat_str)
3953 if version_re is None:
3954 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3955 m = re.search(version_re, output)
95807118
PH
3956 if m:
3957 return m.group(1)
3958 else:
3959 return unrecognized
3960
3961
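# Illustrative usage sketch for detect_exe_version (the banner strings are made up):
# >>> detect_exe_version('ffmpeg version 4.4.1 Copyright (c) 2000-2021')
# '4.4.1'
# >>> detect_exe_version('garbage output')   # no version found -> the `unrecognized` value
# 'present'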
483336e7 3962class LazyList(collections.abc.Sequence):  # collections.Sequence alias was removed in Python 3.10
3963 ''' Lazy immutable list from an iterable
3964 Note that slices of a LazyList are lists and not LazyList'''
3965
3966 def __init__(self, iterable):
3967 self.__iterable = iter(iterable)
3968 self.__cache = []
28419ca2 3969 self.__reversed = False
483336e7 3970
3971 def __iter__(self):
28419ca2 3972 if self.__reversed:
3973 # We need to consume the entire iterable to iterate in reverse
3974 yield from self.exhaust()[::-1]
3975 return
3976 yield from self.__cache
483336e7 3977 for item in self.__iterable:
3978 self.__cache.append(item)
3979 yield item
3980
3981 def exhaust(self):
3982 ''' Evaluate the entire iterable '''
3983 self.__cache.extend(self.__iterable)
28419ca2 3984 return self.__cache
3985
3986 @staticmethod
3987 def _reverse_index(x):
3988 return -(x + 1)
483336e7 3989
3990 def __getitem__(self, idx):
3991 if isinstance(idx, slice):
3992 step = idx.step or 1
28419ca2 3993 start = idx.start if idx.start is not None else 0 if step > 0 else -1
483336e7 3994 stop = idx.stop if idx.stop is not None else -1 if step > 0 else 0
28419ca2 3995 if self.__reversed:
3996 start, stop, step = map(self._reverse_index, (start, stop, step))
3997 idx = slice(start, stop, step)
483336e7 3998 elif isinstance(idx, int):
28419ca2 3999 if self.__reversed:
4000 idx = self._reverse_index(idx)
483336e7 4001 start = stop = idx
4002 else:
4003 raise TypeError('indices must be integers or slices')
4004 if start < 0 or stop < 0:
4005 # We need to consume the entire iterable to be able to slice from the end
4006 # Obviously, never use this with infinite iterables
28419ca2 4007 return self.exhaust()[idx]
4008
4009 n = max(start, stop) - len(self.__cache) + 1
4010 if n > 0:
4011 self.__cache.extend(itertools.islice(self.__iterable, n))
483336e7 4012 return self.__cache[idx]
4013
4014 def __bool__(self):
4015 try:
28419ca2 4016 self[-1] if self.__reversed else self[0]
483336e7 4017 except IndexError:
4018 return False
4019 return True
4020
4021 def __len__(self):
4022 self.exhaust()
4023 return len(self.__cache)
4024
28419ca2 4025 def __reversed__(self):
4026 self.__reversed = not self.__reversed
4027 return self
4028
4029 def __repr__(self):
4030 # repr and str should mimic a list. So we exhaust the iterable
4031 return repr(self.exhaust())
4032
4033 def __str__(self):
4034 return repr(self.exhaust())
4035
483336e7 4036
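# Illustrative usage sketch for LazyList (not part of the original module):
# >>> lazy = LazyList(itertools.count())   # infinite iterable; items are cached on demand
# >>> lazy[5]                              # consumes only the first six items
# 5
# >>> lazy[:3]                             # slices are plain lists, not LazyList
# [0, 1, 2]
# Negative indices or steps force full evaluation, so avoid them with infinite iterables.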
b7ab0590 4037class PagedList(object):
dd26ced1
PH
4038 def __len__(self):
4039 # This is only useful for tests
4040 return len(self.getslice())
4041
55575225 4042 def getslice(self, start, end):
4043 raise NotImplementedError('This method must be implemented by subclasses')
4044
4045 def __getitem__(self, idx):
4046 if not isinstance(idx, int) or idx < 0:
4047 raise TypeError('indices must be non-negative integers')
4048 entries = self.getslice(idx, idx + 1)
4049 return entries[0] if entries else None
4050
9c44d242
PH
4051
4052class OnDemandPagedList(PagedList):
6be08ce6 4053 def __init__(self, pagefunc, pagesize, use_cache=True):
9c44d242
PH
4054 self._pagefunc = pagefunc
4055 self._pagesize = pagesize
b95dc034
YCH
4056 self._use_cache = use_cache
4057 if use_cache:
4058 self._cache = {}
9c44d242 4059
b7ab0590
PH
4060 def getslice(self, start=0, end=None):
4061 res = []
4062 for pagenum in itertools.count(start // self._pagesize):
4063 firstid = pagenum * self._pagesize
4064 nextfirstid = pagenum * self._pagesize + self._pagesize
4065 if start >= nextfirstid:
4066 continue
4067
b95dc034
YCH
4068 page_results = None
4069 if self._use_cache:
4070 page_results = self._cache.get(pagenum)
4071 if page_results is None:
4072 page_results = list(self._pagefunc(pagenum))
4073 if self._use_cache:
4074 self._cache[pagenum] = page_results
b7ab0590
PH
4075
4076 startv = (
4077 start % self._pagesize
4078 if firstid <= start < nextfirstid
4079 else 0)
4080
4081 endv = (
4082 ((end - 1) % self._pagesize) + 1
4083 if (end is not None and firstid <= end <= nextfirstid)
4084 else None)
4085
4086 if startv != 0 or endv is not None:
4087 page_results = page_results[startv:endv]
4088 res.extend(page_results)
4089
4090 # A little optimization - if the current page is not "full", i.e. does
4091 # not contain page_size videos, then we can assume that this page
4092 # is the last one - there are no more ids on further pages -
4093 # so there is no need to query again.
4094 if len(page_results) + startv < self._pagesize:
4095 break
4096
4097 # If we got the whole page, but the next page is not interesting,
4098 # break out early as well
4099 if end == nextfirstid:
4100 break
4101 return res
81c2f20b
PH
4102
4103
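# Illustrative usage sketch for OnDemandPagedList with a hypothetical page function
# that returns three ids per page (not part of the original module):
# >>> pages = OnDemandPagedList(lambda n: range(n * 3, n * 3 + 3), 3)
# >>> pages.getslice(2, 5)    # only pages 0 and 1 are actually fetched
# [2, 3, 4]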
9c44d242
PH
4104class InAdvancePagedList(PagedList):
4105 def __init__(self, pagefunc, pagecount, pagesize):
4106 self._pagefunc = pagefunc
4107 self._pagecount = pagecount
4108 self._pagesize = pagesize
4109
4110 def getslice(self, start=0, end=None):
4111 res = []
4112 start_page = start // self._pagesize
4113 end_page = (
4114 self._pagecount if end is None else (end // self._pagesize + 1))
4115 skip_elems = start - start_page * self._pagesize
4116 only_more = None if end is None else end - start
4117 for pagenum in range(start_page, end_page):
4118 page = list(self._pagefunc(pagenum))
4119 if skip_elems:
4120 page = page[skip_elems:]
4121 skip_elems = None
4122 if only_more is not None:
4123 if len(page) < only_more:
4124 only_more -= len(page)
4125 else:
4126 page = page[:only_more]
4127 res.extend(page)
4128 break
4129 res.extend(page)
4130 return res
4131
4132
81c2f20b 4133def uppercase_escape(s):
676eb3f2 4134 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4135 return re.sub(
a612753d 4136 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4137 lambda m: unicode_escape(m.group(0))[0],
4138 s)
0fe2ff78
YCH
4139
4140
4141def lowercase_escape(s):
4142 unicode_escape = codecs.getdecoder('unicode_escape')
4143 return re.sub(
4144 r'\\u[0-9a-fA-F]{4}',
4145 lambda m: unicode_escape(m.group(0))[0],
4146 s)
b53466e1 4147
d05cfe06
S
4148
4149def escape_rfc3986(s):
4150 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4151 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4152 s = s.encode('utf-8')
ecc0c5ee 4153 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4154
4155
4156def escape_url(url):
4157 """Escape URL as suggested by RFC 3986"""
4158 url_parsed = compat_urllib_parse_urlparse(url)
4159 return url_parsed._replace(
efbed08d 4160 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4161 path=escape_rfc3986(url_parsed.path),
4162 params=escape_rfc3986(url_parsed.params),
4163 query=escape_rfc3986(url_parsed.query),
4164 fragment=escape_rfc3986(url_parsed.fragment)
4165 ).geturl()
4166
62e609ab
PH
4167
4168def read_batch_urls(batch_fd):
4169 def fixup(url):
4170 if not isinstance(url, compat_str):
4171 url = url.decode('utf-8', 'replace')
8c04f0be 4172 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4173 for bom in BOM_UTF8:
4174 if url.startswith(bom):
4175 url = url[len(bom):]
4176 url = url.lstrip()
4177 if not url or url.startswith(('#', ';', ']')):
62e609ab 4178 return False
8c04f0be 4179 # "#" cannot be stripped out since it is part of the URI
4180 # However, it can be safely stripped out if it follows a whitespace
4181 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4182
4183 with contextlib.closing(batch_fd) as fd:
4184 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4185
4186
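# Illustrative usage sketch for read_batch_urls (the batch file contents are made up):
# >>> read_batch_urls(io.StringIO('# a comment\nhttps://example.com/v  # trailing note\n'))
# ['https://example.com/v']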
4187def urlencode_postdata(*args, **kargs):
15707c7e 4188 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4189
4190
38f9ef31 4191def update_url_query(url, query):
cacd9966
YCH
4192 if not query:
4193 return url
38f9ef31 4194 parsed_url = compat_urlparse.urlparse(url)
4195 qs = compat_parse_qs(parsed_url.query)
4196 qs.update(query)
4197 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4198 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4199
8e60dc75 4200
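# Illustrative usage sketch for update_url_query (parameter order in the result may vary):
# >>> update_url_query('https://example.com/path?a=1', {'b': '2'})
# 'https://example.com/path?a=1&b=2'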
ed0291d1
S
4201def update_Request(req, url=None, data=None, headers={}, query={}):
4202 req_headers = req.headers.copy()
4203 req_headers.update(headers)
4204 req_data = data or req.data
4205 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4206 req_get_method = req.get_method()
4207 if req_get_method == 'HEAD':
4208 req_type = HEADRequest
4209 elif req_get_method == 'PUT':
4210 req_type = PUTRequest
4211 else:
4212 req_type = compat_urllib_request.Request
ed0291d1
S
4213 new_req = req_type(
4214 req_url, data=req_data, headers=req_headers,
4215 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4216 if hasattr(req, 'timeout'):
4217 new_req.timeout = req.timeout
4218 return new_req
4219
4220
10c87c15 4221def _multipart_encode_impl(data, boundary):
0c265486
YCH
4222 content_type = 'multipart/form-data; boundary=%s' % boundary
4223
4224 out = b''
4225 for k, v in data.items():
4226 out += b'--' + boundary.encode('ascii') + b'\r\n'
4227 if isinstance(k, compat_str):
4228 k = k.encode('utf-8')
4229 if isinstance(v, compat_str):
4230 v = v.encode('utf-8')
4231 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4232 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4233 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4234 if boundary.encode('ascii') in content:
4235 raise ValueError('Boundary overlaps with data')
4236 out += content
4237
4238 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4239
4240 return out, content_type
4241
4242
4243def multipart_encode(data, boundary=None):
4244 '''
4245 Encode a dict to RFC 7578-compliant form-data
4246
4247 data:
4248 A dict where keys and values can be either Unicode or bytes-like
4249 objects.
4250 boundary:
4251 If specified, it must be a Unicode object and is used as the boundary.
4252 Otherwise a random boundary is generated.
4253
4254 Reference: https://tools.ietf.org/html/rfc7578
4255 '''
4256 has_specified_boundary = boundary is not None
4257
4258 while True:
4259 if boundary is None:
4260 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4261
4262 try:
10c87c15 4263 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4264 break
4265 except ValueError:
4266 if has_specified_boundary:
4267 raise
4268 boundary = None
4269
4270 return out, content_type
4271
4272
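# Illustrative usage sketch for multipart_encode with a fixed boundary (field names are made up):
# >>> body, ctype = multipart_encode({'field': 'value'}, boundary='AaB03x')
# >>> ctype
# 'multipart/form-data; boundary=AaB03x'
# >>> body
# b'--AaB03x\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AaB03x--\r\n'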
86296ad2 4273def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4274 if isinstance(key_or_keys, (list, tuple)):
4275 for key in key_or_keys:
86296ad2
S
4276 if key not in d or d[key] is None or skip_false_values and not d[key]:
4277 continue
4278 return d[key]
cbecc9b9
S
4279 return default
4280 return d.get(key_or_keys, default)
4281
4282
329ca3be 4283def try_get(src, getter, expected_type=None):
a32a9a7e
S
4284 if not isinstance(getter, (list, tuple)):
4285 getter = [getter]
4286 for get in getter:
4287 try:
4288 v = get(src)
4289 except (AttributeError, KeyError, TypeError, IndexError):
4290 pass
4291 else:
4292 if expected_type is None or isinstance(v, expected_type):
4293 return v
329ca3be
S
4294
4295
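# Illustrative usage sketch for dict_get and try_get (not part of the original module):
# >>> dict_get({'a': '', 'b': 'x'}, ('a', 'b'))   # falsy values are skipped by default
# 'x'
# >>> try_get({'a': [1, 2]}, lambda x: x['a'][0], int)
# 1
# >>> try_get({'a': [1, 2]}, lambda x: x['a'][5], int) is None   # IndexError is swallowed
# True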
6cc62232
S
4296def merge_dicts(*dicts):
4297 merged = {}
4298 for a_dict in dicts:
4299 for k, v in a_dict.items():
4300 if v is None:
4301 continue
3089bc74
S
4302 if (k not in merged
4303 or (isinstance(v, compat_str) and v
4304 and isinstance(merged[k], compat_str)
4305 and not merged[k])):
6cc62232
S
4306 merged[k] = v
4307 return merged
4308
4309
8e60dc75
S
4310def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4311 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4312
16392824 4313
a1a530b0
PH
4314US_RATINGS = {
4315 'G': 0,
4316 'PG': 10,
4317 'PG-13': 13,
4318 'R': 16,
4319 'NC': 18,
4320}
fac55558
PH
4321
4322
a8795327 4323TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4324 'TV-Y': 0,
4325 'TV-Y7': 7,
4326 'TV-G': 0,
4327 'TV-PG': 0,
4328 'TV-14': 14,
4329 'TV-MA': 17,
a8795327
S
4330}
4331
4332
146c80e2 4333def parse_age_limit(s):
a8795327
S
4334 if type(s) == int:
4335 return s if 0 <= s <= 21 else None
4336 if not isinstance(s, compat_basestring):
d838b1bd 4337 return None
146c80e2 4338 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4339 if m:
4340 return int(m.group('age'))
5c5fae6d 4341 s = s.upper()
a8795327
S
4342 if s in US_RATINGS:
4343 return US_RATINGS[s]
5a16c9d9 4344 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4345 if m:
5a16c9d9 4346 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4347 return None
146c80e2
S
4348
4349
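# Illustrative usage sketch for parse_age_limit (not part of the original module):
# >>> parse_age_limit('PG-13')
# 13
# >>> parse_age_limit('TV-MA')
# 17
# >>> parse_age_limit('18+')
# 18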
fac55558 4350def strip_jsonp(code):
609a61e3 4351 return re.sub(
5552c9eb 4352 r'''(?sx)^
e9c671d5 4353 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4354 (?:\s*&&\s*(?P=func_name))?
4355 \s*\(\s*(?P<callback_data>.*)\);?
4356 \s*?(?://[^\n]*)*$''',
4357 r'\g<callback_data>', code)
478c2c61
PH
4358
4359
5c610515 4360def js_to_json(code, vars={}):
4361 # vars is a dict of var, val pairs to substitute
4195096e
S
4362 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4363 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4364 INTEGER_TABLE = (
4365 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4366 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4367 )
4368
e05f6939 4369 def fix_kv(m):
e7b6d122
PH
4370 v = m.group(0)
4371 if v in ('true', 'false', 'null'):
4372 return v
8bdd16b4 4373 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4374 return ""
4375
4376 if v[0] in ("'", '"'):
4377 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4378 '"': '\\"',
bd1e4844 4379 "\\'": "'",
4380 '\\\n': '',
4381 '\\x': '\\u00',
4382 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4383 else:
4384 for regex, base in INTEGER_TABLE:
4385 im = re.match(regex, v)
4386 if im:
4387 i = int(im.group(1), base)
4388 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4389
5c610515 4390 if v in vars:
4391 return vars[v]
4392
e7b6d122 4393 return '"%s"' % v
e05f6939 4394
bd1e4844 4395 return re.sub(r'''(?sx)
4396 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4397 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4398 {comment}|,(?={skip}[\]}}])|
c384d537 4399 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4195096e 4400 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4401 [0-9]+(?={skip}:)|
4402 !+
4195096e 4403 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4404
4405
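# Illustrative usage sketch for js_to_json (the object literal is made up):
# >>> js_to_json("{foo: 'bar', baz: 0x10}")
# '{"foo": "bar", "baz": 16}'
# The result is plain JSON, so it can be fed to json.loads().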
478c2c61
PH
4406def qualities(quality_ids):
4407 """ Get a numeric quality value out of a list of possible values """
4408 def q(qid):
4409 try:
4410 return quality_ids.index(qid)
4411 except ValueError:
4412 return -1
4413 return q
4414
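# Illustrative usage sketch for qualities (not part of the original module):
# >>> q = qualities(['240p', '360p', '720p'])
# >>> q('720p'), q('1080p')    # unknown ids sort below all known ones
# (2, -1)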
acd69589 4415
de6000d9 4416DEFAULT_OUTTMPL = {
4417 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4418 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4419}
4420OUTTMPL_TYPES = {
72755351 4421 'chapter': None,
de6000d9 4422 'subtitle': None,
4423 'thumbnail': None,
4424 'description': 'description',
4425 'annotation': 'annotations.xml',
4426 'infojson': 'info.json',
5112f26a 4427 'pl_thumbnail': None,
de6000d9 4428 'pl_description': 'description',
4429 'pl_infojson': 'info.json',
4430}
0a871f68 4431
143db31d 4432# As of [1] format syntax is:
4433# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4434# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
752cda38 4435STR_FORMAT_RE = r'''(?x)
143db31d 4436 (?<!%)
4437 %
752cda38 4438 (?P<has_key>\((?P<key>{0})\))? # mapping key
4439 (?P<format>
4440 (?:[#0\-+ ]+)? # conversion flags (optional)
4441 (?:\d+)? # minimum field width (optional)
4442 (?:\.\d+)? # precision (optional)
4443 [hlL]? # length modifier (optional)
4444 [diouxXeEfFgGcrs] # conversion type
4445 )
143db31d 4446'''
4447
a020a0dc
PH
4448
4449def limit_length(s, length):
4450 """ Add ellipses to overly long strings """
4451 if s is None:
4452 return None
4453 ELLIPSES = '...'
4454 if len(s) > length:
4455 return s[:length - len(ELLIPSES)] + ELLIPSES
4456 return s
48844745
PH
4457
4458
4459def version_tuple(v):
5f9b8394 4460 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4461
4462
4463def is_outdated_version(version, limit, assume_new=True):
4464 if not version:
4465 return not assume_new
4466 try:
4467 return version_tuple(version) < version_tuple(limit)
4468 except ValueError:
4469 return not assume_new
732ea2f0
PH
4470
4471
4472def ytdl_is_updateable():
7a5c1cfe 4473 """ Returns whether yt-dlp can be updated with -U """
735d865e 4474 return False
4475
732ea2f0
PH
4476 from zipimport import zipimporter
4477
4478 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
4479
4480
4481def args_to_str(args):
4482 # Get a short string representation for a subprocess command
702ccf2d 4483 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4484
4485
9b9c5355 4486def error_to_compat_str(err):
fdae2358
S
4487 err_str = str(err)
4488 # On python 2 error byte string must be decoded with proper
4489 # encoding rather than ascii
4490 if sys.version_info[0] < 3:
4491 err_str = err_str.decode(preferredencoding())
4492 return err_str
4493
4494
c460bdd5 4495def mimetype2ext(mt):
eb9ee194
S
4496 if mt is None:
4497 return None
4498
765ac263
JMF
4499 ext = {
4500 'audio/mp4': 'm4a',
6c33d24b
YCH
4501 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4502 # it's the most popular one
4503 'audio/mpeg': 'mp3',
ba39289d 4504 'audio/x-wav': 'wav',
765ac263
JMF
4505 }.get(mt)
4506 if ext is not None:
4507 return ext
4508
c460bdd5 4509 _, _, res = mt.rpartition('/')
6562d34a 4510 res = res.split(';')[0].strip().lower()
c460bdd5
PH
4511
4512 return {
f6861ec9 4513 '3gpp': '3gp',
cafcf657 4514 'smptett+xml': 'tt',
cafcf657 4515 'ttaf+xml': 'dfxp',
a0d8d704 4516 'ttml+xml': 'ttml',
f6861ec9 4517 'x-flv': 'flv',
a0d8d704 4518 'x-mp4-fragmented': 'mp4',
d4f05d47 4519 'x-ms-sami': 'sami',
a0d8d704 4520 'x-ms-wmv': 'wmv',
b4173f15
RA
4521 'mpegurl': 'm3u8',
4522 'x-mpegurl': 'm3u8',
4523 'vnd.apple.mpegurl': 'm3u8',
4524 'dash+xml': 'mpd',
b4173f15 4525 'f4m+xml': 'f4m',
f164b971 4526 'hds+xml': 'f4m',
e910fe2f 4527 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4528 'quicktime': 'mov',
98ce1a3f 4529 'mp2t': 'ts',
39e7107d 4530 'x-wav': 'wav',
c460bdd5
PH
4531 }.get(res, res)
4532
4533
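# Illustrative usage sketch for mimetype2ext (not part of the original module):
# >>> mimetype2ext('audio/mp4')
# 'm4a'
# >>> mimetype2ext('application/x-mpegURL')
# 'm3u8'
# >>> mimetype2ext('video/mp4; codecs="avc1.4d401f"')   # parameters are ignored
# 'mp4'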
4f3c5e06 4534def parse_codecs(codecs_str):
4535 # http://tools.ietf.org/html/rfc6381
4536 if not codecs_str:
4537 return {}
a0566bbf 4538 split_codecs = list(filter(None, map(
4f3c5e06 4539 lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4540 vcodec, acodec = None, None
a0566bbf 4541 for full_codec in split_codecs:
4f3c5e06 4542 codec = full_codec.split('.')[0]
28cc2241 4543 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4544 if not vcodec:
4545 vcodec = full_codec
60f5c9fb 4546 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4547 if not acodec:
4548 acodec = full_codec
4549 else:
60f5c9fb 4550 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4551 if not vcodec and not acodec:
a0566bbf 4552 if len(split_codecs) == 2:
4f3c5e06 4553 return {
a0566bbf 4554 'vcodec': split_codecs[0],
4555 'acodec': split_codecs[1],
4f3c5e06 4556 }
4557 else:
4558 return {
4559 'vcodec': vcodec or 'none',
4560 'acodec': acodec or 'none',
4561 }
4562 return {}
4563
4564
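# Illustrative usage sketch for parse_codecs (codec strings are made up but RFC 6381-shaped):
# >>> parse_codecs('avc1.64001f, mp4a.40.2')
# {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
# >>> parse_codecs('opus')
# {'vcodec': 'none', 'acodec': 'opus'}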
2ccd1b10 4565def urlhandle_detect_ext(url_handle):
79298173 4566 getheader = url_handle.headers.get
2ccd1b10 4567
b55ee18f
PH
4568 cd = getheader('Content-Disposition')
4569 if cd:
4570 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4571 if m:
4572 e = determine_ext(m.group('filename'), default_ext=None)
4573 if e:
4574 return e
4575
c460bdd5 4576 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4577
4578
1e399778
YCH
4579def encode_data_uri(data, mime_type):
4580 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4581
4582
05900629 4583def age_restricted(content_limit, age_limit):
6ec6cb4e 4584 """ Returns True iff the content should be blocked """
05900629
PH
4585
4586 if age_limit is None: # No limit set
4587 return False
4588 if content_limit is None:
4589 return False # Content available for everyone
4590 return age_limit < content_limit
61ca9a80
PH
4591
4592
4593def is_html(first_bytes):
4594 """ Detect whether a file contains HTML by examining its first bytes. """
4595
4596 BOMS = [
4597 (b'\xef\xbb\xbf', 'utf-8'),
4598 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4599 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4600 (b'\xff\xfe', 'utf-16-le'),
4601 (b'\xfe\xff', 'utf-16-be'),
4602 ]
4603 for bom, enc in BOMS:
4604 if first_bytes.startswith(bom):
4605 s = first_bytes[len(bom):].decode(enc, 'replace')
4606 break
4607 else:
4608 s = first_bytes.decode('utf-8', 'replace')
4609
4610 return re.match(r'^\s*<', s)
a055469f
PH
4611
4612
4613def determine_protocol(info_dict):
4614 protocol = info_dict.get('protocol')
4615 if protocol is not None:
4616 return protocol
4617
4618 url = info_dict['url']
4619 if url.startswith('rtmp'):
4620 return 'rtmp'
4621 elif url.startswith('mms'):
4622 return 'mms'
4623 elif url.startswith('rtsp'):
4624 return 'rtsp'
4625
4626 ext = determine_ext(url)
4627 if ext == 'm3u8':
4628 return 'm3u8'
4629 elif ext == 'f4m':
4630 return 'f4m'
4631
4632 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4633
4634
76d321f6 4635def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4636 """ Render a list of rows, each as a list of values """
76d321f6 4637
4638 def get_max_lens(table):
4639 return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4640
4641 def filter_using_list(row, filterArray):
4642 return [col for (take, col) in zip(filterArray, row) if take]
4643
4644 if hideEmpty:
4645 max_lens = get_max_lens(data)
4646 header_row = filter_using_list(header_row, max_lens)
4647 data = [filter_using_list(row, max_lens) for row in data]
4648
cfb56d1a 4649 table = [header_row] + data
76d321f6 4650 max_lens = get_max_lens(table)
4651 if delim:
4652 table = [header_row] + [['-' * ml for ml in max_lens]] + data
4653 format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
cfb56d1a 4654 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4655
4656
4657def _match_one(filter_part, dct):
4658 COMPARISON_OPERATORS = {
4659 '<': operator.lt,
4660 '<=': operator.le,
4661 '>': operator.gt,
4662 '>=': operator.ge,
4663 '=': operator.eq,
4664 '!=': operator.ne,
4665 }
4666 operator_rex = re.compile(r'''(?x)\s*
4667 (?P<key>[a-z_]+)
4668 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4669 (?:
4670 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4671 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4672 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4673 )
4674 \s*$
4675 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4676 m = operator_rex.search(filter_part)
4677 if m:
4678 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4679 actual_value = dct.get(m.group('key'))
3089bc74
S
4680 if (m.group('quotedstrval') is not None
4681 or m.group('strval') is not None
e5a088dc
S
4682 # If the original field is a string and matching comparisonvalue is
4683 # a number we should respect the origin of the original field
4684 # and process comparison value as a string (see
067aa17e 4685 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4686 or actual_value is not None and m.group('intval') is not None
4687 and isinstance(actual_value, compat_str)):
347de493
PH
4688 if m.group('op') not in ('=', '!='):
4689 raise ValueError(
4690 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4691 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4692 quote = m.group('quote')
4693 if quote is not None:
4694 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4695 else:
4696 try:
4697 comparison_value = int(m.group('intval'))
4698 except ValueError:
4699 comparison_value = parse_filesize(m.group('intval'))
4700 if comparison_value is None:
4701 comparison_value = parse_filesize(m.group('intval') + 'B')
4702 if comparison_value is None:
4703 raise ValueError(
4704 'Invalid integer value %r in filter part %r' % (
4705 m.group('intval'), filter_part))
347de493
PH
4706 if actual_value is None:
4707 return m.group('none_inclusive')
4708 return op(actual_value, comparison_value)
4709
4710 UNARY_OPERATORS = {
1cc47c66
S
4711 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4712 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4713 }
4714 operator_rex = re.compile(r'''(?x)\s*
4715 (?P<op>%s)\s*(?P<key>[a-z_]+)
4716 \s*$
4717 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4718 m = operator_rex.search(filter_part)
4719 if m:
4720 op = UNARY_OPERATORS[m.group('op')]
4721 actual_value = dct.get(m.group('key'))
4722 return op(actual_value)
4723
4724 raise ValueError('Invalid filter part %r' % filter_part)
4725
4726
4727def match_str(filter_str, dct):
4728 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False """
4729
4730 return all(
4731 _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4732
4733
4734def match_filter_func(filter_str):
4735 def _match_func(info_dict):
4736 if match_str(filter_str, info_dict):
4737 return None
4738 else:
4739 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4740 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4741 return _match_func
91410c9b
PH
4742
4743
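# Illustrative usage sketch for match_str, mirroring the --match-filter syntax (values are made up):
# >>> match_str('like_count > 100 & dislike_count <? 50', {'like_count': 190, 'dislike_count': 10})
# True
# >>> match_str('duration < 60', {'duration': 120})
# False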
bf6427d2
YCH
4744def parse_dfxp_time_expr(time_expr):
4745 if not time_expr:
d631d5f9 4746 return
bf6427d2
YCH
4747
4748 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4749 if mobj:
4750 return float(mobj.group('time_offset'))
4751
db2fe38b 4752 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4753 if mobj:
db2fe38b 4754 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4755
4756
c1c924ab
YCH
4757def srt_subtitles_timecode(seconds):
4758 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
bf6427d2
YCH
4759
4760
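# Illustrative usage sketch for the DFXP/SRT time helpers (not part of the original module):
# >>> parse_dfxp_time_expr('00:01:02.5')
# 62.5
# >>> srt_subtitles_timecode(3661.5)
# '01:01:01,500'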
4761def dfxp2srt(dfxp_data):
3869028f
YCH
4762 '''
4763 @param dfxp_data A bytes-like object containing DFXP data
4764 @returns A unicode object containing converted SRT data
4765 '''
5b995f71 4766 LEGACY_NAMESPACES = (
3869028f
YCH
4767 (b'http://www.w3.org/ns/ttml', [
4768 b'http://www.w3.org/2004/11/ttaf1',
4769 b'http://www.w3.org/2006/04/ttaf1',
4770 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4771 ]),
3869028f
YCH
4772 (b'http://www.w3.org/ns/ttml#styling', [
4773 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4774 ]),
4775 )
4776
4777 SUPPORTED_STYLING = [
4778 'color',
4779 'fontFamily',
4780 'fontSize',
4781 'fontStyle',
4782 'fontWeight',
4783 'textDecoration'
4784 ]
4785
4e335771 4786 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4787 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4788 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4789 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4790 })
bf6427d2 4791
5b995f71
RA
4792 styles = {}
4793 default_style = {}
4794
87de7069 4795 class TTMLPElementParser(object):
5b995f71
RA
4796 _out = ''
4797 _unclosed_elements = []
4798 _applied_styles = []
bf6427d2 4799
2b14cb56 4800 def start(self, tag, attrib):
5b995f71
RA
4801 if tag in (_x('ttml:br'), 'br'):
4802 self._out += '\n'
4803 else:
4804 unclosed_elements = []
4805 style = {}
4806 element_style_id = attrib.get('style')
4807 if default_style:
4808 style.update(default_style)
4809 if element_style_id:
4810 style.update(styles.get(element_style_id, {}))
4811 for prop in SUPPORTED_STYLING:
4812 prop_val = attrib.get(_x('tts:' + prop))
4813 if prop_val:
4814 style[prop] = prop_val
4815 if style:
4816 font = ''
4817 for k, v in sorted(style.items()):
4818 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4819 continue
4820 if k == 'color':
4821 font += ' color="%s"' % v
4822 elif k == 'fontSize':
4823 font += ' size="%s"' % v
4824 elif k == 'fontFamily':
4825 font += ' face="%s"' % v
4826 elif k == 'fontWeight' and v == 'bold':
4827 self._out += '<b>'
4828 unclosed_elements.append('b')
4829 elif k == 'fontStyle' and v == 'italic':
4830 self._out += '<i>'
4831 unclosed_elements.append('i')
4832 elif k == 'textDecoration' and v == 'underline':
4833 self._out += '<u>'
4834 unclosed_elements.append('u')
4835 if font:
4836 self._out += '<font' + font + '>'
4837 unclosed_elements.append('font')
4838 applied_style = {}
4839 if self._applied_styles:
4840 applied_style.update(self._applied_styles[-1])
4841 applied_style.update(style)
4842 self._applied_styles.append(applied_style)
4843 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4844
2b14cb56 4845 def end(self, tag):
5b995f71
RA
4846 if tag not in (_x('ttml:br'), 'br'):
4847 unclosed_elements = self._unclosed_elements.pop()
4848 for element in reversed(unclosed_elements):
4849 self._out += '</%s>' % element
4850 if unclosed_elements and self._applied_styles:
4851 self._applied_styles.pop()
bf6427d2 4852
2b14cb56 4853 def data(self, data):
5b995f71 4854 self._out += data
2b14cb56 4855
4856 def close(self):
5b995f71 4857 return self._out.strip()
2b14cb56 4858
4859 def parse_node(node):
4860 target = TTMLPElementParser()
4861 parser = xml.etree.ElementTree.XMLParser(target=target)
4862 parser.feed(xml.etree.ElementTree.tostring(node))
4863 return parser.close()
bf6427d2 4864
5b995f71
RA
4865 for k, v in LEGACY_NAMESPACES:
4866 for ns in v:
4867 dfxp_data = dfxp_data.replace(ns, k)
4868
3869028f 4869 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 4870 out = []
5b995f71 4871 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
4872
4873 if not paras:
4874 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 4875
5b995f71
RA
4876 repeat = False
4877 while True:
4878 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
4879 style_id = style.get('id') or style.get(_x('xml:id'))
4880 if not style_id:
4881 continue
5b995f71
RA
4882 parent_style_id = style.get('style')
4883 if parent_style_id:
4884 if parent_style_id not in styles:
4885 repeat = True
4886 continue
4887 styles[style_id] = styles[parent_style_id].copy()
4888 for prop in SUPPORTED_STYLING:
4889 prop_val = style.get(_x('tts:' + prop))
4890 if prop_val:
4891 styles.setdefault(style_id, {})[prop] = prop_val
4892 if repeat:
4893 repeat = False
4894 else:
4895 break
4896
4897 for p in ('body', 'div'):
4898 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4899 if ele is None:
4900 continue
4901 style = styles.get(ele.get('style'))
4902 if not style:
4903 continue
4904 default_style.update(style)
4905
bf6427d2 4906 for para, index in zip(paras, itertools.count(1)):
d631d5f9 4907 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 4908 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
4909 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4910 if begin_time is None:
4911 continue
7dff0363 4912 if not end_time:
d631d5f9
YCH
4913 if not dur:
4914 continue
4915 end_time = begin_time + dur
bf6427d2
YCH
4916 out.append('%d\n%s --> %s\n%s\n\n' % (
4917 index,
c1c924ab
YCH
4918 srt_subtitles_timecode(begin_time),
4919 srt_subtitles_timecode(end_time),
bf6427d2
YCH
4920 parse_node(para)))
4921
4922 return ''.join(out)
4923
4924
66e289ba
S
4925def cli_option(params, command_option, param):
4926 param = params.get(param)
98e698f1
RA
4927 if param:
4928 param = compat_str(param)
66e289ba
S
4929 return [command_option, param] if param is not None else []
4930
4931
4932def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4933 param = params.get(param)
5b232f46
S
4934 if param is None:
4935 return []
66e289ba
S
4936 assert isinstance(param, bool)
4937 if separator:
4938 return [command_option + separator + (true_value if param else false_value)]
4939 return [command_option, true_value if param else false_value]
4940
4941
4942def cli_valueless_option(params, command_option, param, expected_value=True):
4943 param = params.get(param)
4944 return [command_option] if param == expected_value else []
4945
4946
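# Illustrative usage sketch for the cli_* helpers (option names and params are made up):
# >>> cli_option({'proxy': 'socks5://127.0.0.1:1080'}, '--proxy', 'proxy')
# ['--proxy', 'socks5://127.0.0.1:1080']
# >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
# ['--no-check-certificate', 'true']
# >>> cli_valueless_option({'downloadarchive': True}, '--download-archive', 'downloadarchive')
# ['--download-archive']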
e92caff5 4947def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 4948 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 4949 if use_compat:
5b1ecbb3 4950 return argdict
4951 else:
4952 argdict = None
eab9b2bc 4953 if argdict is None:
5b1ecbb3 4954 return default
eab9b2bc 4955 assert isinstance(argdict, dict)
4956
e92caff5 4957 assert isinstance(keys, (list, tuple))
4958 for key_list in keys:
4959 if isinstance(key_list, compat_str):
4960 key_list = (key_list,)
4961 arg_list = list(filter(
4962 lambda x: x is not None,
4963 [argdict.get(key.lower()) for key in key_list]))
4964 if arg_list:
4965 return [arg for args in arg_list for arg in args]
4966 return default
66e289ba
S
4967
4968
39672624
YCH
4969class ISO639Utils(object):
4970 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4971 _lang_map = {
4972 'aa': 'aar',
4973 'ab': 'abk',
4974 'ae': 'ave',
4975 'af': 'afr',
4976 'ak': 'aka',
4977 'am': 'amh',
4978 'an': 'arg',
4979 'ar': 'ara',
4980 'as': 'asm',
4981 'av': 'ava',
4982 'ay': 'aym',
4983 'az': 'aze',
4984 'ba': 'bak',
4985 'be': 'bel',
4986 'bg': 'bul',
4987 'bh': 'bih',
4988 'bi': 'bis',
4989 'bm': 'bam',
4990 'bn': 'ben',
4991 'bo': 'bod',
4992 'br': 'bre',
4993 'bs': 'bos',
4994 'ca': 'cat',
4995 'ce': 'che',
4996 'ch': 'cha',
4997 'co': 'cos',
4998 'cr': 'cre',
4999 'cs': 'ces',
5000 'cu': 'chu',
5001 'cv': 'chv',
5002 'cy': 'cym',
5003 'da': 'dan',
5004 'de': 'deu',
5005 'dv': 'div',
5006 'dz': 'dzo',
5007 'ee': 'ewe',
5008 'el': 'ell',
5009 'en': 'eng',
5010 'eo': 'epo',
5011 'es': 'spa',
5012 'et': 'est',
5013 'eu': 'eus',
5014 'fa': 'fas',
5015 'ff': 'ful',
5016 'fi': 'fin',
5017 'fj': 'fij',
5018 'fo': 'fao',
5019 'fr': 'fra',
5020 'fy': 'fry',
5021 'ga': 'gle',
5022 'gd': 'gla',
5023 'gl': 'glg',
5024 'gn': 'grn',
5025 'gu': 'guj',
5026 'gv': 'glv',
5027 'ha': 'hau',
5028 'he': 'heb',
b7acc835 5029 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5030 'hi': 'hin',
5031 'ho': 'hmo',
5032 'hr': 'hrv',
5033 'ht': 'hat',
5034 'hu': 'hun',
5035 'hy': 'hye',
5036 'hz': 'her',
5037 'ia': 'ina',
5038 'id': 'ind',
b7acc835 5039 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5040 'ie': 'ile',
5041 'ig': 'ibo',
5042 'ii': 'iii',
5043 'ik': 'ipk',
5044 'io': 'ido',
5045 'is': 'isl',
5046 'it': 'ita',
5047 'iu': 'iku',
5048 'ja': 'jpn',
5049 'jv': 'jav',
5050 'ka': 'kat',
5051 'kg': 'kon',
5052 'ki': 'kik',
5053 'kj': 'kua',
5054 'kk': 'kaz',
5055 'kl': 'kal',
5056 'km': 'khm',
5057 'kn': 'kan',
5058 'ko': 'kor',
5059 'kr': 'kau',
5060 'ks': 'kas',
5061 'ku': 'kur',
5062 'kv': 'kom',
5063 'kw': 'cor',
5064 'ky': 'kir',
5065 'la': 'lat',
5066 'lb': 'ltz',
5067 'lg': 'lug',
5068 'li': 'lim',
5069 'ln': 'lin',
5070 'lo': 'lao',
5071 'lt': 'lit',
5072 'lu': 'lub',
5073 'lv': 'lav',
5074 'mg': 'mlg',
5075 'mh': 'mah',
5076 'mi': 'mri',
5077 'mk': 'mkd',
5078 'ml': 'mal',
5079 'mn': 'mon',
5080 'mr': 'mar',
5081 'ms': 'msa',
5082 'mt': 'mlt',
5083 'my': 'mya',
5084 'na': 'nau',
5085 'nb': 'nob',
5086 'nd': 'nde',
5087 'ne': 'nep',
5088 'ng': 'ndo',
5089 'nl': 'nld',
5090 'nn': 'nno',
5091 'no': 'nor',
5092 'nr': 'nbl',
5093 'nv': 'nav',
5094 'ny': 'nya',
5095 'oc': 'oci',
5096 'oj': 'oji',
5097 'om': 'orm',
5098 'or': 'ori',
5099 'os': 'oss',
5100 'pa': 'pan',
5101 'pi': 'pli',
5102 'pl': 'pol',
5103 'ps': 'pus',
5104 'pt': 'por',
5105 'qu': 'que',
5106 'rm': 'roh',
5107 'rn': 'run',
5108 'ro': 'ron',
5109 'ru': 'rus',
5110 'rw': 'kin',
5111 'sa': 'san',
5112 'sc': 'srd',
5113 'sd': 'snd',
5114 'se': 'sme',
5115 'sg': 'sag',
5116 'si': 'sin',
5117 'sk': 'slk',
5118 'sl': 'slv',
5119 'sm': 'smo',
5120 'sn': 'sna',
5121 'so': 'som',
5122 'sq': 'sqi',
5123 'sr': 'srp',
5124 'ss': 'ssw',
5125 'st': 'sot',
5126 'su': 'sun',
5127 'sv': 'swe',
5128 'sw': 'swa',
5129 'ta': 'tam',
5130 'te': 'tel',
5131 'tg': 'tgk',
5132 'th': 'tha',
5133 'ti': 'tir',
5134 'tk': 'tuk',
5135 'tl': 'tgl',
5136 'tn': 'tsn',
5137 'to': 'ton',
5138 'tr': 'tur',
5139 'ts': 'tso',
5140 'tt': 'tat',
5141 'tw': 'twi',
5142 'ty': 'tah',
5143 'ug': 'uig',
5144 'uk': 'ukr',
5145 'ur': 'urd',
5146 'uz': 'uzb',
5147 've': 'ven',
5148 'vi': 'vie',
5149 'vo': 'vol',
5150 'wa': 'wln',
5151 'wo': 'wol',
5152 'xh': 'xho',
5153 'yi': 'yid',
e9a50fba 5154 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5155 'yo': 'yor',
5156 'za': 'zha',
5157 'zh': 'zho',
5158 'zu': 'zul',
5159 }
5160
5161 @classmethod
5162 def short2long(cls, code):
5163 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5164 return cls._lang_map.get(code[:2])
5165
5166 @classmethod
5167 def long2short(cls, code):
5168 """Convert language code from ISO 639-2/T to ISO 639-1"""
5169 for short_name, long_name in cls._lang_map.items():
5170 if long_name == code:
5171 return short_name
5172
5173
4eb10f66
YCH
5174class ISO3166Utils(object):
5175 # From http://data.okfn.org/data/core/country-list
5176 _country_map = {
5177 'AF': 'Afghanistan',
5178 'AX': 'Åland Islands',
5179 'AL': 'Albania',
5180 'DZ': 'Algeria',
5181 'AS': 'American Samoa',
5182 'AD': 'Andorra',
5183 'AO': 'Angola',
5184 'AI': 'Anguilla',
5185 'AQ': 'Antarctica',
5186 'AG': 'Antigua and Barbuda',
5187 'AR': 'Argentina',
5188 'AM': 'Armenia',
5189 'AW': 'Aruba',
5190 'AU': 'Australia',
5191 'AT': 'Austria',
5192 'AZ': 'Azerbaijan',
5193 'BS': 'Bahamas',
5194 'BH': 'Bahrain',
5195 'BD': 'Bangladesh',
5196 'BB': 'Barbados',
5197 'BY': 'Belarus',
5198 'BE': 'Belgium',
5199 'BZ': 'Belize',
5200 'BJ': 'Benin',
5201 'BM': 'Bermuda',
5202 'BT': 'Bhutan',
5203 'BO': 'Bolivia, Plurinational State of',
5204 'BQ': 'Bonaire, Sint Eustatius and Saba',
5205 'BA': 'Bosnia and Herzegovina',
5206 'BW': 'Botswana',
5207 'BV': 'Bouvet Island',
5208 'BR': 'Brazil',
5209 'IO': 'British Indian Ocean Territory',
5210 'BN': 'Brunei Darussalam',
5211 'BG': 'Bulgaria',
5212 'BF': 'Burkina Faso',
5213 'BI': 'Burundi',
5214 'KH': 'Cambodia',
5215 'CM': 'Cameroon',
5216 'CA': 'Canada',
5217 'CV': 'Cape Verde',
5218 'KY': 'Cayman Islands',
5219 'CF': 'Central African Republic',
5220 'TD': 'Chad',
5221 'CL': 'Chile',
5222 'CN': 'China',
5223 'CX': 'Christmas Island',
5224 'CC': 'Cocos (Keeling) Islands',
5225 'CO': 'Colombia',
5226 'KM': 'Comoros',
5227 'CG': 'Congo',
5228 'CD': 'Congo, the Democratic Republic of the',
5229 'CK': 'Cook Islands',
5230 'CR': 'Costa Rica',
5231 'CI': 'Côte d\'Ivoire',
5232 'HR': 'Croatia',
5233 'CU': 'Cuba',
5234 'CW': 'Curaçao',
5235 'CY': 'Cyprus',
5236 'CZ': 'Czech Republic',
5237 'DK': 'Denmark',
5238 'DJ': 'Djibouti',
5239 'DM': 'Dominica',
5240 'DO': 'Dominican Republic',
5241 'EC': 'Ecuador',
5242 'EG': 'Egypt',
5243 'SV': 'El Salvador',
5244 'GQ': 'Equatorial Guinea',
5245 'ER': 'Eritrea',
5246 'EE': 'Estonia',
5247 'ET': 'Ethiopia',
5248 'FK': 'Falkland Islands (Malvinas)',
5249 'FO': 'Faroe Islands',
5250 'FJ': 'Fiji',
5251 'FI': 'Finland',
5252 'FR': 'France',
5253 'GF': 'French Guiana',
5254 'PF': 'French Polynesia',
5255 'TF': 'French Southern Territories',
5256 'GA': 'Gabon',
5257 'GM': 'Gambia',
5258 'GE': 'Georgia',
5259 'DE': 'Germany',
5260 'GH': 'Ghana',
5261 'GI': 'Gibraltar',
5262 'GR': 'Greece',
5263 'GL': 'Greenland',
5264 'GD': 'Grenada',
5265 'GP': 'Guadeloupe',
5266 'GU': 'Guam',
5267 'GT': 'Guatemala',
5268 'GG': 'Guernsey',
5269 'GN': 'Guinea',
5270 'GW': 'Guinea-Bissau',
5271 'GY': 'Guyana',
5272 'HT': 'Haiti',
5273 'HM': 'Heard Island and McDonald Islands',
5274 'VA': 'Holy See (Vatican City State)',
5275 'HN': 'Honduras',
5276 'HK': 'Hong Kong',
5277 'HU': 'Hungary',
5278 'IS': 'Iceland',
5279 'IN': 'India',
5280 'ID': 'Indonesia',
5281 'IR': 'Iran, Islamic Republic of',
5282 'IQ': 'Iraq',
5283 'IE': 'Ireland',
5284 'IM': 'Isle of Man',
5285 'IL': 'Israel',
5286 'IT': 'Italy',
5287 'JM': 'Jamaica',
5288 'JP': 'Japan',
5289 'JE': 'Jersey',
5290 'JO': 'Jordan',
5291 'KZ': 'Kazakhstan',
5292 'KE': 'Kenya',
5293 'KI': 'Kiribati',
5294 'KP': 'Korea, Democratic People\'s Republic of',
5295 'KR': 'Korea, Republic of',
5296 'KW': 'Kuwait',
5297 'KG': 'Kyrgyzstan',
5298 'LA': 'Lao People\'s Democratic Republic',
5299 'LV': 'Latvia',
5300 'LB': 'Lebanon',
5301 'LS': 'Lesotho',
5302 'LR': 'Liberia',
5303 'LY': 'Libya',
5304 'LI': 'Liechtenstein',
5305 'LT': 'Lithuania',
5306 'LU': 'Luxembourg',
5307 'MO': 'Macao',
5308 'MK': 'Macedonia, the Former Yugoslav Republic of',
5309 'MG': 'Madagascar',
5310 'MW': 'Malawi',
5311 'MY': 'Malaysia',
5312 'MV': 'Maldives',
5313 'ML': 'Mali',
5314 'MT': 'Malta',
5315 'MH': 'Marshall Islands',
5316 'MQ': 'Martinique',
5317 'MR': 'Mauritania',
5318 'MU': 'Mauritius',
5319 'YT': 'Mayotte',
5320 'MX': 'Mexico',
5321 'FM': 'Micronesia, Federated States of',
5322 'MD': 'Moldova, Republic of',
5323 'MC': 'Monaco',
5324 'MN': 'Mongolia',
5325 'ME': 'Montenegro',
5326 'MS': 'Montserrat',
5327 'MA': 'Morocco',
5328 'MZ': 'Mozambique',
5329 'MM': 'Myanmar',
5330 'NA': 'Namibia',
5331 'NR': 'Nauru',
5332 'NP': 'Nepal',
5333 'NL': 'Netherlands',
5334 'NC': 'New Caledonia',
5335 'NZ': 'New Zealand',
5336 'NI': 'Nicaragua',
5337 'NE': 'Niger',
5338 'NG': 'Nigeria',
5339 'NU': 'Niue',
5340 'NF': 'Norfolk Island',
5341 'MP': 'Northern Mariana Islands',
5342 'NO': 'Norway',
5343 'OM': 'Oman',
5344 'PK': 'Pakistan',
5345 'PW': 'Palau',
5346 'PS': 'Palestine, State of',
5347 'PA': 'Panama',
5348 'PG': 'Papua New Guinea',
5349 'PY': 'Paraguay',
5350 'PE': 'Peru',
5351 'PH': 'Philippines',
5352 'PN': 'Pitcairn',
5353 'PL': 'Poland',
5354 'PT': 'Portugal',
5355 'PR': 'Puerto Rico',
5356 'QA': 'Qatar',
5357 'RE': 'Réunion',
5358 'RO': 'Romania',
5359 'RU': 'Russian Federation',
5360 'RW': 'Rwanda',
5361 'BL': 'Saint Barthélemy',
5362 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5363 'KN': 'Saint Kitts and Nevis',
5364 'LC': 'Saint Lucia',
5365 'MF': 'Saint Martin (French part)',
5366 'PM': 'Saint Pierre and Miquelon',
5367 'VC': 'Saint Vincent and the Grenadines',
5368 'WS': 'Samoa',
5369 'SM': 'San Marino',
5370 'ST': 'Sao Tome and Principe',
5371 'SA': 'Saudi Arabia',
5372 'SN': 'Senegal',
5373 'RS': 'Serbia',
5374 'SC': 'Seychelles',
5375 'SL': 'Sierra Leone',
5376 'SG': 'Singapore',
5377 'SX': 'Sint Maarten (Dutch part)',
5378 'SK': 'Slovakia',
5379 'SI': 'Slovenia',
5380 'SB': 'Solomon Islands',
5381 'SO': 'Somalia',
5382 'ZA': 'South Africa',
5383 'GS': 'South Georgia and the South Sandwich Islands',
5384 'SS': 'South Sudan',
5385 'ES': 'Spain',
5386 'LK': 'Sri Lanka',
5387 'SD': 'Sudan',
5388 'SR': 'Suriname',
5389 'SJ': 'Svalbard and Jan Mayen',
5390 'SZ': 'Swaziland',
5391 'SE': 'Sweden',
5392 'CH': 'Switzerland',
5393 'SY': 'Syrian Arab Republic',
5394 'TW': 'Taiwan, Province of China',
5395 'TJ': 'Tajikistan',
5396 'TZ': 'Tanzania, United Republic of',
5397 'TH': 'Thailand',
5398 'TL': 'Timor-Leste',
5399 'TG': 'Togo',
5400 'TK': 'Tokelau',
5401 'TO': 'Tonga',
5402 'TT': 'Trinidad and Tobago',
5403 'TN': 'Tunisia',
5404 'TR': 'Turkey',
5405 'TM': 'Turkmenistan',
5406 'TC': 'Turks and Caicos Islands',
5407 'TV': 'Tuvalu',
5408 'UG': 'Uganda',
5409 'UA': 'Ukraine',
5410 'AE': 'United Arab Emirates',
5411 'GB': 'United Kingdom',
5412 'US': 'United States',
5413 'UM': 'United States Minor Outlying Islands',
5414 'UY': 'Uruguay',
5415 'UZ': 'Uzbekistan',
5416 'VU': 'Vanuatu',
5417 'VE': 'Venezuela, Bolivarian Republic of',
5418 'VN': 'Viet Nam',
5419 'VG': 'Virgin Islands, British',
5420 'VI': 'Virgin Islands, U.S.',
5421 'WF': 'Wallis and Futuna',
5422 'EH': 'Western Sahara',
5423 'YE': 'Yemen',
5424 'ZM': 'Zambia',
5425 'ZW': 'Zimbabwe',
5426 }
5427
5428 @classmethod
5429 def short2full(cls, code):
5430 """Convert an ISO 3166-2 country code to the corresponding full name"""
5431 return cls._country_map.get(code.upper())
5432
5433
773f291d
S
5434class GeoUtils(object):
5435 # Major IPv4 address blocks per country
5436 _country_ip_map = {
53896ca5 5437 'AD': '46.172.224.0/19',
773f291d
S
5438 'AE': '94.200.0.0/13',
5439 'AF': '149.54.0.0/17',
5440 'AG': '209.59.64.0/18',
5441 'AI': '204.14.248.0/21',
5442 'AL': '46.99.0.0/16',
5443 'AM': '46.70.0.0/15',
5444 'AO': '105.168.0.0/13',
53896ca5
S
5445 'AP': '182.50.184.0/21',
5446 'AQ': '23.154.160.0/24',
773f291d
S
5447 'AR': '181.0.0.0/12',
5448 'AS': '202.70.112.0/20',
53896ca5 5449 'AT': '77.116.0.0/14',
773f291d
S
5450 'AU': '1.128.0.0/11',
5451 'AW': '181.41.0.0/18',
53896ca5
S
5452 'AX': '185.217.4.0/22',
5453 'AZ': '5.197.0.0/16',
773f291d
S
5454 'BA': '31.176.128.0/17',
5455 'BB': '65.48.128.0/17',
5456 'BD': '114.130.0.0/16',
5457 'BE': '57.0.0.0/8',
53896ca5 5458 'BF': '102.178.0.0/15',
773f291d
S
5459 'BG': '95.42.0.0/15',
5460 'BH': '37.131.0.0/17',
5461 'BI': '154.117.192.0/18',
5462 'BJ': '137.255.0.0/16',
53896ca5 5463 'BL': '185.212.72.0/23',
773f291d
S
5464 'BM': '196.12.64.0/18',
5465 'BN': '156.31.0.0/16',
5466 'BO': '161.56.0.0/16',
5467 'BQ': '161.0.80.0/20',
53896ca5 5468 'BR': '191.128.0.0/12',
773f291d
S
5469 'BS': '24.51.64.0/18',
5470 'BT': '119.2.96.0/19',
5471 'BW': '168.167.0.0/16',
5472 'BY': '178.120.0.0/13',
5473 'BZ': '179.42.192.0/18',
5474 'CA': '99.224.0.0/11',
5475 'CD': '41.243.0.0/16',
53896ca5
S
5476 'CF': '197.242.176.0/21',
5477 'CG': '160.113.0.0/16',
773f291d 5478 'CH': '85.0.0.0/13',
53896ca5 5479 'CI': '102.136.0.0/14',
773f291d
S
5480 'CK': '202.65.32.0/19',
5481 'CL': '152.172.0.0/14',
53896ca5 5482 'CM': '102.244.0.0/14',
773f291d
S
5483 'CN': '36.128.0.0/10',
5484 'CO': '181.240.0.0/12',
5485 'CR': '201.192.0.0/12',
5486 'CU': '152.206.0.0/15',
5487 'CV': '165.90.96.0/19',
5488 'CW': '190.88.128.0/17',
53896ca5 5489 'CY': '31.153.0.0/16',
773f291d
S
5490 'CZ': '88.100.0.0/14',
5491 'DE': '53.0.0.0/8',
5492 'DJ': '197.241.0.0/17',
5493 'DK': '87.48.0.0/12',
5494 'DM': '192.243.48.0/20',
5495 'DO': '152.166.0.0/15',
5496 'DZ': '41.96.0.0/12',
5497 'EC': '186.68.0.0/15',
5498 'EE': '90.190.0.0/15',
5499 'EG': '156.160.0.0/11',
5500 'ER': '196.200.96.0/20',
5501 'ES': '88.0.0.0/11',
5502 'ET': '196.188.0.0/14',
5503 'EU': '2.16.0.0/13',
5504 'FI': '91.152.0.0/13',
5505 'FJ': '144.120.0.0/16',
53896ca5 5506 'FK': '80.73.208.0/21',
773f291d
S
5507 'FM': '119.252.112.0/20',
5508 'FO': '88.85.32.0/19',
5509 'FR': '90.0.0.0/9',
5510 'GA': '41.158.0.0/15',
5511 'GB': '25.0.0.0/8',
5512 'GD': '74.122.88.0/21',
5513 'GE': '31.146.0.0/16',
5514 'GF': '161.22.64.0/18',
5515 'GG': '62.68.160.0/19',
53896ca5
S
5516 'GH': '154.160.0.0/12',
5517 'GI': '95.164.0.0/16',
773f291d
S
5518 'GL': '88.83.0.0/19',
5519 'GM': '160.182.0.0/15',
5520 'GN': '197.149.192.0/18',
5521 'GP': '104.250.0.0/19',
5522 'GQ': '105.235.224.0/20',
5523 'GR': '94.64.0.0/13',
5524 'GT': '168.234.0.0/16',
5525 'GU': '168.123.0.0/16',
5526 'GW': '197.214.80.0/20',
5527 'GY': '181.41.64.0/18',
5528 'HK': '113.252.0.0/14',
5529 'HN': '181.210.0.0/16',
5530 'HR': '93.136.0.0/13',
5531 'HT': '148.102.128.0/17',
5532 'HU': '84.0.0.0/14',
5533 'ID': '39.192.0.0/10',
5534 'IE': '87.32.0.0/12',
5535 'IL': '79.176.0.0/13',
5536 'IM': '5.62.80.0/20',
5537 'IN': '117.192.0.0/10',
5538 'IO': '203.83.48.0/21',
5539 'IQ': '37.236.0.0/14',
5540 'IR': '2.176.0.0/12',
5541 'IS': '82.221.0.0/16',
5542 'IT': '79.0.0.0/10',
5543 'JE': '87.244.64.0/18',
5544 'JM': '72.27.0.0/17',
5545 'JO': '176.29.0.0/16',
53896ca5 5546 'JP': '133.0.0.0/8',
773f291d
S
5547 'KE': '105.48.0.0/12',
5548 'KG': '158.181.128.0/17',
5549 'KH': '36.37.128.0/17',
5550 'KI': '103.25.140.0/22',
5551 'KM': '197.255.224.0/20',
53896ca5 5552 'KN': '198.167.192.0/19',
773f291d
S
5553 'KP': '175.45.176.0/22',
5554 'KR': '175.192.0.0/10',
5555 'KW': '37.36.0.0/14',
5556 'KY': '64.96.0.0/15',
5557 'KZ': '2.72.0.0/13',
5558 'LA': '115.84.64.0/18',
5559 'LB': '178.135.0.0/16',
53896ca5 5560 'LC': '24.92.144.0/20',
773f291d
S
5561 'LI': '82.117.0.0/19',
5562 'LK': '112.134.0.0/15',
53896ca5 5563 'LR': '102.183.0.0/16',
773f291d
S
5564 'LS': '129.232.0.0/17',
5565 'LT': '78.56.0.0/13',
5566 'LU': '188.42.0.0/16',
5567 'LV': '46.109.0.0/16',
5568 'LY': '41.252.0.0/14',
5569 'MA': '105.128.0.0/11',
5570 'MC': '88.209.64.0/18',
5571 'MD': '37.246.0.0/16',
5572 'ME': '178.175.0.0/17',
5573 'MF': '74.112.232.0/21',
5574 'MG': '154.126.0.0/17',
5575 'MH': '117.103.88.0/21',
5576 'MK': '77.28.0.0/15',
5577 'ML': '154.118.128.0/18',
5578 'MM': '37.111.0.0/17',
5579 'MN': '49.0.128.0/17',
5580 'MO': '60.246.0.0/16',
5581 'MP': '202.88.64.0/20',
5582 'MQ': '109.203.224.0/19',
5583 'MR': '41.188.64.0/18',
5584 'MS': '208.90.112.0/22',
5585 'MT': '46.11.0.0/16',
5586 'MU': '105.16.0.0/12',
5587 'MV': '27.114.128.0/18',
53896ca5 5588 'MW': '102.70.0.0/15',
773f291d
S
5589 'MX': '187.192.0.0/11',
5590 'MY': '175.136.0.0/13',
5591 'MZ': '197.218.0.0/15',
5592 'NA': '41.182.0.0/16',
5593 'NC': '101.101.0.0/18',
5594 'NE': '197.214.0.0/18',
5595 'NF': '203.17.240.0/22',
5596 'NG': '105.112.0.0/12',
5597 'NI': '186.76.0.0/15',
5598 'NL': '145.96.0.0/11',
5599 'NO': '84.208.0.0/13',
5600 'NP': '36.252.0.0/15',
5601 'NR': '203.98.224.0/19',
5602 'NU': '49.156.48.0/22',
5603 'NZ': '49.224.0.0/14',
5604 'OM': '5.36.0.0/15',
5605 'PA': '186.72.0.0/15',
5606 'PE': '186.160.0.0/14',
5607 'PF': '123.50.64.0/18',
5608 'PG': '124.240.192.0/19',
5609 'PH': '49.144.0.0/13',
5610 'PK': '39.32.0.0/11',
5611 'PL': '83.0.0.0/11',
5612 'PM': '70.36.0.0/20',
5613 'PR': '66.50.0.0/16',
5614 'PS': '188.161.0.0/16',
5615 'PT': '85.240.0.0/13',
5616 'PW': '202.124.224.0/20',
5617 'PY': '181.120.0.0/14',
5618 'QA': '37.210.0.0/15',
53896ca5 5619 'RE': '102.35.0.0/16',
773f291d 5620 'RO': '79.112.0.0/13',
53896ca5 5621 'RS': '93.86.0.0/15',
773f291d 5622 'RU': '5.136.0.0/13',
53896ca5 5623 'RW': '41.186.0.0/16',
773f291d
S
5624 'SA': '188.48.0.0/13',
5625 'SB': '202.1.160.0/19',
5626 'SC': '154.192.0.0/11',
53896ca5 5627 'SD': '102.120.0.0/13',
773f291d 5628 'SE': '78.64.0.0/12',
53896ca5 5629 'SG': '8.128.0.0/10',
773f291d
S
5630 'SI': '188.196.0.0/14',
5631 'SK': '78.98.0.0/15',
53896ca5 5632 'SL': '102.143.0.0/17',
773f291d
S
5633 'SM': '89.186.32.0/19',
5634 'SN': '41.82.0.0/15',
53896ca5 5635 'SO': '154.115.192.0/18',
773f291d
S
5636 'SR': '186.179.128.0/17',
5637 'SS': '105.235.208.0/21',
5638 'ST': '197.159.160.0/19',
5639 'SV': '168.243.0.0/16',
5640 'SX': '190.102.0.0/20',
5641 'SY': '5.0.0.0/16',
5642 'SZ': '41.84.224.0/19',
5643 'TC': '65.255.48.0/20',
5644 'TD': '154.68.128.0/19',
5645 'TG': '196.168.0.0/14',
5646 'TH': '171.96.0.0/13',
5647 'TJ': '85.9.128.0/18',
5648 'TK': '27.96.24.0/21',
5649 'TL': '180.189.160.0/20',
5650 'TM': '95.85.96.0/19',
5651 'TN': '197.0.0.0/11',
5652 'TO': '175.176.144.0/21',
5653 'TR': '78.160.0.0/11',
5654 'TT': '186.44.0.0/15',
5655 'TV': '202.2.96.0/19',
5656 'TW': '120.96.0.0/11',
5657 'TZ': '156.156.0.0/14',
53896ca5
S
5658 'UA': '37.52.0.0/14',
5659 'UG': '102.80.0.0/13',
5660 'US': '6.0.0.0/8',
773f291d 5661 'UY': '167.56.0.0/13',
53896ca5 5662 'UZ': '84.54.64.0/18',
773f291d 5663 'VA': '212.77.0.0/19',
53896ca5 5664 'VC': '207.191.240.0/21',
773f291d 5665 'VE': '186.88.0.0/13',
53896ca5 5666 'VG': '66.81.192.0/20',
773f291d
S
5667 'VI': '146.226.0.0/16',
5668 'VN': '14.160.0.0/11',
5669 'VU': '202.80.32.0/20',
5670 'WF': '117.20.32.0/21',
5671 'WS': '202.4.32.0/19',
5672 'YE': '134.35.0.0/16',
5673 'YT': '41.242.116.0/22',
5674 'ZA': '41.0.0.0/11',
53896ca5
S
5675 'ZM': '102.144.0.0/13',
5676 'ZW': '102.177.192.0/18',
773f291d
S
5677 }
5678
5679 @classmethod
5f95927a
S
5680 def random_ipv4(cls, code_or_block):
5681 if len(code_or_block) == 2:
5682 block = cls._country_ip_map.get(code_or_block.upper())
5683 if not block:
5684 return None
5685 else:
5686 block = code_or_block
773f291d
S
5687 addr, preflen = block.split('/')
5688 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5689 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5690 return compat_str(socket.inet_ntoa(
4248dad9 5691 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5692
5693
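# Illustrative usage sketch for GeoUtils.random_ipv4 (the returned address is random within the block):
# >>> GeoUtils.random_ipv4('US')           # some address inside 6.0.0.0/8, e.g. '6.23.145.2'
# >>> GeoUtils.random_ipv4('10.0.0.0/8')   # an explicit CIDR block also works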
91410c9b 5694class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5695 def __init__(self, proxies=None):
5696 # Set default handlers
5697 for type in ('http', 'https'):
5698 setattr(self, '%s_open' % type,
5699 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5700 meth(r, proxy, type))
38e87f6c 5701 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5702
91410c9b 5703 def proxy_open(self, req, proxy, type):
2461f79d 5704 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5705 if req_proxy is not None:
5706 proxy = req_proxy
2461f79d
PH
5707 del req.headers['Ytdl-request-proxy']
5708
5709 if proxy == '__noproxy__':
5710 return None # No Proxy
51fb4995 5711 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5712 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5713 # yt-dlp's http/https handlers do the wrapping of the socket with SOCKS
71aff188 5714 return None
91410c9b
PH
5715 return compat_urllib_request.ProxyHandler.proxy_open(
5716 self, req, proxy, type)
5bc880b9
YCH
5717
5718
0a5445dd
YCH
5719# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5720# released into Public Domain
5721# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5722
5723def long_to_bytes(n, blocksize=0):
5724 """long_to_bytes(n:long, blocksize:int) : string
5725 Convert a long integer to a byte string.
5726
5727 If optional blocksize is given and greater than zero, pad the front of the
5728 byte string with binary zeros so that the length is a multiple of
5729 blocksize.
5730 """
5731 # after much testing, this algorithm was deemed to be the fastest
5732 s = b''
5733 n = int(n)
5734 while n > 0:
5735 s = compat_struct_pack('>I', n & 0xffffffff) + s
5736 n = n >> 32
5737 # strip off leading zeros
5738 for i in range(len(s)):
5739 if s[i] != b'\000'[0]:
5740 break
5741 else:
5742 # only happens when n == 0
5743 s = b'\000'
5744 i = 0
5745 s = s[i:]
5746 # add back some pad bytes. this could be done more efficiently w.r.t. the
5747 # de-padding being done above, but sigh...
5748 if blocksize > 0 and len(s) % blocksize:
5749 s = (blocksize - len(s) % blocksize) * b'\000' + s
5750 return s
5751
5752
5753def bytes_to_long(s):
5754 """bytes_to_long(string) : long
5755 Convert a byte string to a long integer.
5756
5757 This is (essentially) the inverse of long_to_bytes().
5758 """
5759 acc = 0
5760 length = len(s)
5761 if length % 4:
5762 extra = (4 - length % 4)
5763 s = b'\000' * extra + s
5764 length = length + extra
5765 for i in range(0, length, 4):
5766 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5767 return acc
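# Illustrative round trip (not part of the original module):
#   long_to_bytes(65537) == b'\x01\x00\x01'
#   long_to_bytes(65537, blocksize=4) == b'\x00\x01\x00\x01'
#   bytes_to_long(b'\x01\x00\x01') == 65537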
5768
5769
5770def ohdave_rsa_encrypt(data, exponent, modulus):
5771 '''
5772 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5773
5774 Input:
5775 data: data to encrypt, bytes-like object
5776 exponent, modulus: parameter e and N of RSA algorithm, both integer
5777 Output: hex string of encrypted data
5778
5779 Limitation: supports one block encryption only
5780 '''
5781
5782 payload = int(binascii.hexlify(data[::-1]), 16)
5783 encrypted = pow(payload, exponent, modulus)
5784 return '%x' % encrypted
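# Illustrative example with toy parameters (not part of the original module):
#   ohdave_rsa_encrypt(b'\x02', exponent=3, modulus=101) == '8'
# because the reversed payload is the integer 2 and pow(2, 3, 101) == 8.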
5785
5786
5787def pkcs1pad(data, length):
5788 """
5789 Padding input data with PKCS#1 scheme
5790
5791 @param {int[]} data input data
5792 @param {int} length target length
5793 @returns {int[]} padded data
5794 """
5795 if len(data) > length - 11:
5796 raise ValueError('Input data too long for PKCS#1 padding')
5797
5798 pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # padding octets must be non-zero (PKCS#1 v1.5)
5799 return [0, 2] + pseudo_random + [0] + data
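# Illustrative example (not part of the original module): padding a 3-byte message
# to a 16-byte block gives
#   pkcs1pad([1, 2, 3], 16) -> [0, 2, r1, ..., r10, 0, 1, 2, 3]
# i.e. the 0x00 0x02 marker, ten random padding octets, a 0x00 separator, the data.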
5800
5801
5eb6bdce 5802def encode_base_n(num, n, table=None):
59f898b7 5803 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5804 if not table:
5805 table = FULL_TABLE[:n]
5806
5807 if n > len(table):
5808 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5809
5810 if num == 0:
5811 return table[0]
5812
5813 ret = ''
5814 while num:
5815 ret = table[num % n] + ret
5816 num = num // n
5817 return ret
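# Illustrative examples (not part of the original module):
#   encode_base_n(255, 16) == 'ff'
#   encode_base_n(61, 62) == 'Z'
#   encode_base_n(0, 2) == '0'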
5818
5819
5820def decode_packed_codes(code):
06b3fe29 5821 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5822 obfuscated_code, base, count, symbols = mobj.groups()
5823 base = int(base)
5824 count = int(count)
5825 symbols = symbols.split('|')
5826 symbol_table = {}
5827
5828 while count:
5829 count -= 1
5eb6bdce 5830 base_n_count = encode_base_n(count, base)
5831 symbol_table[base_n_count] = symbols[count] or base_n_count
5832
5833 return re.sub(
5834 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5835 obfuscated_code)
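# Illustrative example (not part of the original module), assuming PACKED_CODES_RE
# matches the usual p.a.c.k.e.r. eval wrapper:
#   packed = "eval(function(p,a,c,k,e,d){}('0 1',62,2,'hello|world'.split('|'),0,{}))"
#   decode_packed_codes(packed) == 'hello world'
# Every base-62 token in the obfuscated code is replaced by its symbol-table entry.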
e154c651 5836
5837
5838def caesar(s, alphabet, shift):
5839 if shift == 0:
5840 return s
5841 l = len(alphabet)
5842 return ''.join(
5843 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5844 for c in s)
5845
5846
5847def rot47(s):
5848 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
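# Illustrative examples (not part of the original module):
#   caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 1) == 'bcd'
#   rot47(rot47(s)) == s for any string s (a shift of 47 over the 94 printable
#   characters makes rot47 its own inverse)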
5849
5850
e154c651 5851def parse_m3u8_attributes(attrib):
5852 info = {}
5853 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5854 if val.startswith('"'):
5855 val = val[1:-1]
5856 info[key] = val
5857 return info
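# Illustrative example (not part of the original module):
#   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#   == {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}
# Quoted values keep their embedded commas; the surrounding quotes are stripped.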
5858
5859
5860def urshift(val, n):
5861 return val >> n if val >= 0 else (val + 0x100000000) >> n
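# Illustrative examples (not part of the original module): this emulates an
# unsigned (logical) 32-bit right shift, like JavaScript's >>> operator:
#   urshift(-1, 28) == 15     # -1 is treated as 0xffffffff
#   urshift(16, 2) == 4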
5862
5863
5864# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5865# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5866def decode_png(png_data):
5867 # Reference: https://www.w3.org/TR/PNG/
5868 header = png_data[8:]
5869
5870 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5871 raise IOError('Not a valid PNG file.')
5872
5873 int_map = {1: '>B', 2: '>H', 4: '>I'}
5874 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5875
5876 chunks = []
5877
5878 while header:
5879 length = unpack_integer(header[:4])
5880 header = header[4:]
5881
5882 chunk_type = header[:4]
5883 header = header[4:]
5884
5885 chunk_data = header[:length]
5886 header = header[length:]
5887
5888 header = header[4:] # Skip CRC
5889
5890 chunks.append({
5891 'type': chunk_type,
5892 'length': length,
5893 'data': chunk_data
5894 })
5895
5896 ihdr = chunks[0]['data']
5897
5898 width = unpack_integer(ihdr[:4])
5899 height = unpack_integer(ihdr[4:8])
5900
5901 idat = b''
5902
5903 for chunk in chunks:
5904 if chunk['type'] == b'IDAT':
5905 idat += chunk['data']
5906
5907 if not idat:
5908 raise IOError('Unable to read PNG data.')
5909
5910 decompressed_data = bytearray(zlib.decompress(idat))
5911
5912 stride = width * 3
5913 pixels = []
5914
5915 def _get_pixel(idx):
5916 x = idx % stride
5917 y = idx // stride
5918 return pixels[y][x]
5919
5920 for y in range(height):
5921 basePos = y * (1 + stride)
5922 filter_type = decompressed_data[basePos]
5923
5924 current_row = []
5925
5926 pixels.append(current_row)
5927
5928 for x in range(stride):
5929 color = decompressed_data[1 + basePos + x]
5930 basex = y * stride + x
5931 left = 0
5932 up = 0
5933
5934 if x > 2:
5935 left = _get_pixel(basex - 3)
5936 if y > 0:
5937 up = _get_pixel(basex - stride)
5938
5939 if filter_type == 1: # Sub
5940 color = (color + left) & 0xff
5941 elif filter_type == 2: # Up
5942 color = (color + up) & 0xff
5943 elif filter_type == 3: # Average
5944 color = (color + ((left + up) >> 1)) & 0xff
5945 elif filter_type == 4: # Paeth
5946 a = left
5947 b = up
5948 c = 0
5949
5950 if x > 2 and y > 0:
5951 c = _get_pixel(basex - stride - 3)
5952
5953 p = a + b - c
5954
5955 pa = abs(p - a)
5956 pb = abs(p - b)
5957 pc = abs(p - c)
5958
5959 if pa <= pb and pa <= pc:
5960 color = (color + a) & 0xff
5961 elif pb <= pc:
5962 color = (color + b) & 0xff
5963 else:
5964 color = (color + c) & 0xff
5965
5966 current_row.append(color)
5967
5968 return width, height, pixels
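# Illustrative Paeth example (not part of the original module): with left a=100,
# up b=200 and upper-left c=50, p = a + b - c = 250, so pa=150, pb=50, pc=200;
# pb is the smallest, the predictor is b and the byte is reconstructed as
# (raw + 200) & 0xff.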
5969
5970
5971def write_xattr(path, key, value):
5972 # This mess below finds the best xattr tool for the job
5973 try:
5974 # try the pyxattr module...
5975 import xattr
5976
5977 if hasattr(xattr, 'set'): # pyxattr
5978 # Unicode arguments are not supported in python-pyxattr until
5979 # version 0.5.0
067aa17e 5980 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5981 pyxattr_required_version = '0.5.0'
5982 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5983 # TODO: fallback to CLI tools
5984 raise XAttrUnavailableError(
5985 'python-pyxattr is detected but is too old. '
7a5c1cfe 5986 'yt-dlp requires %s or above while your version is %s. '
5987 'Falling back to other xattr implementations' % (
5988 pyxattr_required_version, xattr.__version__))
5989
5990 setxattr = xattr.set
5991 else: # xattr
5992 setxattr = xattr.setxattr
5993
5994 try:
53a7e3d2 5995 setxattr(path, key, value)
5996 except EnvironmentError as e:
5997 raise XAttrMetadataError(e.errno, e.strerror)
5998
5999 except ImportError:
6000 if compat_os_name == 'nt':
6001 # Write xattrs to NTFS Alternate Data Streams:
6002 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6003 assert ':' not in key
6004 assert os.path.exists(path)
6005
6006 ads_fn = path + ':' + key
6007 try:
6008 with open(ads_fn, 'wb') as f:
6009 f.write(value)
6010 except EnvironmentError as e:
6011 raise XAttrMetadataError(e.errno, e.strerror)
6012 else:
6013 user_has_setfattr = check_executable('setfattr', ['--version'])
6014 user_has_xattr = check_executable('xattr', ['-h'])
6015
6016 if user_has_setfattr or user_has_xattr:
6017
6018 value = value.decode('utf-8')
6019 if user_has_setfattr:
6020 executable = 'setfattr'
6021 opts = ['-n', key, '-v', value]
6022 elif user_has_xattr:
6023 executable = 'xattr'
6024 opts = ['-w', key, value]
6025
6026 cmd = ([encodeFilename(executable, True)]
6027 + [encodeArgument(o) for o in opts]
6028 + [encodeFilename(path, True)])
6029
6030 try:
6031 p = subprocess.Popen(
6032 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6033 except EnvironmentError as e:
6034 raise XAttrMetadataError(e.errno, e.strerror)
f5b1bca9 6035 stdout, stderr = process_communicate_or_kill(p)
6036 stderr = stderr.decode('utf-8', 'replace')
6037 if p.returncode != 0:
6038 raise XAttrMetadataError(p.returncode, stderr)
6039
6040 else:
6041 # On Unix, but we can't find pyxattr, setfattr, or xattr.
6042 if sys.platform.startswith('linux'):
6043 raise XAttrUnavailableError(
6044 "Couldn't find a tool to set the xattrs. "
6045 "Install either the python 'pyxattr' or 'xattr' "
6046 "modules, or the GNU 'attr' package "
6047 "(which contains the 'setfattr' tool).")
6048 else:
6049 raise XAttrUnavailableError(
6050 "Couldn't find a tool to set the xattrs. "
6051 "Install either the python 'xattr' module, "
6052 "or the 'xattr' binary.")
6053
6054
6055def random_birthday(year_field, month_field, day_field):
6056 start_date = datetime.date(1950, 1, 1)
6057 end_date = datetime.date(1995, 12, 31)
6058 offset = random.randint(0, (end_date - start_date).days)
6059 random_date = start_date + datetime.timedelta(offset)
0c265486 6060 return {
6061 year_field: str(random_date.year),
6062 month_field: str(random_date.month),
6063 day_field: str(random_date.day),
0c265486 6064 }
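# Illustrative example (not part of the original module; the field names are
# arbitrary):
#   random_birthday('birth_year', 'birth_month', 'birth_day')
#   -> e.g. {'birth_year': '1987', 'birth_month': '3', 'birth_day': '14'}
# i.e. a uniformly random date between 1950-01-01 and 1995-12-31, as strings.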
732044af 6065
c76eb41b 6066
732044af 6067# Templates for internet shortcut files, which are plain text files.
6068DOT_URL_LINK_TEMPLATE = '''
6069[InternetShortcut]
6070URL=%(url)s
6071'''.lstrip()
6072
6073DOT_WEBLOC_LINK_TEMPLATE = '''
6074<?xml version="1.0" encoding="UTF-8"?>
6075<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6076<plist version="1.0">
6077<dict>
6078\t<key>URL</key>
6079\t<string>%(url)s</string>
6080</dict>
6081</plist>
6082'''.lstrip()
6083
6084DOT_DESKTOP_LINK_TEMPLATE = '''
6085[Desktop Entry]
6086Encoding=UTF-8
6087Name=%(filename)s
6088Type=Link
6089URL=%(url)s
6090Icon=text-html
6091'''.lstrip()
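# Illustrative rendering (not part of the original module):
#   DOT_URL_LINK_TEMPLATE % {'url': 'https://example.com'}
# produces
#   [InternetShortcut]
#   URL=https://example.com
# The .webloc and .desktop templates are filled the same way; the .desktop one
# additionally needs a 'filename' key.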
6092
6093
6094def iri_to_uri(iri):
6095 """
6096 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6097
6098 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6099 """
6100
6101 iri_parts = compat_urllib_parse_urlparse(iri)
6102
6103 if '[' in iri_parts.netloc:
6104 raise ValueError('IPv6 URIs are not yet supported.')
6105 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6106
6107 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6108
6109 net_location = ''
6110 if iri_parts.username:
6111 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6112 if iri_parts.password is not None:
6113 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6114 net_location += '@'
6115
6116 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6117 # The 'idna' encoding produces ASCII text.
6118 if iri_parts.port is not None and iri_parts.port != 80:
6119 net_location += ':' + str(iri_parts.port)
6120
6121 return compat_urllib_parse_urlunparse(
6122 (iri_parts.scheme,
6123 net_location,
6124
6125 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6126
6127 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6128 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6129
6130 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6131 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6132
6133 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6134
6135 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
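# Illustrative example (not part of the original module):
#   iri_to_uri('https://example.com/a b?q=café')
#   == 'https://example.com/a+b?q=caf%C3%A9'
# Non-ASCII characters are percent-encoded as UTF-8; the space becomes '+' because
# compat_urllib_parse_quote_plus is used for the path and query components.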
6136
6137
6138def to_high_limit_path(path):
6139 if sys.platform in ['win32', 'cygwin']:
6140 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6141 return r'\\?\ '.rstrip() + os.path.abspath(path)
6142
6143 return path
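# Illustrative example (not part of the original module): on Windows
#   to_high_limit_path('C:\\very\\long\\path\\file.mp4')
#   == '\\\\?\\C:\\very\\long\\path\\file.mp4'
# (r'\\?\ '.rstrip() is only a trick to spell the '\\?\' prefix, since a raw string
# literal cannot end with a backslash); other platforms get the path back unchanged.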
76d321f6 6144
c76eb41b 6145
76d321f6 6146def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
6147 val = obj.get(field, default)
6148 if func and val not in ignore:
6149 val = func(val)
6150 return template % val if val not in ignore else default
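# Illustrative examples (not part of the original module):
#   format_field({'height': 1080}, 'height', '%sp') == '1080p'
#   format_field({}, 'height', '%sp') == ''                        # falls back to default
#   format_field({'id': 'abc'}, 'id', 'ID %s', func=str.upper) == 'ID ABC'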
00dd0cd5 6151
6152
6153def clean_podcast_url(url):
6154 return re.sub(r'''(?x)
6155 (?:
6156 (?:
6157 chtbl\.com/track|
6158 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6159 play\.podtrac\.com
6160 )/[^/]+|
6161 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6162 flex\.acast\.com|
6163 pd(?:
6164 cn\.co| # https://podcorn.com/analytics-prefix/
6165 st\.fm # https://podsights.com/docs/
6166 )/e
6167 )/''', '', url)
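# Illustrative example (not part of the original module; the feed host is a
# placeholder):
#   clean_podcast_url('https://dts.podtrac.com/redirect.mp3/feeds.example.com/ep.mp3')
#   == 'https://feeds.example.com/ep.mp3'
# Only the known measurement/tracking prefixes listed above are stripped.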
6168
6169
6170_HEX_TABLE = '0123456789abcdef'
6171
6172
6173def random_uuidv4():
6174 return re.sub(r'[xy]', lambda m: _HEX_TABLE[random.randint(0, 15)] if m.group(0) == 'x' else _HEX_TABLE[8 + random.randint(0, 3)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')  # 'y' must encode the RFC 4122 variant (8, 9, a or b)
0202b52a 6175
6176
6177def make_dir(path, to_screen=None):
6178 try:
6179 dn = os.path.dirname(path)
6180 if dn and not os.path.exists(dn):
6181 os.makedirs(dn)
6182 return True
6183 except (OSError, IOError) as err:
6184 if callable(to_screen):
6185 to_screen('unable to create directory ' + error_to_compat_str(err))
6186 return False
f74980cb 6187
6188
6189def get_executable_path():
c552ae88 6190 from zipimport import zipimporter
6191 if hasattr(sys, 'frozen'): # Running from PyInstaller
6192 path = os.path.dirname(sys.executable)
6193 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6194 path = os.path.join(os.path.dirname(__file__), '../..')
6195 else:
6196 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6197 return os.path.abspath(path)
6198
6199
2f567473 6200def load_plugins(name, suffix, namespace):
f74980cb 6201 plugin_info = [None]
6202 classes = []
6203 try:
6204 plugin_info = imp.find_module(
6205 name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6206 plugins = imp.load_module(name, *plugin_info)
6207 for name in dir(plugins):
2f567473 6208 if name in namespace:
6209 continue
6210 if not name.endswith(suffix):
f74980cb 6211 continue
6212 klass = getattr(plugins, name)
6213 classes.append(klass)
6214 namespace[name] = klass
6215 except ImportError:
6216 pass
6217 finally:
6218 if plugin_info[0] is not None:
6219 plugin_info[0].close()
6220 return classes
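# Illustrative usage sketch (not from the original module): user plugins live in an
# 'ytdlp_plugins' package next to the executable, e.g. a hypothetical
#   ytdlp_plugins/extractors/__init__.py   defining   class SamplePluginIE(...)
# could then be loaded with something like
#   load_plugins('extractors', 'IE', globals())
# which returns the matching classes and also injects them into the given namespace.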
06167fbb 6221
6222
324ad820 6223def traverse_obj(obj, keys, *, casesense=True, is_user_input=False, traverse_string=False):
6224 ''' Traverse nested list/dict/tuple
6225 @param casesense Whether to consider dictionary keys as case sensitive
6226 @param is_user_input Whether the keys are generated from user input. If True,
6227 strings are converted to int/slice if necessary
6228 @param traverse_string Whether to traverse inside strings. If True, any
6229 non-compatible object will also be converted into a string
6230 '''
a439a3a4 6231 keys = list(keys)[::-1]
6232 while keys:
6233 key = keys.pop()
324ad820 6234 if isinstance(obj, dict):
6235 assert isinstance(key, compat_str)
a439a3a4 6236 if not casesense:
324ad820 6237 obj = {k.lower(): v for k, v in obj.items()}
a439a3a4 6238 key = key.lower()
324ad820 6239 obj = obj.get(key)
a439a3a4 6240 else:
324ad820 6241 if is_user_input:
6242 key = (int_or_none(key) if ':' not in key
6243 else slice(*map(int_or_none, key.split(':'))))
6244 if not isinstance(obj, (list, tuple)):
6245 if traverse_string:
6246 obj = compat_str(obj)
6247 else:
6248 return None
6249 assert isinstance(key, (int, slice))
6250 obj = try_get(obj, lambda x: x[key])
6251 return obj
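# Illustrative examples (not part of the original module):
#   traverse_obj({'a': [10, 20, 30]}, ('a', 0)) == 10
#   traverse_obj({'A': {'b': 1}}, ('a', 'b'), casesense=False) == 1
#   traverse_obj({'a': 'xyz'}, ('a', 1), traverse_string=True) == 'y'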
6252
6253
6254def traverse_dict(dictn, keys, casesense=True):
6255 ''' For backward compatibility. Do not use '''
6256 return traverse_obj(dictn, keys, casesense=casesense,
6257 is_user_input=True, traverse_string=True)