#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import imp
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


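# Illustrative sketch (not part of the original module): after
# register_socks_protocols() has run, SOCKS proxy URLs split cleanly into
# host/port even on the affected Python versions, e.g.:
#
#     register_socks_protocols()
#     parts = compat_urlparse.urlsplit('socks5://127.0.0.1:1080')
#     # parts.hostname == '127.0.0.1', parts.port == 1080
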
# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}

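# Illustrative sketch (not part of the original module): a per-request header
# dict can be built on top of std_headers without mutating the module-level
# default, e.g.:
#
#     headers = dict(std_headers)
#     headers['User-Agent'] = USER_AGENTS['Safari']
#     headers['Referer'] = 'https://example.com/'  # hypothetical referer
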
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

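# Illustrative sketch (not part of the original module): date helpers further
# down this file try these layouts one by one with datetime.strptime(). A
# minimal version of that loop (the helper name is hypothetical) looks like:
#
#     def _first_matching_date(date_str, day_first=True):
#         formats = DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
#         for fmt in formats:
#             try:
#                 return datetime.datetime.strptime(date_str, fmt)
#             except ValueError:
#                 pass
#         return None
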
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


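# Illustrative sketch (not part of the original module): write_json_file()
# writes to a temporary file in the target directory and then renames it over
# fn, so readers never observe a half-written file, e.g.:
#
#     write_json_file({'id': 'abc', 'title': 'example'}, 'info.json')
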
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


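# Illustrative sketch (not part of the original module): the xpath_* helpers
# wrap ElementTree lookups with default/fatal handling, e.g.:
#
#     doc = compat_etree_fromstring('<root><a x="1">hello</a></root>')
#     xpath_text(doc, './a')                  # -> 'hello'
#     xpath_attr(doc, './a', 'x')             # -> '1'
#     xpath_text(doc, './b', default='')      # -> '' (missing node, no error)
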
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist

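# Illustrative sketch (not part of the original module): these regex-based
# helpers pull tag contents straight out of an HTML string, e.g.:
#
#     html = '<div class="title">Foo</div><div class="title">Bar</div>'
#     get_element_by_class('title', html)     # -> 'Foo'
#     get_elements_by_class('title', html)    # -> ['Foo', 'Bar']
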

class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs

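# Illustrative sketch (not part of the original module): extract_attributes()
# is handy for pulling attributes out of a single matched tag, e.g.:
#
#     attrs = extract_attributes('<source src="video.mp4" type="video/mp4">')
#     # attrs == {'src': 'video.mp4', 'type': 'video/mp4'}
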

def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()


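# Illustrative sketch (not part of the original module): clean_html() turns
# scraped description markup into plain text, e.g.:
#
#     clean_html('First line<br/>Second &amp; last')
#     # -> 'First line\nSecond & last'
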
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result


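# Illustrative sketch (not part of the original module), assuming the
# ACCENT_CHARS table defined above:
#
#     sanitize_filename('Pink Floyd - Échos.mp3')
#     # -> 'Pink Floyd - Échos.mp3' (only truly unsafe characters are touched)
#     sanitize_filename('Pink Floyd - Échos.mp3', restricted=True)
#     # -> 'Pink_Floyd_-_Echos.mp3' (spaces and accents become ASCII-safe)
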
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url


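# Illustrative sketch (not part of the original module):
#
#     sanitize_url('//example.com/video')     # -> 'http://example.com/video'
#     sanitize_url('httpss://example.com/')   # -> 'https://example.com/'
#     sanitize_url('rmtpe://host/stream')     # -> 'rtmpe://host/stream'
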
def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


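# Illustrative sketch (not part of the original module): credentials embedded
# in a URL are moved into a Basic Authorization header, e.g.:
#
#     extract_basic_auth('http://user:pass@example.com/feed')
#     # -> ('http://example.com/feed', 'Basic dXNlcjpwYXNz')
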
def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res

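# Illustrative sketch (not part of the original module): unlike set(),
# orderedSet() keeps first-occurrence order, e.g.:
#
#     orderedSet(['720p', '1080p', '720p', '480p'])   # -> ['720p', '1080p', '480p']
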

def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)

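# Illustrative sketch (not part of the original module): named and numeric
# entities are decoded, unknown ones are left literally, e.g.:
#
#     unescapeHTML('Ben &amp; Jerry&#39;s')   # -> "Ben & Jerry's"
#     unescapeHTML('&bogus; stays')           # -> '&bogus; stays'
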

def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise

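# Illustrative sketch (not part of the original module): wrapping
# Popen.communicate() so an interrupted run does not leave the child alive
# (the ffmpeg invocation is only an example):
#
#     p = subprocess.Popen(['ffmpeg', '-version'],
#                          stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#     out, err = process_communicate_or_kill(p)
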

def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')

f07b74fc
PH
2300
2301def encodeArgument(s):
2302 if not isinstance(s, compat_str):
2303 # Legacy code that uses byte strings
2304 # Uncomment the following line after fixing all post processors
7af808a5 2305 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2306 s = s.decode('ascii')
2307 return encodeFilename(s, True)
2308
2309
aa49acd1
S
2310def decodeArgument(b):
2311 return decodeFilename(b, True)
2312
2313
8271226a
PH
2314def decodeOption(optval):
2315 if optval is None:
2316 return optval
2317 if isinstance(optval, bytes):
2318 optval = optval.decode(preferredencoding())
2319
2320 assert isinstance(optval, compat_str)
2321 return optval
1c256f70 2322
5f6a1245 2323
dbbbe555 2324def formatSeconds(secs, delim=':'):
4539dd30 2325 if secs > 3600:
dbbbe555 2326 return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
4539dd30 2327 elif secs > 60:
dbbbe555 2328 return '%d%s%02d' % (secs // 60, delim, secs % 60)
4539dd30
PH
2329 else:
2330 return '%d' % secs
2331
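# Illustrative behaviour of formatSeconds:
#   >>> formatSeconds(3725)
#   '1:02:05'
#   >>> formatSeconds(75)
#   '1:15'
#   >>> formatSeconds(45)
#   '45'
#   >>> formatSeconds(3725, delim='-')
#   '1-02-05'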
a0ddb8a2 2332
be4a824d
PH
2333def make_HTTPS_handler(params, **kwargs):
2334 opts_no_check_certificate = params.get('nocheckcertificate', False)
0db261ba 2335 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
be5f2c19 2336 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
0db261ba 2337 if opts_no_check_certificate:
be5f2c19 2338 context.check_hostname = False
0db261ba 2339 context.verify_mode = ssl.CERT_NONE
a2366922 2340 try:
be4a824d 2341 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
a2366922
PH
2342 except TypeError:
2343 # Python 2.7.8
2344 # (create_default_context present but HTTPSHandler has no context=)
2345 pass
2346
2347 if sys.version_info < (3, 2):
d7932313 2348 return YoutubeDLHTTPSHandler(params, **kwargs)
aa37e3d4 2349 else: # Python 3.2, 3.3 (create_default_context not available)
d7932313 2350 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
ea6d901e 2351 context.verify_mode = (ssl.CERT_NONE
dca08720 2352 if opts_no_check_certificate
ea6d901e 2353 else ssl.CERT_REQUIRED)
303b479e 2354 context.set_default_verify_paths()
be4a824d 2355 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2356
732ea2f0 2357
5873d4cc 2358def bug_reports_message(before=';'):
08f2a92c 2359 if ytdl_is_updateable():
7a5c1cfe 2360 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2361 else:
7a5c1cfe 2362 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2363 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2364 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2365 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2366
2367 before = before.rstrip()
2368 if not before or before.endswith(('.', '!', '?')):
2369 msg = msg[0].title() + msg[1:]
2370
2371 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2372
2373
bf5b9d85
PM
2374class YoutubeDLError(Exception):
2375 """Base exception for YoutubeDL errors."""
2376 pass
2377
2378
3158150c 2379network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2380if hasattr(ssl, 'CertificateError'):
2381 network_exceptions.append(ssl.CertificateError)
2382network_exceptions = tuple(network_exceptions)
2383
2384
bf5b9d85 2385class ExtractorError(YoutubeDLError):
1c256f70 2386 """Error during info extraction."""
5f6a1245 2387
d11271dd 2388 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
9a82b238 2389 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2390 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238
PH
2391 """
2392
3158150c 2393 if sys.exc_info()[0] in network_exceptions:
9a82b238 2394 expected = True
d11271dd
PH
2395 if video_id is not None:
2396 msg = video_id + ': ' + msg
410f3e73 2397 if cause:
28e614de 2398 msg += ' (caused by %r)' % cause
9a82b238 2399 if not expected:
08f2a92c 2400 msg += bug_reports_message()
1c256f70 2401 super(ExtractorError, self).__init__(msg)
d5979c5d 2402
1c256f70 2403 self.traceback = tb
8cc83b8d 2404 self.exc_info = sys.exc_info() # preserve original exception
2eabb802 2405 self.cause = cause
d11271dd 2406 self.video_id = video_id
1c256f70 2407
01951dda
PH
2408 def format_traceback(self):
2409 if self.traceback is None:
2410 return None
28e614de 2411 return ''.join(traceback.format_tb(self.traceback))
01951dda 2412
1c256f70 2413
416c7fcb
PH
2414class UnsupportedError(ExtractorError):
2415 def __init__(self, url):
2416 super(UnsupportedError, self).__init__(
2417 'Unsupported URL: %s' % url, expected=True)
2418 self.url = url
2419
2420
55b3e45b
JMF
2421class RegexNotFoundError(ExtractorError):
2422 """Error when a regex didn't match"""
2423 pass
2424
2425
773f291d
S
2426class GeoRestrictedError(ExtractorError):
2427 """Geographic restriction Error exception.
2428
2429 This exception may be thrown when a video is not available from your
2430 geographic location due to geographic restrictions imposed by a website.
2431 """
b6e0c7d2 2432
773f291d
S
2433 def __init__(self, msg, countries=None):
2434 super(GeoRestrictedError, self).__init__(msg, expected=True)
2435 self.msg = msg
2436 self.countries = countries
2437
2438
bf5b9d85 2439class DownloadError(YoutubeDLError):
59ae15a5 2440 """Download Error exception.
d77c3dfd 2441
59ae15a5
PH
2442 This exception may be thrown by FileDownloader objects if they are not
2443 configured to continue on errors. They will contain the appropriate
2444 error message.
2445 """
5f6a1245 2446
8cc83b8d
FV
2447 def __init__(self, msg, exc_info=None):
2448 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2449 super(DownloadError, self).__init__(msg)
2450 self.exc_info = exc_info
d77c3dfd
FV
2451
2452
498f5606 2453class EntryNotInPlaylist(YoutubeDLError):
2454 """Entry not in playlist exception.
2455
2456 This exception will be thrown by YoutubeDL when a requested entry
2457 is not found in the playlist info_dict
2458 """
2459 pass
2460
2461
bf5b9d85 2462class SameFileError(YoutubeDLError):
59ae15a5 2463 """Same File exception.
d77c3dfd 2464
59ae15a5
PH
2465 This exception will be thrown by FileDownloader objects if they detect
2466 multiple files would have to be downloaded to the same file on disk.
2467 """
2468 pass
d77c3dfd
FV
2469
2470
bf5b9d85 2471class PostProcessingError(YoutubeDLError):
59ae15a5 2472 """Post Processing exception.
d77c3dfd 2473
59ae15a5
PH
2474 This exception may be raised by PostProcessor's .run() method to
2475 indicate an error in the postprocessing task.
2476 """
5f6a1245 2477
7851b379 2478 def __init__(self, msg):
bf5b9d85 2479 super(PostProcessingError, self).__init__(msg)
7851b379 2480 self.msg = msg
d77c3dfd 2481
5f6a1245 2482
8b0d7497 2483class ExistingVideoReached(YoutubeDLError):
2484 """ --break-on-existing triggered: an already downloaded video was encountered. """
2485 pass
2486
2487
2488class RejectedVideoReached(YoutubeDLError):
2489 """ --break-on-reject triggered: a video was rejected by the download filters. """
2490 pass
2491
2492
bf5b9d85 2493class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2494 """ --max-downloads limit has been reached. """
2495 pass
d77c3dfd
FV
2496
2497
bf5b9d85 2498class UnavailableVideoError(YoutubeDLError):
59ae15a5 2499 """Unavailable Format exception.
d77c3dfd 2500
59ae15a5
PH
2501 This exception will be thrown when a video is requested
2502 in a format that is not available for that video.
2503 """
2504 pass
d77c3dfd
FV
2505
2506
bf5b9d85 2507class ContentTooShortError(YoutubeDLError):
59ae15a5 2508 """Content Too Short exception.
d77c3dfd 2509
59ae15a5
PH
2510 This exception may be raised by FileDownloader objects when a file they
2511 download is too small for what the server announced first, indicating
2512 the connection was probably interrupted.
2513 """
d77c3dfd 2514
59ae15a5 2515 def __init__(self, downloaded, expected):
bf5b9d85
PM
2516 super(ContentTooShortError, self).__init__(
2517 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2518 )
2c7ed247 2519 # Both in bytes
59ae15a5
PH
2520 self.downloaded = downloaded
2521 self.expected = expected
d77c3dfd 2522
5f6a1245 2523
bf5b9d85 2524class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2525 def __init__(self, code=None, msg='Unknown error'):
2526 super(XAttrMetadataError, self).__init__(msg)
2527 self.code = code
bd264412 2528 self.msg = msg
efa97bdc
YCH
2529
2530 # Parsing code and msg
3089bc74 2531 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2532 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2533 self.reason = 'NO_SPACE'
2534 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2535 self.reason = 'VALUE_TOO_LONG'
2536 else:
2537 self.reason = 'NOT_SUPPORTED'
2538
2539
bf5b9d85 2540class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2541 pass
2542
2543
c5a59d93 2544def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2545 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2546 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2547 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2548 if sys.version_info < (3, 0):
65220c3b
S
2549 kwargs['strict'] = True
2550 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2551 source_address = ydl_handler._params.get('source_address')
8959018a 2552
be4a824d 2553 if source_address is not None:
8959018a
AU
2554 # This is a workaround for _create_connection() in the socket module, which tries all
2555 # addresses returned by getaddrinfo(), including IPv6. This filters the getaddrinfo()
2556 # result to the address family of the source_address value.
2557 # This is based on the cpython socket.create_connection() function.
2558 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2559 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2560 host, port = address
2561 err = None
2562 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2563 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2564 ip_addrs = [addr for addr in addrs if addr[0] == af]
2565 if addrs and not ip_addrs:
2566 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2567 raise socket.error(
2568 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2569 % (ip_version, source_address[0]))
8959018a
AU
2570 for res in ip_addrs:
2571 af, socktype, proto, canonname, sa = res
2572 sock = None
2573 try:
2574 sock = socket.socket(af, socktype, proto)
2575 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2576 sock.settimeout(timeout)
2577 sock.bind(source_address)
2578 sock.connect(sa)
2579 err = None # Explicitly break reference cycle
2580 return sock
2581 except socket.error as _:
2582 err = _
2583 if sock is not None:
2584 sock.close()
2585 if err is not None:
2586 raise err
2587 else:
9e21e6d9
S
2588 raise socket.error('getaddrinfo returns an empty list')
2589 if hasattr(hc, '_create_connection'):
2590 hc._create_connection = _create_connection
be4a824d
PH
2591 sa = (source_address, 0)
2592 if hasattr(hc, 'source_address'): # Python 2.7+
2593 hc.source_address = sa
2594 else: # Python 2.6
2595 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2596 sock = _create_connection(
be4a824d
PH
2597 (self.host, self.port), self.timeout, sa)
2598 if is_https:
d7932313
PH
2599 self.sock = ssl.wrap_socket(
2600 sock, self.key_file, self.cert_file,
2601 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2602 else:
2603 self.sock = sock
2604 hc.connect = functools.partial(_hc_connect, hc)
2605
2606 return hc
2607
2608
87f0e62d 2609def handle_youtubedl_headers(headers):
992fc9d6
YCH
2610 filtered_headers = headers
2611
2612 if 'Youtubedl-no-compression' in filtered_headers:
2613 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2614 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2615
992fc9d6 2616 return filtered_headers
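# Illustrative behaviour (a sketch): when the internal 'Youtubedl-no-compression'
# marker is present, it is dropped together with any Accept-Encoding header:
#   >>> handle_youtubedl_headers({'User-Agent': 'UA', 'Accept-Encoding': 'gzip, deflate',
#   ...                           'Youtubedl-no-compression': 'True'})
#   {'User-Agent': 'UA'}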
87f0e62d
YCH
2617
2618
acebc9cd 2619class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2620 """Handler for HTTP requests and responses.
2621
2622 This class, when installed with an OpenerDirector, automatically adds
2623 the standard headers to every HTTP request and handles gzipped and
2624 deflated responses from web servers. If compression is to be avoided in
2625 a particular request, the original request in the program code only has
0424ec30 2626 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2627 removed before making the real request.
2628
2629 Part of this code was copied from:
2630
2631 http://techknack.net/python-urllib2-handlers/
2632
2633 Andrew Rowls, the author of that code, agreed to release it to the
2634 public domain.
2635 """
2636
be4a824d
PH
2637 def __init__(self, params, *args, **kwargs):
2638 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2639 self._params = params
2640
2641 def http_open(self, req):
71aff188
YCH
2642 conn_class = compat_http_client.HTTPConnection
2643
2644 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2645 if socks_proxy:
2646 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2647 del req.headers['Ytdl-socks-proxy']
2648
be4a824d 2649 return self.do_open(functools.partial(
71aff188 2650 _create_http_connection, self, conn_class, False),
be4a824d
PH
2651 req)
2652
59ae15a5
PH
2653 @staticmethod
2654 def deflate(data):
fc2119f2 2655 if not data:
2656 return data
59ae15a5
PH
2657 try:
2658 return zlib.decompress(data, -zlib.MAX_WBITS)
2659 except zlib.error:
2660 return zlib.decompress(data)
2661
acebc9cd 2662 def http_request(self, req):
51f267d9
S
2663 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2664 # always respected by websites, some tend to give out URLs with non percent-encoded
2665 # non-ASCII characters (see telemb.py, ard.py [#3412])
2666 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2667 # To work around aforementioned issue we will replace request's original URL with
2668 # percent-encoded one
2669 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2670 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2671 url = req.get_full_url()
2672 url_escaped = escape_url(url)
2673
2674 # Substitute URL if any change after escaping
2675 if url != url_escaped:
15d260eb 2676 req = update_Request(req, url=url_escaped)
51f267d9 2677
33ac271b 2678 for h, v in std_headers.items():
3d5f7a39
JK
2679 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2680 # The dict keys are capitalized because of this bug by urllib
2681 if h.capitalize() not in req.headers:
33ac271b 2682 req.add_header(h, v)
87f0e62d
YCH
2683
2684 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2685
2686 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2687 # Python 2.6 is brain-dead when it comes to fragments
2688 req._Request__original = req._Request__original.partition('#')[0]
2689 req._Request__r_type = req._Request__r_type.partition('#')[0]
2690
59ae15a5
PH
2691 return req
2692
acebc9cd 2693 def http_response(self, req, resp):
59ae15a5
PH
2694 old_resp = resp
2695 # gzip
2696 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2697 content = resp.read()
2698 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2699 try:
2700 uncompressed = io.BytesIO(gz.read())
2701 except IOError as original_ioerror:
2702 # There may be junk at the end of the file
2703 # See http://stackoverflow.com/q/4928560/35070 for details
2704 for i in range(1, 1024):
2705 try:
2706 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2707 uncompressed = io.BytesIO(gz.read())
2708 except IOError:
2709 continue
2710 break
2711 else:
2712 raise original_ioerror
b407d853 2713 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2714 resp.msg = old_resp.msg
c047270c 2715 del resp.headers['Content-encoding']
59ae15a5
PH
2716 # deflate
2717 if resp.headers.get('Content-encoding', '') == 'deflate':
2718 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2719 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2720 resp.msg = old_resp.msg
c047270c 2721 del resp.headers['Content-encoding']
ad729172 2722 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2723 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2724 if 300 <= resp.code < 400:
2725 location = resp.headers.get('Location')
2726 if location:
2727 # Per RFC 2616 the default charset is iso-8859-1, which is respected by python 3
2728 if sys.version_info >= (3, 0):
2729 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2730 else:
2731 location = location.decode('utf-8')
5a4d9ddb
S
2732 location_escaped = escape_url(location)
2733 if location != location_escaped:
2734 del resp.headers['Location']
9a4aec8b
YCH
2735 if sys.version_info < (3, 0):
2736 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2737 resp.headers['Location'] = location_escaped
59ae15a5 2738 return resp
0f8d03f8 2739
acebc9cd
PH
2740 https_request = http_request
2741 https_response = http_response
bf50b038 2742
5de90176 2743
71aff188
YCH
2744def make_socks_conn_class(base_class, socks_proxy):
2745 assert issubclass(base_class, (
2746 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2747
2748 url_components = compat_urlparse.urlparse(socks_proxy)
2749 if url_components.scheme.lower() == 'socks5':
2750 socks_type = ProxyType.SOCKS5
2751 elif url_components.scheme.lower() in ('socks', 'socks4'):
2752 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2753 elif url_components.scheme.lower() == 'socks4a':
2754 socks_type = ProxyType.SOCKS4A
71aff188 2755
cdd94c2e
YCH
2756 def unquote_if_non_empty(s):
2757 if not s:
2758 return s
2759 return compat_urllib_parse_unquote_plus(s)
2760
71aff188
YCH
2761 proxy_args = (
2762 socks_type,
2763 url_components.hostname, url_components.port or 1080,
2764 True, # Remote DNS
cdd94c2e
YCH
2765 unquote_if_non_empty(url_components.username),
2766 unquote_if_non_empty(url_components.password),
71aff188
YCH
2767 )
2768
2769 class SocksConnection(base_class):
2770 def connect(self):
2771 self.sock = sockssocket()
2772 self.sock.setproxy(*proxy_args)
2773 if type(self.timeout) in (int, float):
2774 self.sock.settimeout(self.timeout)
2775 self.sock.connect((self.host, self.port))
2776
2777 if isinstance(self, compat_http_client.HTTPSConnection):
2778 if hasattr(self, '_context'): # Python > 2.6
2779 self.sock = self._context.wrap_socket(
2780 self.sock, server_hostname=self.host)
2781 else:
2782 self.sock = ssl.wrap_socket(self.sock)
2783
2784 return SocksConnection
2785
2786
be4a824d
PH
2787class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2788 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2789 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2790 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2791 self._params = params
2792
2793 def https_open(self, req):
4f264c02 2794 kwargs = {}
71aff188
YCH
2795 conn_class = self._https_conn_class
2796
4f264c02
JMF
2797 if hasattr(self, '_context'): # python > 2.6
2798 kwargs['context'] = self._context
2799 if hasattr(self, '_check_hostname'): # python 3.x
2800 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2801
2802 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2803 if socks_proxy:
2804 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2805 del req.headers['Ytdl-socks-proxy']
2806
be4a824d 2807 return self.do_open(functools.partial(
71aff188 2808 _create_http_connection, self, conn_class, True),
4f264c02 2809 req, **kwargs)
be4a824d
PH
2810
2811
1bab3437 2812class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2813 """
2814 See [1] for cookie file format.
2815
2816 1. https://curl.haxx.se/docs/http-cookies.html
2817 """
e7e62441 2818 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2819 _ENTRY_LEN = 7
2820 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2821# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2822
2823'''
2824 _CookieFileEntry = collections.namedtuple(
2825 'CookieFileEntry',
2826 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2827
1bab3437 2828 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2829 """
2830 Save cookies to a file.
2831
2832 Most of the code is taken from CPython 3.8 and slightly adapted
2833 to support cookie files with UTF-8 in both python 2 and 3.
2834 """
2835 if filename is None:
2836 if self.filename is not None:
2837 filename = self.filename
2838 else:
2839 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2840
1bab3437
S
2841 # Store session cookies with `expires` set to 0 instead of an empty
2842 # string
2843 for cookie in self:
2844 if cookie.expires is None:
2845 cookie.expires = 0
c380cc28
S
2846
2847 with io.open(filename, 'w', encoding='utf-8') as f:
2848 f.write(self._HEADER)
2849 now = time.time()
2850 for cookie in self:
2851 if not ignore_discard and cookie.discard:
2852 continue
2853 if not ignore_expires and cookie.is_expired(now):
2854 continue
2855 if cookie.secure:
2856 secure = 'TRUE'
2857 else:
2858 secure = 'FALSE'
2859 if cookie.domain.startswith('.'):
2860 initial_dot = 'TRUE'
2861 else:
2862 initial_dot = 'FALSE'
2863 if cookie.expires is not None:
2864 expires = compat_str(cookie.expires)
2865 else:
2866 expires = ''
2867 if cookie.value is None:
2868 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2869 # with no name, whereas http.cookiejar regards it as a
2870 # cookie with no value.
2871 name = ''
2872 value = cookie.name
2873 else:
2874 name = cookie.name
2875 value = cookie.value
2876 f.write(
2877 '\t'.join([cookie.domain, initial_dot, cookie.path,
2878 secure, expires, name, value]) + '\n')
1bab3437
S
2879
2880 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2881 """Load cookies from a file."""
2882 if filename is None:
2883 if self.filename is not None:
2884 filename = self.filename
2885 else:
2886 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2887
c380cc28
S
2888 def prepare_line(line):
2889 if line.startswith(self._HTTPONLY_PREFIX):
2890 line = line[len(self._HTTPONLY_PREFIX):]
2891 # comments and empty lines are fine
2892 if line.startswith('#') or not line.strip():
2893 return line
2894 cookie_list = line.split('\t')
2895 if len(cookie_list) != self._ENTRY_LEN:
2896 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2897 cookie = self._CookieFileEntry(*cookie_list)
2898 if cookie.expires_at and not cookie.expires_at.isdigit():
2899 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2900 return line
2901
e7e62441 2902 cf = io.StringIO()
c380cc28 2903 with io.open(filename, encoding='utf-8') as f:
e7e62441 2904 for line in f:
c380cc28
S
2905 try:
2906 cf.write(prepare_line(line))
2907 except compat_cookiejar.LoadError as e:
2908 write_string(
2909 'WARNING: skipping cookie file entry due to %s: %r\n'
2910 % (e, line), sys.stderr)
2911 continue
e7e62441 2912 cf.seek(0)
2913 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2914 # Session cookies are denoted by either `expires` field set to
2915 # an empty string or 0. MozillaCookieJar only recognizes the former
2916 # (see [1]). So we need to force the latter to be recognized as session
2917 # cookies on our own.
2918 # Session cookies may be important for cookie-based authentication,
2919 # e.g. when a user does not tick the 'Remember me' check box while
2920 # logging in on a site, some important cookies are stored as session
2921 # cookies; failing to recognize them would result in a failed login.
2922 # 1. https://bugs.python.org/issue17164
2923 for cookie in self:
2924 # Treat `expires=0` cookies as session cookies
2925 if cookie.expires == 0:
2926 cookie.expires = None
2927 cookie.discard = True
2928
2929
a6420bf5
S
2930class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2931 def __init__(self, cookiejar=None):
2932 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2933
2934 def http_response(self, request, response):
2935 # Python 2 will choke on the next HTTP request if there are non-ASCII
2936 # characters in the Set-Cookie HTTP header of the last response (see
067aa17e 2937 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
2938 # In order to at least prevent crashing we will percent encode Set-Cookie
2939 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
2940 # if sys.version_info < (3, 0) and response.headers:
2941 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2942 # set_cookie = response.headers.get(set_cookie_header)
2943 # if set_cookie:
2944 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2945 # if set_cookie != set_cookie_escaped:
2946 # del response.headers[set_cookie_header]
2947 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
2948 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2949
f5fa042c 2950 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
2951 https_response = http_response
2952
2953
fca6dba8 2954class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 2955 """YoutubeDL redirect handler
2956
2957 The code is based on HTTPRedirectHandler implementation from CPython [1].
2958
2959 This redirect handler solves two issues:
2960 - ensures redirect URL is always unicode under python 2
2961 - introduces support for experimental HTTP response status code
2962 308 Permanent Redirect [2] used by some sites [3]
2963
2964 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2965 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2966 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2967 """
2968
2969 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2970
2971 def redirect_request(self, req, fp, code, msg, headers, newurl):
2972 """Return a Request or None in response to a redirect.
2973
2974 This is called by the http_error_30x methods when a
2975 redirection response is received. If a redirection should
2976 take place, return a new Request to allow http_error_30x to
2977 perform the redirect. Otherwise, raise HTTPError if no-one
2978 else should try to handle this url. Return None if you can't
2979 but another Handler might.
2980 """
2981 m = req.get_method()
2982 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
2983 or code in (301, 302, 303) and m == "POST")):
2984 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
2985 # Strictly (according to RFC 2616), 301 or 302 in response to
2986 # a POST MUST NOT cause a redirection without confirmation
2987 # from the user (of urllib.request, in this case). In practice,
2988 # essentially all clients do redirect in this case, so we do
2989 # the same.
2990
2991 # On python 2 urlh.geturl() may sometimes return redirect URL
2992 # as byte string instead of unicode. This workaround allows
2993 # to force it always return unicode.
2994 if sys.version_info[0] < 3:
2995 newurl = compat_str(newurl)
2996
2997 # Be conciliant with URIs containing a space. This is mainly
2998 # redundant with the more complete encoding done in http_error_302(),
2999 # but it is kept for compatibility with other callers.
3000 newurl = newurl.replace(' ', '%20')
3001
3002 CONTENT_HEADERS = ("content-length", "content-type")
3003 # NB: don't use dict comprehension for python 2.6 compatibility
3004 newheaders = dict((k, v) for k, v in req.headers.items()
3005 if k.lower() not in CONTENT_HEADERS)
3006 return compat_urllib_request.Request(
3007 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3008 unverifiable=True)
fca6dba8
S
3009
3010
46f59e89
S
3011def extract_timezone(date_str):
3012 m = re.search(
3013 r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
3014 date_str)
3015 if not m:
3016 timezone = datetime.timedelta()
3017 else:
3018 date_str = date_str[:-len(m.group('tz'))]
3019 if not m.group('sign'):
3020 timezone = datetime.timedelta()
3021 else:
3022 sign = 1 if m.group('sign') == '+' else -1
3023 timezone = datetime.timedelta(
3024 hours=sign * int(m.group('hours')),
3025 minutes=sign * int(m.group('minutes')))
3026 return timezone, date_str
3027
3028
08b38d54 3029def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3030 """ Return a UNIX timestamp from the given date """
3031
3032 if date_str is None:
3033 return None
3034
52c3a6e4
S
3035 date_str = re.sub(r'\.[0-9]+', '', date_str)
3036
08b38d54 3037 if timezone is None:
46f59e89
S
3038 timezone, date_str = extract_timezone(date_str)
3039
52c3a6e4
S
3040 try:
3041 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3042 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3043 return calendar.timegm(dt.timetuple())
3044 except ValueError:
3045 pass
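# Illustrative usage of parse_iso8601 (a sketch; all three forms denote the same instant):
#   >>> parse_iso8601('2014-03-23T23:04:26+0100')
#   1395612266
#   >>> parse_iso8601('2014-03-23T22:04:26Z')
#   1395612266
#   >>> parse_iso8601('2014-03-23 22:04:26', delimiter=' ')
#   1395612266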
912b38b4
PH
3046
3047
46f59e89
S
3048def date_formats(day_first=True):
3049 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3050
3051
42bdd9d0 3052def unified_strdate(date_str, day_first=True):
bf50b038 3053 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3054
3055 if date_str is None:
3056 return None
bf50b038 3057 upload_date = None
5f6a1245 3058 # Replace commas
026fcc04 3059 date_str = date_str.replace(',', ' ')
42bdd9d0 3060 # Remove AM/PM + timezone
9bb8e0a3 3061 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3062 _, date_str = extract_timezone(date_str)
42bdd9d0 3063
46f59e89 3064 for expression in date_formats(day_first):
bf50b038
JMF
3065 try:
3066 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3067 except ValueError:
bf50b038 3068 pass
42393ce2
PH
3069 if upload_date is None:
3070 timetuple = email.utils.parsedate_tz(date_str)
3071 if timetuple:
c6b9cf05
S
3072 try:
3073 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3074 except ValueError:
3075 pass
6a750402
JMF
3076 if upload_date is not None:
3077 return compat_str(upload_date)
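# Illustrative usage of unified_strdate (a sketch; assumes the DATE_FORMATS* tables
# defined elsewhere in this module):
#   >>> unified_strdate('December 21, 2010')
#   '20101221'
#   >>> unified_strdate('1968-12-10')
#   '19681210'
#   >>> unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False)
#   '20141126'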
bf50b038 3078
5f6a1245 3079
46f59e89
S
3080def unified_timestamp(date_str, day_first=True):
3081 if date_str is None:
3082 return None
3083
2ae2ffda 3084 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3085
7dc2a74e 3086 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3087 timezone, date_str = extract_timezone(date_str)
3088
3089 # Remove AM/PM + timezone
3090 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3091
deef3195
S
3092 # Remove unrecognized timezones from ISO 8601-like timestamps
3093 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3094 if m:
3095 date_str = date_str[:-len(m.group('tz'))]
3096
f226880c
PH
3097 # Python only supports microseconds, so remove nanoseconds
3098 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3099 if m:
3100 date_str = m.group(1)
3101
46f59e89
S
3102 for expression in date_formats(day_first):
3103 try:
7dc2a74e 3104 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3105 return calendar.timegm(dt.timetuple())
3106 except ValueError:
3107 pass
3108 timetuple = email.utils.parsedate_tz(date_str)
3109 if timetuple:
7dc2a74e 3110 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3111
3112
28e614de 3113def determine_ext(url, default_ext='unknown_video'):
85750f89 3114 if url is None or '.' not in url:
f4776371 3115 return default_ext
9cb9a5df 3116 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3117 if re.match(r'^[A-Za-z0-9]+$', guess):
3118 return guess
a7aaa398
S
3119 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
3120 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3121 return guess.rstrip('/')
73e79f2a 3122 else:
cbdbb766 3123 return default_ext
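# Illustrative usage of determine_ext (a sketch; the second example assumes 'mp4' is
# listed in KNOWN_EXTENSIONS, defined elsewhere in this module):
#   >>> determine_ext('http://example.com/video.mp4?download=1')
#   'mp4'
#   >>> determine_ext('http://example.com/foo/bar.mp4/?download')
#   'mp4'
#   >>> determine_ext('http://example.com/download')
#   'unknown_video'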
73e79f2a 3124
5f6a1245 3125
824fa511
S
3126def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3127 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3128
5f6a1245 3129
9e62f283 3130def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3131 """
3132 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3133 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3134
3135 format: string date format used to return datetime object from
3136 precision: round the time portion of a datetime object.
3137 auto|microsecond|second|minute|hour|day.
3138 auto: round to the unit provided in date_str (if applicable).
3139 """
3140 auto_precision = False
3141 if precision == 'auto':
3142 auto_precision = True
3143 precision = 'microsecond'
3144 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3145 if date_str in ('now', 'today'):
37254abc 3146 return today
f8795e10
PH
3147 if date_str == 'yesterday':
3148 return today - datetime.timedelta(days=1)
9e62f283 3149 match = re.match(
3150 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3151 date_str)
37254abc 3152 if match is not None:
9e62f283 3153 start_time = datetime_from_str(match.group('start'), precision, format)
3154 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3155 unit = match.group('unit')
9e62f283 3156 if unit == 'month' or unit == 'year':
3157 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3158 unit = 'day'
9e62f283 3159 else:
3160 if unit == 'week':
3161 unit = 'day'
3162 time *= 7
3163 delta = datetime.timedelta(**{unit + 's': time})
3164 new_date = start_time + delta
3165 if auto_precision:
3166 return datetime_round(new_date, unit)
3167 return new_date
3168
3169 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3170
3171
3172def date_from_str(date_str, format='%Y%m%d'):
3173 """
3174 Return a datetime object from a string in the format YYYYMMDD or
3175 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3176
3177 format: string date format used to return datetime object from
3178 """
3179 return datetime_from_str(date_str, precision='microsecond', format=format).date()
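# Illustrative usage of date_from_str/datetime_from_str (a sketch):
#   >>> date_from_str('20210131')
#   datetime.date(2021, 1, 31)
#   >>> date_from_str('now-1week')     # the date exactly one week before today
#   >>> date_from_str('today+1month')  # same day of the following month (clamped to month length)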
3180
3181
3182def datetime_add_months(dt, months):
3183 """Increment/Decrement a datetime object by months."""
3184 month = dt.month + months - 1
3185 year = dt.year + month // 12
3186 month = month % 12 + 1
3187 day = min(dt.day, calendar.monthrange(year, month)[1])
3188 return dt.replace(year, month, day)
3189
3190
3191def datetime_round(dt, precision='day'):
3192 """
3193 Round a datetime object's time to a specific precision
3194 """
3195 if precision == 'microsecond':
3196 return dt
3197
3198 unit_seconds = {
3199 'day': 86400,
3200 'hour': 3600,
3201 'minute': 60,
3202 'second': 1,
3203 }
3204 roundto = lambda x, n: ((x + n / 2) // n) * n
3205 timestamp = calendar.timegm(dt.timetuple())
3206 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3207
3208
e63fc1be 3209def hyphenate_date(date_str):
3210 """
3211 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3212 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3213 if match is not None:
3214 return '-'.join(match.groups())
3215 else:
3216 return date_str
3217
5f6a1245 3218
bd558525
JMF
3219class DateRange(object):
3220 """Represents a time interval between two dates"""
5f6a1245 3221
bd558525
JMF
3222 def __init__(self, start=None, end=None):
3223 """start and end must be strings in the format accepted by date"""
3224 if start is not None:
3225 self.start = date_from_str(start)
3226 else:
3227 self.start = datetime.datetime.min.date()
3228 if end is not None:
3229 self.end = date_from_str(end)
3230 else:
3231 self.end = datetime.datetime.max.date()
37254abc 3232 if self.start > self.end:
bd558525 3233 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3234
bd558525
JMF
3235 @classmethod
3236 def day(cls, day):
3237 """Returns a range that only contains the given day"""
5f6a1245
JW
3238 return cls(day, day)
3239
bd558525
JMF
3240 def __contains__(self, date):
3241 """Check if the date is in the range"""
37254abc
JMF
3242 if not isinstance(date, datetime.date):
3243 date = date_from_str(date)
3244 return self.start <= date <= self.end
5f6a1245 3245
bd558525 3246 def __str__(self):
5f6a1245 3247 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
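# Illustrative usage of DateRange (a sketch):
#   >>> '20210615' in DateRange('20210101', '20211231')
#   True
#   >>> '20220101' in DateRange('20210101', '20211231')
#   False
#   >>> DateRange.day('20210615')  # a range containing only that single day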
c496ca96
PH
3248
3249
3250def platform_name():
3251 """ Returns the platform name as a compat_str """
3252 res = platform.platform()
3253 if isinstance(res, bytes):
3254 res = res.decode(preferredencoding())
3255
3256 assert isinstance(res, compat_str)
3257 return res
c257baff
PH
3258
3259
b58ddb32
PH
3260def _windows_write_string(s, out):
3261 """ Returns True if the string was written using special methods,
3262 False if it has yet to be written out."""
3263 # Adapted from http://stackoverflow.com/a/3259271/35070
3264
3265 import ctypes
3266 import ctypes.wintypes
3267
3268 WIN_OUTPUT_IDS = {
3269 1: -11,
3270 2: -12,
3271 }
3272
a383a98a
PH
3273 try:
3274 fileno = out.fileno()
3275 except AttributeError:
3276 # If the output stream doesn't have a fileno, it's virtual
3277 return False
aa42e873
PH
3278 except io.UnsupportedOperation:
3279 # Some strange Windows pseudo files?
3280 return False
b58ddb32
PH
3281 if fileno not in WIN_OUTPUT_IDS:
3282 return False
3283
d7cd9a9e 3284 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3285 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3286 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3287 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3288
d7cd9a9e 3289 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3290 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3291 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3292 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3293 written = ctypes.wintypes.DWORD(0)
3294
d7cd9a9e 3295 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3296 FILE_TYPE_CHAR = 0x0002
3297 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3298 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3299 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3300 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3301 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3302 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3303
3304 def not_a_console(handle):
3305 if handle == INVALID_HANDLE_VALUE or handle is None:
3306 return True
3089bc74
S
3307 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3308 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3309
3310 if not_a_console(h):
3311 return False
3312
d1b9c912
PH
3313 def next_nonbmp_pos(s):
3314 try:
3315 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3316 except StopIteration:
3317 return len(s)
3318
3319 while s:
3320 count = min(next_nonbmp_pos(s), 1024)
3321
b58ddb32 3322 ret = WriteConsoleW(
d1b9c912 3323 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3324 if ret == 0:
3325 raise OSError('Failed to write string')
d1b9c912
PH
3326 if not count: # We just wrote a non-BMP character
3327 assert written.value == 2
3328 s = s[1:]
3329 else:
3330 assert written.value > 0
3331 s = s[written.value:]
b58ddb32
PH
3332 return True
3333
3334
734f90bb 3335def write_string(s, out=None, encoding=None):
7459e3a2
PH
3336 if out is None:
3337 out = sys.stderr
8bf48f23 3338 assert type(s) == compat_str
7459e3a2 3339
b58ddb32
PH
3340 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3341 if _windows_write_string(s, out):
3342 return
3343
3089bc74
S
3344 if ('b' in getattr(out, 'mode', '')
3345 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3346 byt = s.encode(encoding or preferredencoding(), 'ignore')
3347 out.write(byt)
3348 elif hasattr(out, 'buffer'):
3349 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3350 byt = s.encode(enc, 'ignore')
3351 out.buffer.write(byt)
3352 else:
8bf48f23 3353 out.write(s)
7459e3a2
PH
3354 out.flush()
3355
3356
48ea9cea
PH
3357def bytes_to_intlist(bs):
3358 if not bs:
3359 return []
3360 if isinstance(bs[0], int): # Python 3
3361 return list(bs)
3362 else:
3363 return [ord(c) for c in bs]
3364
c257baff 3365
cba892fa 3366def intlist_to_bytes(xs):
3367 if not xs:
3368 return b''
edaa23f8 3369 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3370
3371
c1c9a79c
PH
3372# Cross-platform file locking
3373if sys.platform == 'win32':
3374 import ctypes.wintypes
3375 import msvcrt
3376
3377 class OVERLAPPED(ctypes.Structure):
3378 _fields_ = [
3379 ('Internal', ctypes.wintypes.LPVOID),
3380 ('InternalHigh', ctypes.wintypes.LPVOID),
3381 ('Offset', ctypes.wintypes.DWORD),
3382 ('OffsetHigh', ctypes.wintypes.DWORD),
3383 ('hEvent', ctypes.wintypes.HANDLE),
3384 ]
3385
3386 kernel32 = ctypes.windll.kernel32
3387 LockFileEx = kernel32.LockFileEx
3388 LockFileEx.argtypes = [
3389 ctypes.wintypes.HANDLE, # hFile
3390 ctypes.wintypes.DWORD, # dwFlags
3391 ctypes.wintypes.DWORD, # dwReserved
3392 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3393 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3394 ctypes.POINTER(OVERLAPPED) # Overlapped
3395 ]
3396 LockFileEx.restype = ctypes.wintypes.BOOL
3397 UnlockFileEx = kernel32.UnlockFileEx
3398 UnlockFileEx.argtypes = [
3399 ctypes.wintypes.HANDLE, # hFile
3400 ctypes.wintypes.DWORD, # dwReserved
3401 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3402 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3403 ctypes.POINTER(OVERLAPPED) # Overlapped
3404 ]
3405 UnlockFileEx.restype = ctypes.wintypes.BOOL
3406 whole_low = 0xffffffff
3407 whole_high = 0x7fffffff
3408
3409 def _lock_file(f, exclusive):
3410 overlapped = OVERLAPPED()
3411 overlapped.Offset = 0
3412 overlapped.OffsetHigh = 0
3413 overlapped.hEvent = 0
3414 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3415 handle = msvcrt.get_osfhandle(f.fileno())
3416 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3417 whole_low, whole_high, f._lock_file_overlapped_p):
3418 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3419
3420 def _unlock_file(f):
3421 assert f._lock_file_overlapped_p
3422 handle = msvcrt.get_osfhandle(f.fileno())
3423 if not UnlockFileEx(handle, 0,
3424 whole_low, whole_high, f._lock_file_overlapped_p):
3425 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3426
3427else:
399a76e6
YCH
3428 # Some platforms, such as Jython, are missing fcntl
3429 try:
3430 import fcntl
c1c9a79c 3431
399a76e6
YCH
3432 def _lock_file(f, exclusive):
3433 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3434
399a76e6
YCH
3435 def _unlock_file(f):
3436 fcntl.flock(f, fcntl.LOCK_UN)
3437 except ImportError:
3438 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3439
3440 def _lock_file(f, exclusive):
3441 raise IOError(UNSUPPORTED_MSG)
3442
3443 def _unlock_file(f):
3444 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3445
3446
3447class locked_file(object):
3448 def __init__(self, filename, mode, encoding=None):
3449 assert mode in ['r', 'a', 'w']
3450 self.f = io.open(filename, mode, encoding=encoding)
3451 self.mode = mode
3452
3453 def __enter__(self):
3454 exclusive = self.mode != 'r'
3455 try:
3456 _lock_file(self.f, exclusive)
3457 except IOError:
3458 self.f.close()
3459 raise
3460 return self
3461
3462 def __exit__(self, etype, value, traceback):
3463 try:
3464 _unlock_file(self.f)
3465 finally:
3466 self.f.close()
3467
3468 def __iter__(self):
3469 return iter(self.f)
3470
3471 def write(self, *args):
3472 return self.f.write(*args)
3473
3474 def read(self, *args):
3475 return self.f.read(*args)
4eb7f1d1
JMF
3476
3477
4644ac55
S
3478def get_filesystem_encoding():
3479 encoding = sys.getfilesystemencoding()
3480 return encoding if encoding is not None else 'utf-8'
3481
3482
4eb7f1d1 3483def shell_quote(args):
a6a173c2 3484 quoted_args = []
4644ac55 3485 encoding = get_filesystem_encoding()
a6a173c2
JMF
3486 for a in args:
3487 if isinstance(a, bytes):
3488 # We may get a filename encoded with 'encodeFilename'
3489 a = a.decode(encoding)
aefce8e6 3490 quoted_args.append(compat_shlex_quote(a))
28e614de 3491 return ' '.join(quoted_args)
9d4660ca
PH
3492
3493
3494def smuggle_url(url, data):
3495 """ Pass additional data in a URL for internal use. """
3496
81953d1a
RA
3497 url, idata = unsmuggle_url(url, {})
3498 data.update(idata)
15707c7e 3499 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3500 {'__youtubedl_smuggle': json.dumps(data)})
3501 return url + '#' + sdata
9d4660ca
PH
3502
3503
79f82953 3504def unsmuggle_url(smug_url, default=None):
83e865a3 3505 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3506 return smug_url, default
28e614de
PH
3507 url, _, sdata = smug_url.rpartition('#')
3508 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3509 data = json.loads(jsond)
3510 return url, data
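# Illustrative round trip (a sketch): smuggle_url() packs extra data into the URL
# fragment and unsmuggle_url() recovers it again:
#   >>> url = smuggle_url('http://example.com/video', {'referer': 'http://example.com/'})
#   >>> unsmuggle_url(url)
#   ('http://example.com/video', {'referer': 'http://example.com/'})
#   >>> unsmuggle_url('http://example.com/plain', default={})
#   ('http://example.com/plain', {})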
02dbf93f
PH
3511
3512
02dbf93f
PH
3513def format_bytes(bytes):
3514 if bytes is None:
28e614de 3515 return 'N/A'
02dbf93f
PH
3516 if type(bytes) is str:
3517 bytes = float(bytes)
3518 if bytes == 0.0:
3519 exponent = 0
3520 else:
3521 exponent = int(math.log(bytes, 1024.0))
28e614de 3522 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3523 converted = float(bytes) / float(1024 ** exponent)
28e614de 3524 return '%.2f%s' % (converted, suffix)
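# Illustrative behaviour of format_bytes:
#   >>> format_bytes(None)
#   'N/A'
#   >>> format_bytes(500)
#   '500.00B'
#   >>> format_bytes(1536)
#   '1.50KiB'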
f53c966a 3525
1c088fa8 3526
fb47597b
S
3527def lookup_unit_table(unit_table, s):
3528 units_re = '|'.join(re.escape(u) for u in unit_table)
3529 m = re.match(
782b1b5b 3530 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3531 if not m:
3532 return None
3533 num_str = m.group('num').replace(',', '.')
3534 mult = unit_table[m.group('unit')]
3535 return int(float(num_str) * mult)
3536
3537
be64b5b0
PH
3538def parse_filesize(s):
3539 if s is None:
3540 return None
3541
dfb1b146 3542 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3543 # but we support those too
3544 _UNIT_TABLE = {
3545 'B': 1,
3546 'b': 1,
70852b47 3547 'bytes': 1,
be64b5b0
PH
3548 'KiB': 1024,
3549 'KB': 1000,
3550 'kB': 1024,
3551 'Kb': 1000,
13585d76 3552 'kb': 1000,
70852b47
YCH
3553 'kilobytes': 1000,
3554 'kibibytes': 1024,
be64b5b0
PH
3555 'MiB': 1024 ** 2,
3556 'MB': 1000 ** 2,
3557 'mB': 1024 ** 2,
3558 'Mb': 1000 ** 2,
13585d76 3559 'mb': 1000 ** 2,
70852b47
YCH
3560 'megabytes': 1000 ** 2,
3561 'mebibytes': 1024 ** 2,
be64b5b0
PH
3562 'GiB': 1024 ** 3,
3563 'GB': 1000 ** 3,
3564 'gB': 1024 ** 3,
3565 'Gb': 1000 ** 3,
13585d76 3566 'gb': 1000 ** 3,
70852b47
YCH
3567 'gigabytes': 1000 ** 3,
3568 'gibibytes': 1024 ** 3,
be64b5b0
PH
3569 'TiB': 1024 ** 4,
3570 'TB': 1000 ** 4,
3571 'tB': 1024 ** 4,
3572 'Tb': 1000 ** 4,
13585d76 3573 'tb': 1000 ** 4,
70852b47
YCH
3574 'terabytes': 1000 ** 4,
3575 'tebibytes': 1024 ** 4,
be64b5b0
PH
3576 'PiB': 1024 ** 5,
3577 'PB': 1000 ** 5,
3578 'pB': 1024 ** 5,
3579 'Pb': 1000 ** 5,
13585d76 3580 'pb': 1000 ** 5,
70852b47
YCH
3581 'petabytes': 1000 ** 5,
3582 'pebibytes': 1024 ** 5,
be64b5b0
PH
3583 'EiB': 1024 ** 6,
3584 'EB': 1000 ** 6,
3585 'eB': 1024 ** 6,
3586 'Eb': 1000 ** 6,
13585d76 3587 'eb': 1000 ** 6,
70852b47
YCH
3588 'exabytes': 1000 ** 6,
3589 'exbibytes': 1024 ** 6,
be64b5b0
PH
3590 'ZiB': 1024 ** 7,
3591 'ZB': 1000 ** 7,
3592 'zB': 1024 ** 7,
3593 'Zb': 1000 ** 7,
13585d76 3594 'zb': 1000 ** 7,
70852b47
YCH
3595 'zettabytes': 1000 ** 7,
3596 'zebibytes': 1024 ** 7,
be64b5b0
PH
3597 'YiB': 1024 ** 8,
3598 'YB': 1000 ** 8,
3599 'yB': 1024 ** 8,
3600 'Yb': 1000 ** 8,
13585d76 3601 'yb': 1000 ** 8,
70852b47
YCH
3602 'yottabytes': 1000 ** 8,
3603 'yobibytes': 1024 ** 8,
be64b5b0
PH
3604 }
3605
fb47597b
S
3606 return lookup_unit_table(_UNIT_TABLE, s)
3607
3608
3609def parse_count(s):
3610 if s is None:
be64b5b0
PH
3611 return None
3612
fb47597b
S
3613 s = s.strip()
3614
3615 if re.match(r'^[\d,.]+$', s):
3616 return str_to_int(s)
3617
3618 _UNIT_TABLE = {
3619 'k': 1000,
3620 'K': 1000,
3621 'm': 1000 ** 2,
3622 'M': 1000 ** 2,
3623 'kk': 1000 ** 2,
3624 'KK': 1000 ** 2,
3625 }
be64b5b0 3626
fb47597b 3627 return lookup_unit_table(_UNIT_TABLE, s)
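# Illustrative usage (a sketch): parse_filesize() understands both SI and binary
# suffixes, parse_count() the shorthand used for view/like counts:
#   >>> parse_filesize('5 MiB')
#   5242880
#   >>> parse_filesize('1.2GB')
#   1200000000
#   >>> parse_count('123,456')
#   123456
#   >>> parse_count('1.8M')
#   1800000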
be64b5b0 3628
2f7ae819 3629
b871d7e9
S
3630def parse_resolution(s):
3631 if s is None:
3632 return {}
3633
3634 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3635 if mobj:
3636 return {
3637 'width': int(mobj.group('w')),
3638 'height': int(mobj.group('h')),
3639 }
3640
3641 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3642 if mobj:
3643 return {'height': int(mobj.group(1))}
3644
3645 mobj = re.search(r'\b([48])[kK]\b', s)
3646 if mobj:
3647 return {'height': int(mobj.group(1)) * 540}
3648
3649 return {}
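# Illustrative behaviour of parse_resolution:
#   >>> parse_resolution('1920x1080')
#   {'width': 1920, 'height': 1080}
#   >>> parse_resolution('720p')
#   {'height': 720}
#   >>> parse_resolution('4K')
#   {'height': 2160}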
3650
3651
0dc41787
S
3652def parse_bitrate(s):
3653 if not isinstance(s, compat_str):
3654 return
3655 mobj = re.search(r'\b(\d+)\s*kbps', s)
3656 if mobj:
3657 return int(mobj.group(1))
3658
3659
a942d6cb 3660def month_by_name(name, lang='en'):
caefb1de
PH
3661 """ Return the number of a month by (locale-independently) English name """
3662
f6717dec 3663 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3664
caefb1de 3665 try:
f6717dec 3666 return month_names.index(name) + 1
7105440c
YCH
3667 except ValueError:
3668 return None
3669
3670
3671def month_by_abbreviation(abbrev):
3672 """ Return the number of a month by (locale-independently) English
3673 abbreviations """
3674
3675 try:
3676 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3677 except ValueError:
3678 return None
18258362
JMF
3679
3680
5aafe895 3681def fix_xml_ampersands(xml_str):
18258362 3682 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3683 return re.sub(
3684 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3685 '&amp;',
5aafe895 3686 xml_str)
e3946f98
PH
3687
3688
3689def setproctitle(title):
8bf48f23 3690 assert isinstance(title, compat_str)
c1c05c67
YCH
3691
3692 # ctypes in Jython is not complete
3693 # http://bugs.jython.org/issue2148
3694 if sys.platform.startswith('java'):
3695 return
3696
e3946f98 3697 try:
611c1dd9 3698 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3699 except OSError:
3700 return
2f49bcd6
RC
3701 except TypeError:
3702 # LoadLibrary in Windows Python 2.7.13 only expects
3703 # a bytestring, but since unicode_literals turns
3704 # every string into a unicode string, it fails.
3705 return
6eefe533
PH
3706 title_bytes = title.encode('utf-8')
3707 buf = ctypes.create_string_buffer(len(title_bytes))
3708 buf.value = title_bytes
e3946f98 3709 try:
6eefe533 3710 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3711 except AttributeError:
3712 return # Strange libc, just skip this
d7dda168
PH
3713
3714
3715def remove_start(s, start):
46bc9b7d 3716 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3717
3718
2b9faf55 3719def remove_end(s, end):
46bc9b7d 3720 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3721
3722
31b2051e
S
3723def remove_quotes(s):
3724 if s is None or len(s) < 2:
3725 return s
3726 for quote in ('"', "'", ):
3727 if s[0] == quote and s[-1] == quote:
3728 return s[1:-1]
3729 return s
3730
3731
b6e0c7d2
U
3732def get_domain(url):
3733 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3734 return domain.group('domain') if domain else None
3735
3736
29eb5174 3737def url_basename(url):
9b8aaeed 3738 path = compat_urlparse.urlparse(url).path
28e614de 3739 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3740
3741
02dc0a36
S
3742def base_url(url):
3743 return re.match(r'https?://[^?#&]+/', url).group()
3744
3745
e34c3361 3746def urljoin(base, path):
4b5de77b
S
3747 if isinstance(path, bytes):
3748 path = path.decode('utf-8')
e34c3361
S
3749 if not isinstance(path, compat_str) or not path:
3750 return None
fad4ceb5 3751 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3752 return path
4b5de77b
S
3753 if isinstance(base, bytes):
3754 base = base.decode('utf-8')
3755 if not isinstance(base, compat_str) or not re.match(
3756 r'^(?:https?:)?//', base):
e34c3361
S
3757 return None
3758 return compat_urlparse.urljoin(base, path)
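# Illustrative behaviour of urljoin (a sketch): unlike plain compat_urlparse.urljoin
# it tolerates bytes input and returns None instead of raising on unusable input:
#   >>> urljoin('https://example.com/a/b.html', 'c.html')
#   'https://example.com/a/c.html'
#   >>> urljoin('https://example.com/a/b.html', '//cdn.example.com/x.mp4')
#   '//cdn.example.com/x.mp4'
#   >>> urljoin(None, 'c.html') is None
#   True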
3759
3760
aa94a6d3
PH
3761class HEADRequest(compat_urllib_request.Request):
3762 def get_method(self):
611c1dd9 3763 return 'HEAD'
7217e148
PH
3764
3765
95cf60e8
S
3766class PUTRequest(compat_urllib_request.Request):
3767 def get_method(self):
3768 return 'PUT'
3769
3770
9732d77e 3771def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3772 if get_attr:
3773 if v is not None:
3774 v = getattr(v, get_attr, None)
9572013d
PH
3775 if v == '':
3776 v = None
1812afb7
S
3777 if v is None:
3778 return default
3779 try:
3780 return int(v) * invscale // scale
5e1271c5 3781 except (ValueError, TypeError):
af98f8ff 3782 return default
9732d77e 3783
9572013d 3784
40a90862
JMF
3785def str_or_none(v, default=None):
3786 return default if v is None else compat_str(v)
3787
9732d77e
PH
3788
3789def str_to_int(int_str):
48d4681e 3790 """ A more relaxed version of int_or_none """
42db58ec 3791 if isinstance(int_str, compat_integer_types):
348c6bf1 3792 return int_str
42db58ec
S
3793 elif isinstance(int_str, compat_str):
3794 int_str = re.sub(r'[,\.\+]', '', int_str)
3795 return int_or_none(int_str)
608d11f5
PH
3796
3797
9732d77e 3798def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3799 if v is None:
3800 return default
3801 try:
3802 return float(v) * invscale / scale
5e1271c5 3803 except (ValueError, TypeError):
caf80631 3804 return default
43f775e4
PH
3805
3806
c7e327c4
S
3807def bool_or_none(v, default=None):
3808 return v if isinstance(v, bool) else default
3809
3810
53cd37ba
S
3811def strip_or_none(v, default=None):
3812 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3813
3814
af03000a
S
3815def url_or_none(url):
3816 if not url or not isinstance(url, compat_str):
3817 return None
3818 url = url.strip()
29f7c58a 3819 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3820
3821
e29663c6 3822def strftime_or_none(timestamp, date_format, default=None):
3823 datetime_object = None
3824 try:
3825 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3826 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3827 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3828 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3829 return datetime_object.strftime(date_format)
3830 except (ValueError, TypeError, AttributeError):
3831 return default
3832
3833
608d11f5 3834def parse_duration(s):
8f9312c3 3835 if not isinstance(s, compat_basestring):
608d11f5
PH
3836 return None
3837
ca7b3246
S
3838 s = s.strip()
3839
acaff495 3840 days, hours, mins, secs, ms = [None] * 5
15846398 3841 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3842 if m:
3843 days, hours, mins, secs, ms = m.groups()
3844 else:
3845 m = re.match(
056653bb
S
3846 r'''(?ix)(?:P?
3847 (?:
3848 [0-9]+\s*y(?:ears?)?\s*
3849 )?
3850 (?:
3851 [0-9]+\s*m(?:onths?)?\s*
3852 )?
3853 (?:
3854 [0-9]+\s*w(?:eeks?)?\s*
3855 )?
8f4b58d7 3856 (?:
acaff495 3857 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3858 )?
056653bb 3859 T)?
acaff495 3860 (?:
3861 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3862 )?
3863 (?:
3864 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3865 )?
3866 (?:
3867 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3868 )?Z?$''', s)
acaff495 3869 if m:
3870 days, hours, mins, secs, ms = m.groups()
3871 else:
15846398 3872 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3873 if m:
3874 hours, mins = m.groups()
3875 else:
3876 return None
3877
3878 duration = 0
3879 if secs:
3880 duration += float(secs)
3881 if mins:
3882 duration += float(mins) * 60
3883 if hours:
3884 duration += float(hours) * 60 * 60
3885 if days:
3886 duration += float(days) * 24 * 60 * 60
3887 if ms:
3888 duration += float(ms)
3889 return duration
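# Illustrative usage of parse_duration (a sketch):
#   >>> parse_duration('1:23:45')
#   5025.0
#   >>> parse_duration('PT1H30M')
#   5400.0
#   >>> parse_duration('3 min')
#   180.0
#   >>> parse_duration('23.97 s')
#   23.97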
91d7d0b3
JMF
3890
3891
e65e4c88 3892def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3893 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3894 return (
3895 '{0}.{1}{2}'.format(name, ext, real_ext)
3896 if not expected_real_ext or real_ext[1:] == expected_real_ext
3897 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3898
3899
b3ed15b7
S
3900def replace_extension(filename, ext, expected_real_ext=None):
3901 name, real_ext = os.path.splitext(filename)
3902 return '{0}.{1}'.format(
3903 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3904 ext)
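# Editorial sketch (not part of the original module): how the two extension
# helpers above treat expected_real_ext; hypothetical, unused helper.
def _extension_helpers_examples():
    assert prepend_extension('video.mp4', 'temp') == 'video.temp.mp4'
    assert prepend_extension('video.mkv', 'temp', expected_real_ext='mp4') == 'video.mkv.temp'
    assert replace_extension('video.mp4', 'webm') == 'video.webm'
    assert replace_extension('video.mp4', 'webm', expected_real_ext='avi') == 'video.mp4.webm'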
3905
3906
d70ad093
PH
3907def check_executable(exe, args=[]):
3908 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3909 args can be a list of arguments for a short output (like -version) """
3910 try:
f5b1bca9 3911 process_communicate_or_kill(subprocess.Popen(
3912 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
d70ad093
PH
3913 except OSError:
3914 return False
3915 return exe
b7ab0590
PH
3916
3917
95807118 3918def get_exe_version(exe, args=['--version'],
cae97f65 3919 version_re=None, unrecognized='present'):
95807118
PH
3920 """ Returns the version of the specified executable,
3921 or False if the executable is not present """
3922 try:
b64d04c1 3923 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 3924 # SIGTTOU if yt-dlp is run in the background.
067aa17e 3925 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
f5b1bca9 3926 out, _ = process_communicate_or_kill(subprocess.Popen(
54116803 3927 [encodeArgument(exe)] + args,
00ca7552 3928 stdin=subprocess.PIPE,
f5b1bca9 3929 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
95807118
PH
3930 except OSError:
3931 return False
cae97f65
PH
3932 if isinstance(out, bytes): # Python 2.x
3933 out = out.decode('ascii', 'ignore')
3934 return detect_exe_version(out, version_re, unrecognized)
3935
3936
3937def detect_exe_version(output, version_re=None, unrecognized='present'):
3938 assert isinstance(output, compat_str)
3939 if version_re is None:
3940 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3941 m = re.search(version_re, output)
95807118
PH
3942 if m:
3943 return m.group(1)
3944 else:
3945 return unrecognized
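# Editorial sketch (not part of the original module): detect_exe_version() falls
# back to the 'unrecognized' marker when no version can be extracted.
def _detect_exe_version_examples():
    assert detect_exe_version('ffmpeg version 4.4.1 Copyright (c) 2000-2021') == '4.4.1'
    assert detect_exe_version('tool output without a number', r'v(\d+\.\d+)') == 'present'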
3946
3947
483336e7 3948class LazyList(collections.Sequence):
3949 ''' Lazy immutable list from an iterable
3950 Note that slices of a LazyList are plain lists, not LazyLists'''
3951
3952 def __init__(self, iterable):
3953 self.__iterable = iter(iterable)
3954 self.__cache = []
3955
3956 def __iter__(self):
3957 for item in self.__cache:
3958 yield item
3959 for item in self.__iterable:
3960 self.__cache.append(item)
3961 yield item
3962
3963 def exhaust(self):
3964 ''' Evaluate the entire iterable '''
3965 self.__cache.extend(self.__iterable)
3966
3967 def __getitem__(self, idx):
3968 if isinstance(idx, slice):
3969 step = idx.step or 1
3970 start = idx.start if idx.start is not None else 1 if step > 0 else -1
3971 stop = idx.stop if idx.stop is not None else -1 if step > 0 else 0
3972 elif isinstance(idx, int):
3973 start = stop = idx
3974 else:
3975 raise TypeError('indices must be integers or slices')
3976 if start < 0 or stop < 0:
3977 # We need to consume the entire iterable to be able to slice from the end
3978 # Obviously, never use this with infinite iterables
3979 self.exhaust()
3980 else:
3981 n = max(start, stop) - len(self.__cache) + 1
3982 if n > 0:
3983 self.__cache.extend(itertools.islice(self.__iterable, n))
3984 return self.__cache[idx]
3985
3986 def __bool__(self):
3987 try:
3988 self[0]
3989 except IndexError:
3990 return False
3991 return True
3992
3993 def __len__(self):
3994 self.exhaust()
3995 return len(self.__cache)
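# Editorial sketch (not part of the original module): LazyList consumes its
# iterable only as far as needed; slices come back as plain lists.
def _lazy_list_examples():
    naturals = LazyList(itertools.count())   # wraps an (infinite) iterator
    assert naturals[4] == 4                  # only the first five items are consumed
    assert naturals[:3] == [0, 1, 2]         # already cached; returned as a list
    assert bool(LazyList([])) is False       # empty input -> falsy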
3996
3997
b7ab0590 3998class PagedList(object):
dd26ced1
PH
3999 def __len__(self):
4000 # This is only useful for tests
4001 return len(self.getslice())
4002
55575225 4003 def getslice(self, start, end):
4004 raise NotImplementedError('This method must be implemented by subclasses')
4005
4006 def __getitem__(self, idx):
4007 if not isinstance(idx, int) or idx < 0:
4008 raise TypeError('indices must be non-negative integers')
4009 entries = self.getslice(idx, idx + 1)
4010 return entries[0] if entries else None
4011
9c44d242
PH
4012
4013class OnDemandPagedList(PagedList):
6be08ce6 4014 def __init__(self, pagefunc, pagesize, use_cache=True):
9c44d242
PH
4015 self._pagefunc = pagefunc
4016 self._pagesize = pagesize
b95dc034
YCH
4017 self._use_cache = use_cache
4018 if use_cache:
4019 self._cache = {}
9c44d242 4020
b7ab0590
PH
4021 def getslice(self, start=0, end=None):
4022 res = []
4023 for pagenum in itertools.count(start // self._pagesize):
4024 firstid = pagenum * self._pagesize
4025 nextfirstid = pagenum * self._pagesize + self._pagesize
4026 if start >= nextfirstid:
4027 continue
4028
b95dc034
YCH
4029 page_results = None
4030 if self._use_cache:
4031 page_results = self._cache.get(pagenum)
4032 if page_results is None:
4033 page_results = list(self._pagefunc(pagenum))
4034 if self._use_cache:
4035 self._cache[pagenum] = page_results
b7ab0590
PH
4036
4037 startv = (
4038 start % self._pagesize
4039 if firstid <= start < nextfirstid
4040 else 0)
4041
4042 endv = (
4043 ((end - 1) % self._pagesize) + 1
4044 if (end is not None and firstid <= end <= nextfirstid)
4045 else None)
4046
4047 if startv != 0 or endv is not None:
4048 page_results = page_results[startv:endv]
4049 res.extend(page_results)
4050
4051 # A little optimization - if the current page is not "full", i.e. does
4052 # not contain page_size videos, then we can assume that this page
4053 # is the last one - there are no more ids on further pages -
4054 # i.e. no need to query again.
4055 if len(page_results) + startv < self._pagesize:
4056 break
4057
4058 # If we got the whole page, but the next page is not interesting,
4059 # break out early as well
4060 if end == nextfirstid:
4061 break
4062 return res
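# Editorial sketch (not part of the original module): OnDemandPagedList only
# calls the page function for pages that are actually needed. The lambda below
# is a hypothetical page source yielding ten items per page.
def _paged_list_example():
    pages = OnDemandPagedList(lambda n: list(range(n * 10, (n + 1) * 10)), 10)
    assert pages[42] == 42                    # fetches page 4 only
    assert pages.getslice(5, 8) == [5, 6, 7]  # fetches page 0 only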
81c2f20b
PH
4063
4064
9c44d242
PH
4065class InAdvancePagedList(PagedList):
4066 def __init__(self, pagefunc, pagecount, pagesize):
4067 self._pagefunc = pagefunc
4068 self._pagecount = pagecount
4069 self._pagesize = pagesize
4070
4071 def getslice(self, start=0, end=None):
4072 res = []
4073 start_page = start // self._pagesize
4074 end_page = (
4075 self._pagecount if end is None else (end // self._pagesize + 1))
4076 skip_elems = start - start_page * self._pagesize
4077 only_more = None if end is None else end - start
4078 for pagenum in range(start_page, end_page):
4079 page = list(self._pagefunc(pagenum))
4080 if skip_elems:
4081 page = page[skip_elems:]
4082 skip_elems = None
4083 if only_more is not None:
4084 if len(page) < only_more:
4085 only_more -= len(page)
4086 else:
4087 page = page[:only_more]
4088 res.extend(page)
4089 break
4090 res.extend(page)
4091 return res
4092
4093
81c2f20b 4094def uppercase_escape(s):
676eb3f2 4095 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4096 return re.sub(
a612753d 4097 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4098 lambda m: unicode_escape(m.group(0))[0],
4099 s)
0fe2ff78
YCH
4100
4101
4102def lowercase_escape(s):
4103 unicode_escape = codecs.getdecoder('unicode_escape')
4104 return re.sub(
4105 r'\\u[0-9a-fA-F]{4}',
4106 lambda m: unicode_escape(m.group(0))[0],
4107 s)
b53466e1 4108
d05cfe06
S
4109
4110def escape_rfc3986(s):
4111 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4112 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4113 s = s.encode('utf-8')
ecc0c5ee 4114 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4115
4116
4117def escape_url(url):
4118 """Escape URL as suggested by RFC 3986"""
4119 url_parsed = compat_urllib_parse_urlparse(url)
4120 return url_parsed._replace(
efbed08d 4121 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4122 path=escape_rfc3986(url_parsed.path),
4123 params=escape_rfc3986(url_parsed.params),
4124 query=escape_rfc3986(url_parsed.query),
4125 fragment=escape_rfc3986(url_parsed.fragment)
4126 ).geturl()
4127
62e609ab
PH
4128
4129def read_batch_urls(batch_fd):
4130 def fixup(url):
4131 if not isinstance(url, compat_str):
4132 url = url.decode('utf-8', 'replace')
8c04f0be 4133 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4134 for bom in BOM_UTF8:
4135 if url.startswith(bom):
4136 url = url[len(bom):]
4137 url = url.lstrip()
4138 if not url or url.startswith(('#', ';', ']')):
62e609ab 4139 return False
8c04f0be 4140 # "#" cannot be stripped out since it is part of the URI
4141 # However, it can be safely stripped out if following a whitespace
4142 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4143
4144 with contextlib.closing(batch_fd) as fd:
4145 return [url for url in map(fixup, fd) if url]
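# Editorial sketch (not part of the original module): read_batch_urls() skips
# comments, blank lines and trailing " # ..." notes.
def _read_batch_urls_example():
    batch = io.StringIO(
        '# a comment line\n'
        'https://example.com/a\n'
        '\n'
        'https://example.com/b # trailing note\n')
    assert read_batch_urls(batch) == ['https://example.com/a', 'https://example.com/b']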
b74fa8cd
JMF
4146
4147
4148def urlencode_postdata(*args, **kargs):
15707c7e 4149 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4150
4151
38f9ef31 4152def update_url_query(url, query):
cacd9966
YCH
4153 if not query:
4154 return url
38f9ef31 4155 parsed_url = compat_urlparse.urlparse(url)
4156 qs = compat_parse_qs(parsed_url.query)
4157 qs.update(query)
4158 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4159 query=compat_urllib_parse_urlencode(qs, True)))
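# Editorial sketch (not part of the original module): update_url_query() merges
# new parameters into an existing query string (compared here order-independently).
def _update_url_query_example():
    updated = update_url_query('https://example.com/path?a=1', {'b': '2'})
    assert compat_parse_qs(compat_urlparse.urlparse(updated).query) == {'a': ['1'], 'b': ['2']}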
16392824 4160
8e60dc75 4161
ed0291d1
S
4162def update_Request(req, url=None, data=None, headers={}, query={}):
4163 req_headers = req.headers.copy()
4164 req_headers.update(headers)
4165 req_data = data or req.data
4166 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4167 req_get_method = req.get_method()
4168 if req_get_method == 'HEAD':
4169 req_type = HEADRequest
4170 elif req_get_method == 'PUT':
4171 req_type = PUTRequest
4172 else:
4173 req_type = compat_urllib_request.Request
ed0291d1
S
4174 new_req = req_type(
4175 req_url, data=req_data, headers=req_headers,
4176 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4177 if hasattr(req, 'timeout'):
4178 new_req.timeout = req.timeout
4179 return new_req
4180
4181
10c87c15 4182def _multipart_encode_impl(data, boundary):
0c265486
YCH
4183 content_type = 'multipart/form-data; boundary=%s' % boundary
4184
4185 out = b''
4186 for k, v in data.items():
4187 out += b'--' + boundary.encode('ascii') + b'\r\n'
4188 if isinstance(k, compat_str):
4189 k = k.encode('utf-8')
4190 if isinstance(v, compat_str):
4191 v = v.encode('utf-8')
4192 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4193 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4194 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4195 if boundary.encode('ascii') in content:
4196 raise ValueError('Boundary overlaps with data')
4197 out += content
4198
4199 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4200
4201 return out, content_type
4202
4203
4204def multipart_encode(data, boundary=None):
4205 '''
4206 Encode a dict to RFC 7578-compliant form-data
4207
4208 data:
4209 A dict where keys and values can be either Unicode or bytes-like
4210 objects.
4211 boundary:
4212 If specified, it must be a Unicode object and is used as the boundary. Otherwise
4213 a random boundary is generated.
4214
4215 Reference: https://tools.ietf.org/html/rfc7578
4216 '''
4217 has_specified_boundary = boundary is not None
4218
4219 while True:
4220 if boundary is None:
4221 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4222
4223 try:
10c87c15 4224 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4225 break
4226 except ValueError:
4227 if has_specified_boundary:
4228 raise
4229 boundary = None
4230
4231 return out, content_type
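# Editorial sketch (not part of the original module): with a fixed boundary the
# output of multipart_encode() is fully deterministic.
def _multipart_encode_example():
    body, content_type = multipart_encode({'field': 'value'}, boundary='X')
    assert content_type == 'multipart/form-data; boundary=X'
    assert body == (b'--X\r\nContent-Disposition: form-data; name="field"\r\n'
                    b'\r\nvalue\r\n--X--\r\n')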
4232
4233
86296ad2 4234def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4235 if isinstance(key_or_keys, (list, tuple)):
4236 for key in key_or_keys:
86296ad2
S
4237 if key not in d or d[key] is None or skip_false_values and not d[key]:
4238 continue
4239 return d[key]
cbecc9b9
S
4240 return default
4241 return d.get(key_or_keys, default)
4242
4243
329ca3be 4244def try_get(src, getter, expected_type=None):
a32a9a7e
S
4245 if not isinstance(getter, (list, tuple)):
4246 getter = [getter]
4247 for get in getter:
4248 try:
4249 v = get(src)
4250 except (AttributeError, KeyError, TypeError, IndexError):
4251 pass
4252 else:
4253 if expected_type is None or isinstance(v, expected_type):
4254 return v
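# Editorial sketch (not part of the original module): dict_get() skips falsy/None
# values across several candidate keys, and try_get() swallows lookup errors.
def _dict_get_try_get_examples():
    meta = {'title': '', 'fulltitle': 'An example', 'duration': None}
    assert dict_get(meta, ('title', 'fulltitle')) == 'An example'
    assert dict_get(meta, ('duration', 'length'), default=0) == 0
    info = {'entries': [{'id': 'abc'}]}
    assert try_get(info, lambda x: x['entries'][0]['id'], compat_str) == 'abc'
    assert try_get(info, lambda x: x['entries'][5]['id']) is None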
329ca3be
S
4255
4256
6cc62232
S
4257def merge_dicts(*dicts):
4258 merged = {}
4259 for a_dict in dicts:
4260 for k, v in a_dict.items():
4261 if v is None:
4262 continue
3089bc74
S
4263 if (k not in merged
4264 or (isinstance(v, compat_str) and v
4265 and isinstance(merged[k], compat_str)
4266 and not merged[k])):
6cc62232
S
4267 merged[k] = v
4268 return merged
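# Editorial sketch (not part of the original module): later dicts fill gaps and
# may replace an earlier empty string, but never overwrite real values.
def _merge_dicts_example():
    merged = merge_dicts({'title': '', 'id': 'x'}, {'title': 'Proper title', 'uploader': None})
    assert merged == {'title': 'Proper title', 'id': 'x'}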
4269
4270
8e60dc75
S
4271def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4272 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4273
16392824 4274
a1a530b0
PH
4275US_RATINGS = {
4276 'G': 0,
4277 'PG': 10,
4278 'PG-13': 13,
4279 'R': 16,
4280 'NC': 18,
4281}
fac55558
PH
4282
4283
a8795327 4284TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4285 'TV-Y': 0,
4286 'TV-Y7': 7,
4287 'TV-G': 0,
4288 'TV-PG': 0,
4289 'TV-14': 14,
4290 'TV-MA': 17,
a8795327
S
4291}
4292
4293
146c80e2 4294def parse_age_limit(s):
a8795327
S
4295 if type(s) == int:
4296 return s if 0 <= s <= 21 else None
4297 if not isinstance(s, compat_basestring):
d838b1bd 4298 return None
146c80e2 4299 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4300 if m:
4301 return int(m.group('age'))
5c5fae6d 4302 s = s.upper()
a8795327
S
4303 if s in US_RATINGS:
4304 return US_RATINGS[s]
5a16c9d9 4305 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4306 if m:
5a16c9d9 4307 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4308 return None
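# Editorial sketch (not part of the original module): parse_age_limit() handles
# MPAA ratings, TV parental guidelines, "NN+" strings and plain integers.
def _parse_age_limit_examples():
    assert parse_age_limit('PG-13') == 13
    assert parse_age_limit('TV-MA') == 17
    assert parse_age_limit('18+') == 18
    assert parse_age_limit(16) == 16
    assert parse_age_limit('not a rating') is None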
146c80e2
S
4309
4310
fac55558 4311def strip_jsonp(code):
609a61e3 4312 return re.sub(
5552c9eb 4313 r'''(?sx)^
e9c671d5 4314 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4315 (?:\s*&&\s*(?P=func_name))?
4316 \s*\(\s*(?P<callback_data>.*)\);?
4317 \s*?(?://[^\n]*)*$''',
4318 r'\g<callback_data>', code)
478c2c61
PH
4319
4320
5c610515 4321def js_to_json(code, vars={}):
4322 # vars is a dict of var, val pairs to substitute
4195096e
S
4323 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4324 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4325 INTEGER_TABLE = (
4326 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4327 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4328 )
4329
e05f6939 4330 def fix_kv(m):
e7b6d122
PH
4331 v = m.group(0)
4332 if v in ('true', 'false', 'null'):
4333 return v
8bdd16b4 4334 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4335 return ""
4336
4337 if v[0] in ("'", '"'):
4338 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4339 '"': '\\"',
bd1e4844 4340 "\\'": "'",
4341 '\\\n': '',
4342 '\\x': '\\u00',
4343 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4344 else:
4345 for regex, base in INTEGER_TABLE:
4346 im = re.match(regex, v)
4347 if im:
4348 i = int(im.group(1), base)
4349 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4350
5c610515 4351 if v in vars:
4352 return vars[v]
4353
e7b6d122 4354 return '"%s"' % v
e05f6939 4355
bd1e4844 4356 return re.sub(r'''(?sx)
4357 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4358 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4359 {comment}|,(?={skip}[\]}}])|
c384d537 4360 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4195096e 4361 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4362 [0-9]+(?={skip}:)|
4363 !+
4195096e 4364 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
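# Editorial sketch (not part of the original module): js_to_json() quotes bare
# keys, converts single-quoted strings and rewrites hex/octal literals.
def _js_to_json_example():
    assert js_to_json("{abc: true, key: 'value', count: 0x10}") == \
        '{"abc": true, "key": "value", "count": 16}'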
e05f6939
PH
4365
4366
478c2c61
PH
4367def qualities(quality_ids):
4368 """ Get a numeric quality value out of a list of possible values """
4369 def q(qid):
4370 try:
4371 return quality_ids.index(qid)
4372 except ValueError:
4373 return -1
4374 return q
4375
acd69589 4376
de6000d9 4377DEFAULT_OUTTMPL = {
4378 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4379 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4380}
4381OUTTMPL_TYPES = {
72755351 4382 'chapter': None,
de6000d9 4383 'subtitle': None,
4384 'thumbnail': None,
4385 'description': 'description',
4386 'annotation': 'annotations.xml',
4387 'infojson': 'info.json',
5112f26a 4388 'pl_thumbnail': None,
de6000d9 4389 'pl_description': 'description',
4390 'pl_infojson': 'info.json',
4391}
0a871f68 4392
143db31d 4393# As of [1] format syntax is:
4394# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4395# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4396FORMAT_RE = r'''(?x)
4397 (?<!%)
4398 %
4399 \({0}\) # mapping key
4400 (?:[#0\-+ ]+)? # conversion flags (optional)
4401 (?:\d+)? # minimum field width (optional)
4402 (?:\.\d+)? # precision (optional)
4403 [hlL]? # length modifier (optional)
4404 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
4405'''
4406
a020a0dc
PH
4407
4408def limit_length(s, length):
4409 """ Add ellipses to overly long strings """
4410 if s is None:
4411 return None
4412 ELLIPSES = '...'
4413 if len(s) > length:
4414 return s[:length - len(ELLIPSES)] + ELLIPSES
4415 return s
48844745
PH
4416
4417
4418def version_tuple(v):
5f9b8394 4419 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4420
4421
4422def is_outdated_version(version, limit, assume_new=True):
4423 if not version:
4424 return not assume_new
4425 try:
4426 return version_tuple(version) < version_tuple(limit)
4427 except ValueError:
4428 return not assume_new
732ea2f0
PH
4429
4430
4431def ytdl_is_updateable():
7a5c1cfe 4432 """ Returns if yt-dlp can be updated with -U """
735d865e 4433 return False
4434
732ea2f0
PH
4435 from zipimport import zipimporter
4436
4437 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
4438
4439
4440def args_to_str(args):
4441 # Get a short string representation for a subprocess command
702ccf2d 4442 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4443
4444
9b9c5355 4445def error_to_compat_str(err):
fdae2358
S
4446 err_str = str(err)
4447 # On python 2 error byte string must be decoded with proper
4448 # encoding rather than ascii
4449 if sys.version_info[0] < 3:
4450 err_str = err_str.decode(preferredencoding())
4451 return err_str
4452
4453
c460bdd5 4454def mimetype2ext(mt):
eb9ee194
S
4455 if mt is None:
4456 return None
4457
765ac263
JMF
4458 ext = {
4459 'audio/mp4': 'm4a',
6c33d24b
YCH
4460 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4461 # it's the most popular one
4462 'audio/mpeg': 'mp3',
ba39289d 4463 'audio/x-wav': 'wav',
765ac263
JMF
4464 }.get(mt)
4465 if ext is not None:
4466 return ext
4467
c460bdd5 4468 _, _, res = mt.rpartition('/')
6562d34a 4469 res = res.split(';')[0].strip().lower()
c460bdd5
PH
4470
4471 return {
f6861ec9 4472 '3gpp': '3gp',
cafcf657 4473 'smptett+xml': 'tt',
cafcf657 4474 'ttaf+xml': 'dfxp',
a0d8d704 4475 'ttml+xml': 'ttml',
f6861ec9 4476 'x-flv': 'flv',
a0d8d704 4477 'x-mp4-fragmented': 'mp4',
d4f05d47 4478 'x-ms-sami': 'sami',
a0d8d704 4479 'x-ms-wmv': 'wmv',
b4173f15
RA
4480 'mpegurl': 'm3u8',
4481 'x-mpegurl': 'm3u8',
4482 'vnd.apple.mpegurl': 'm3u8',
4483 'dash+xml': 'mpd',
b4173f15 4484 'f4m+xml': 'f4m',
f164b971 4485 'hds+xml': 'f4m',
e910fe2f 4486 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4487 'quicktime': 'mov',
98ce1a3f 4488 'mp2t': 'ts',
39e7107d 4489 'x-wav': 'wav',
c460bdd5
PH
4490 }.get(res, res)
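# Editorial sketch (not part of the original module): typical mappings performed
# by mimetype2ext(), including MIME types carrying parameters after ';'.
def _mimetype2ext_examples():
    assert mimetype2ext('video/mp4') == 'mp4'
    assert mimetype2ext('audio/mpeg') == 'mp3'
    assert mimetype2ext('application/vnd.apple.mpegurl') == 'm3u8'
    assert mimetype2ext('text/vtt; charset=utf-8') == 'vtt'
    assert mimetype2ext(None) is None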
4491
4492
4f3c5e06 4493def parse_codecs(codecs_str):
4494 # http://tools.ietf.org/html/rfc6381
4495 if not codecs_str:
4496 return {}
a0566bbf 4497 split_codecs = list(filter(None, map(
4f3c5e06 4498 lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4499 vcodec, acodec = None, None
a0566bbf 4500 for full_codec in split_codecs:
4f3c5e06 4501 codec = full_codec.split('.')[0]
28cc2241 4502 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4503 if not vcodec:
4504 vcodec = full_codec
60f5c9fb 4505 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4506 if not acodec:
4507 acodec = full_codec
4508 else:
60f5c9fb 4509 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4510 if not vcodec and not acodec:
a0566bbf 4511 if len(split_codecs) == 2:
4f3c5e06 4512 return {
a0566bbf 4513 'vcodec': split_codecs[0],
4514 'acodec': split_codecs[1],
4f3c5e06 4515 }
4516 else:
4517 return {
4518 'vcodec': vcodec or 'none',
4519 'acodec': acodec or 'none',
4520 }
4521 return {}
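# Editorial sketch (not part of the original module): parse_codecs() splits an
# RFC 6381 codecs string into video and audio codec fields.
def _parse_codecs_examples():
    assert parse_codecs('avc1.42001e, mp4a.40.2') == \
        {'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.2'}
    assert parse_codecs('opus') == {'vcodec': 'none', 'acodec': 'opus'}
    assert parse_codecs('') == {}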
4522
4523
2ccd1b10 4524def urlhandle_detect_ext(url_handle):
79298173 4525 getheader = url_handle.headers.get
2ccd1b10 4526
b55ee18f
PH
4527 cd = getheader('Content-Disposition')
4528 if cd:
4529 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4530 if m:
4531 e = determine_ext(m.group('filename'), default_ext=None)
4532 if e:
4533 return e
4534
c460bdd5 4535 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4536
4537
1e399778
YCH
4538def encode_data_uri(data, mime_type):
4539 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4540
4541
05900629 4542def age_restricted(content_limit, age_limit):
6ec6cb4e 4543 """ Returns True iff the content should be blocked """
05900629
PH
4544
4545 if age_limit is None: # No limit set
4546 return False
4547 if content_limit is None:
4548 return False # Content available for everyone
4549 return age_limit < content_limit
61ca9a80
PH
4550
4551
4552def is_html(first_bytes):
4553 """ Detect whether a file contains HTML by examining its first bytes. """
4554
4555 BOMS = [
4556 (b'\xef\xbb\xbf', 'utf-8'),
4557 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4558 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4559 (b'\xff\xfe', 'utf-16-le'),
4560 (b'\xfe\xff', 'utf-16-be'),
4561 ]
4562 for bom, enc in BOMS:
4563 if first_bytes.startswith(bom):
4564 s = first_bytes[len(bom):].decode(enc, 'replace')
4565 break
4566 else:
4567 s = first_bytes.decode('utf-8', 'replace')
4568
4569 return re.match(r'^\s*<', s)
a055469f
PH
4570
4571
4572def determine_protocol(info_dict):
4573 protocol = info_dict.get('protocol')
4574 if protocol is not None:
4575 return protocol
4576
4577 url = info_dict['url']
4578 if url.startswith('rtmp'):
4579 return 'rtmp'
4580 elif url.startswith('mms'):
4581 return 'mms'
4582 elif url.startswith('rtsp'):
4583 return 'rtsp'
4584
4585 ext = determine_ext(url)
4586 if ext == 'm3u8':
4587 return 'm3u8'
4588 elif ext == 'f4m':
4589 return 'f4m'
4590
4591 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4592
4593
76d321f6 4594def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4595 """ Render a list of rows, each as a list of values """
76d321f6 4596
4597 def get_max_lens(table):
4598 return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4599
4600 def filter_using_list(row, filterArray):
4601 return [col for (take, col) in zip(filterArray, row) if take]
4602
4603 if hideEmpty:
4604 max_lens = get_max_lens(data)
4605 header_row = filter_using_list(header_row, max_lens)
4606 data = [filter_using_list(row, max_lens) for row in data]
4607
cfb56d1a 4608 table = [header_row] + data
76d321f6 4609 max_lens = get_max_lens(table)
4610 if delim:
4611 table = [header_row] + [['-' * ml for ml in max_lens]] + data
4612 format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
cfb56d1a 4613 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4614
4615
4616def _match_one(filter_part, dct):
4617 COMPARISON_OPERATORS = {
4618 '<': operator.lt,
4619 '<=': operator.le,
4620 '>': operator.gt,
4621 '>=': operator.ge,
4622 '=': operator.eq,
4623 '!=': operator.ne,
4624 }
4625 operator_rex = re.compile(r'''(?x)\s*
4626 (?P<key>[a-z_]+)
4627 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4628 (?:
4629 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4630 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4631 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4632 )
4633 \s*$
4634 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4635 m = operator_rex.search(filter_part)
4636 if m:
4637 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4638 actual_value = dct.get(m.group('key'))
3089bc74
S
4639 if (m.group('quotedstrval') is not None
4640 or m.group('strval') is not None
e5a088dc
S
4641 # If the original field is a string and the matching comparison value is
4642 # a number, we should respect the origin of the original field
4643 # and process comparison value as a string (see
067aa17e 4644 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4645 or actual_value is not None and m.group('intval') is not None
4646 and isinstance(actual_value, compat_str)):
347de493
PH
4647 if m.group('op') not in ('=', '!='):
4648 raise ValueError(
4649 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4650 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4651 quote = m.group('quote')
4652 if quote is not None:
4653 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4654 else:
4655 try:
4656 comparison_value = int(m.group('intval'))
4657 except ValueError:
4658 comparison_value = parse_filesize(m.group('intval'))
4659 if comparison_value is None:
4660 comparison_value = parse_filesize(m.group('intval') + 'B')
4661 if comparison_value is None:
4662 raise ValueError(
4663 'Invalid integer value %r in filter part %r' % (
4664 m.group('intval'), filter_part))
347de493
PH
4665 if actual_value is None:
4666 return m.group('none_inclusive')
4667 return op(actual_value, comparison_value)
4668
4669 UNARY_OPERATORS = {
1cc47c66
S
4670 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4671 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4672 }
4673 operator_rex = re.compile(r'''(?x)\s*
4674 (?P<op>%s)\s*(?P<key>[a-z_]+)
4675 \s*$
4676 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4677 m = operator_rex.search(filter_part)
4678 if m:
4679 op = UNARY_OPERATORS[m.group('op')]
4680 actual_value = dct.get(m.group('key'))
4681 return op(actual_value)
4682
4683 raise ValueError('Invalid filter part %r' % filter_part)
4684
4685
4686def match_str(filter_str, dct):
4687 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4688
4689 return all(
4690 _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
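# Editorial sketch (not part of the original module): the filter syntax accepted
# by match_str() combines comparisons and presence checks with '&'.
def _match_str_examples():
    video = {'duration': 30, 'uploader': 'someone', 'is_live': False}
    assert match_str('duration < 60 & uploader = someone', video)
    assert match_str('!is_live', video)
    assert not match_str('duration > 60', video)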
4691
4692
4693def match_filter_func(filter_str):
4694 def _match_func(info_dict):
4695 if match_str(filter_str, info_dict):
4696 return None
4697 else:
4698 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4699 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4700 return _match_func
91410c9b
PH
4701
4702
bf6427d2
YCH
4703def parse_dfxp_time_expr(time_expr):
4704 if not time_expr:
d631d5f9 4705 return
bf6427d2
YCH
4706
4707 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4708 if mobj:
4709 return float(mobj.group('time_offset'))
4710
db2fe38b 4711 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4712 if mobj:
db2fe38b 4713 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4714
4715
c1c924ab
YCH
4716def srt_subtitles_timecode(seconds):
4717 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
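# Editorial sketch (not part of the original module): DFXP time expressions are
# parsed into seconds, which srt_subtitles_timecode() renders for SRT output.
def _timecode_examples():
    assert parse_dfxp_time_expr('5.2s') == 5.2
    assert parse_dfxp_time_expr('00:01:02.5') == 62.5
    assert srt_subtitles_timecode(3661.25) == '01:01:01,250'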
bf6427d2
YCH
4718
4719
4720def dfxp2srt(dfxp_data):
3869028f
YCH
4721 '''
4722 @param dfxp_data A bytes-like object containing DFXP data
4723 @returns A unicode object containing converted SRT data
4724 '''
5b995f71 4725 LEGACY_NAMESPACES = (
3869028f
YCH
4726 (b'http://www.w3.org/ns/ttml', [
4727 b'http://www.w3.org/2004/11/ttaf1',
4728 b'http://www.w3.org/2006/04/ttaf1',
4729 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4730 ]),
3869028f
YCH
4731 (b'http://www.w3.org/ns/ttml#styling', [
4732 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4733 ]),
4734 )
4735
4736 SUPPORTED_STYLING = [
4737 'color',
4738 'fontFamily',
4739 'fontSize',
4740 'fontStyle',
4741 'fontWeight',
4742 'textDecoration'
4743 ]
4744
4e335771 4745 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4746 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4747 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4748 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4749 })
bf6427d2 4750
5b995f71
RA
4751 styles = {}
4752 default_style = {}
4753
87de7069 4754 class TTMLPElementParser(object):
5b995f71
RA
4755 _out = ''
4756 _unclosed_elements = []
4757 _applied_styles = []
bf6427d2 4758
2b14cb56 4759 def start(self, tag, attrib):
5b995f71
RA
4760 if tag in (_x('ttml:br'), 'br'):
4761 self._out += '\n'
4762 else:
4763 unclosed_elements = []
4764 style = {}
4765 element_style_id = attrib.get('style')
4766 if default_style:
4767 style.update(default_style)
4768 if element_style_id:
4769 style.update(styles.get(element_style_id, {}))
4770 for prop in SUPPORTED_STYLING:
4771 prop_val = attrib.get(_x('tts:' + prop))
4772 if prop_val:
4773 style[prop] = prop_val
4774 if style:
4775 font = ''
4776 for k, v in sorted(style.items()):
4777 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4778 continue
4779 if k == 'color':
4780 font += ' color="%s"' % v
4781 elif k == 'fontSize':
4782 font += ' size="%s"' % v
4783 elif k == 'fontFamily':
4784 font += ' face="%s"' % v
4785 elif k == 'fontWeight' and v == 'bold':
4786 self._out += '<b>'
4787 unclosed_elements.append('b')
4788 elif k == 'fontStyle' and v == 'italic':
4789 self._out += '<i>'
4790 unclosed_elements.append('i')
4791 elif k == 'textDecoration' and v == 'underline':
4792 self._out += '<u>'
4793 unclosed_elements.append('u')
4794 if font:
4795 self._out += '<font' + font + '>'
4796 unclosed_elements.append('font')
4797 applied_style = {}
4798 if self._applied_styles:
4799 applied_style.update(self._applied_styles[-1])
4800 applied_style.update(style)
4801 self._applied_styles.append(applied_style)
4802 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4803
2b14cb56 4804 def end(self, tag):
5b995f71
RA
4805 if tag not in (_x('ttml:br'), 'br'):
4806 unclosed_elements = self._unclosed_elements.pop()
4807 for element in reversed(unclosed_elements):
4808 self._out += '</%s>' % element
4809 if unclosed_elements and self._applied_styles:
4810 self._applied_styles.pop()
bf6427d2 4811
2b14cb56 4812 def data(self, data):
5b995f71 4813 self._out += data
2b14cb56 4814
4815 def close(self):
5b995f71 4816 return self._out.strip()
2b14cb56 4817
4818 def parse_node(node):
4819 target = TTMLPElementParser()
4820 parser = xml.etree.ElementTree.XMLParser(target=target)
4821 parser.feed(xml.etree.ElementTree.tostring(node))
4822 return parser.close()
bf6427d2 4823
5b995f71
RA
4824 for k, v in LEGACY_NAMESPACES:
4825 for ns in v:
4826 dfxp_data = dfxp_data.replace(ns, k)
4827
3869028f 4828 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 4829 out = []
5b995f71 4830 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
4831
4832 if not paras:
4833 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 4834
5b995f71
RA
4835 repeat = False
4836 while True:
4837 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
4838 style_id = style.get('id') or style.get(_x('xml:id'))
4839 if not style_id:
4840 continue
5b995f71
RA
4841 parent_style_id = style.get('style')
4842 if parent_style_id:
4843 if parent_style_id not in styles:
4844 repeat = True
4845 continue
4846 styles[style_id] = styles[parent_style_id].copy()
4847 for prop in SUPPORTED_STYLING:
4848 prop_val = style.get(_x('tts:' + prop))
4849 if prop_val:
4850 styles.setdefault(style_id, {})[prop] = prop_val
4851 if repeat:
4852 repeat = False
4853 else:
4854 break
4855
4856 for p in ('body', 'div'):
4857 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4858 if ele is None:
4859 continue
4860 style = styles.get(ele.get('style'))
4861 if not style:
4862 continue
4863 default_style.update(style)
4864
bf6427d2 4865 for para, index in zip(paras, itertools.count(1)):
d631d5f9 4866 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 4867 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
4868 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4869 if begin_time is None:
4870 continue
7dff0363 4871 if not end_time:
d631d5f9
YCH
4872 if not dur:
4873 continue
4874 end_time = begin_time + dur
bf6427d2
YCH
4875 out.append('%d\n%s --> %s\n%s\n\n' % (
4876 index,
c1c924ab
YCH
4877 srt_subtitles_timecode(begin_time),
4878 srt_subtitles_timecode(end_time),
bf6427d2
YCH
4879 parse_node(para)))
4880
4881 return ''.join(out)
4882
4883
66e289ba
S
4884def cli_option(params, command_option, param):
4885 param = params.get(param)
98e698f1
RA
4886 if param:
4887 param = compat_str(param)
66e289ba
S
4888 return [command_option, param] if param is not None else []
4889
4890
4891def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4892 param = params.get(param)
5b232f46
S
4893 if param is None:
4894 return []
66e289ba
S
4895 assert isinstance(param, bool)
4896 if separator:
4897 return [command_option + separator + (true_value if param else false_value)]
4898 return [command_option, true_value if param else false_value]
4899
4900
4901def cli_valueless_option(params, command_option, param, expected_value=True):
4902 param = params.get(param)
4903 return [command_option] if param == expected_value else []
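# Editorial sketch (not part of the original module): how the cli_* helpers
# above turn yt-dlp params into external command-line arguments; the params
# dict below is hypothetical.
def _cli_option_examples():
    params = {'proxy': 'http://127.0.0.1:3128', 'nocheckcertificate': True, 'quiet': True}
    assert cli_option(params, '--proxy', 'proxy') == ['--proxy', 'http://127.0.0.1:3128']
    assert cli_bool_option(params, '--no-check-certificate', 'nocheckcertificate') == \
        ['--no-check-certificate', 'true']
    assert cli_bool_option(params, '--check-certificate', 'nocheckcertificate',
                           'false', 'true', '=') == ['--check-certificate=false']
    assert cli_valueless_option(params, '--quiet', 'quiet') == ['--quiet']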
4904
4905
e92caff5 4906def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 4907 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 4908 if use_compat:
5b1ecbb3 4909 return argdict
4910 else:
4911 argdict = None
eab9b2bc 4912 if argdict is None:
5b1ecbb3 4913 return default
eab9b2bc 4914 assert isinstance(argdict, dict)
4915
e92caff5 4916 assert isinstance(keys, (list, tuple))
4917 for key_list in keys:
4918 if isinstance(key_list, compat_str):
4919 key_list = (key_list,)
4920 arg_list = list(filter(
4921 lambda x: x is not None,
4922 [argdict.get(key.lower()) for key in key_list]))
4923 if arg_list:
4924 return [arg for args in arg_list for arg in args]
4925 return default
66e289ba
S
4926
4927
39672624
YCH
4928class ISO639Utils(object):
4929 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4930 _lang_map = {
4931 'aa': 'aar',
4932 'ab': 'abk',
4933 'ae': 'ave',
4934 'af': 'afr',
4935 'ak': 'aka',
4936 'am': 'amh',
4937 'an': 'arg',
4938 'ar': 'ara',
4939 'as': 'asm',
4940 'av': 'ava',
4941 'ay': 'aym',
4942 'az': 'aze',
4943 'ba': 'bak',
4944 'be': 'bel',
4945 'bg': 'bul',
4946 'bh': 'bih',
4947 'bi': 'bis',
4948 'bm': 'bam',
4949 'bn': 'ben',
4950 'bo': 'bod',
4951 'br': 'bre',
4952 'bs': 'bos',
4953 'ca': 'cat',
4954 'ce': 'che',
4955 'ch': 'cha',
4956 'co': 'cos',
4957 'cr': 'cre',
4958 'cs': 'ces',
4959 'cu': 'chu',
4960 'cv': 'chv',
4961 'cy': 'cym',
4962 'da': 'dan',
4963 'de': 'deu',
4964 'dv': 'div',
4965 'dz': 'dzo',
4966 'ee': 'ewe',
4967 'el': 'ell',
4968 'en': 'eng',
4969 'eo': 'epo',
4970 'es': 'spa',
4971 'et': 'est',
4972 'eu': 'eus',
4973 'fa': 'fas',
4974 'ff': 'ful',
4975 'fi': 'fin',
4976 'fj': 'fij',
4977 'fo': 'fao',
4978 'fr': 'fra',
4979 'fy': 'fry',
4980 'ga': 'gle',
4981 'gd': 'gla',
4982 'gl': 'glg',
4983 'gn': 'grn',
4984 'gu': 'guj',
4985 'gv': 'glv',
4986 'ha': 'hau',
4987 'he': 'heb',
b7acc835 4988 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4989 'hi': 'hin',
4990 'ho': 'hmo',
4991 'hr': 'hrv',
4992 'ht': 'hat',
4993 'hu': 'hun',
4994 'hy': 'hye',
4995 'hz': 'her',
4996 'ia': 'ina',
4997 'id': 'ind',
b7acc835 4998 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4999 'ie': 'ile',
5000 'ig': 'ibo',
5001 'ii': 'iii',
5002 'ik': 'ipk',
5003 'io': 'ido',
5004 'is': 'isl',
5005 'it': 'ita',
5006 'iu': 'iku',
5007 'ja': 'jpn',
5008 'jv': 'jav',
5009 'ka': 'kat',
5010 'kg': 'kon',
5011 'ki': 'kik',
5012 'kj': 'kua',
5013 'kk': 'kaz',
5014 'kl': 'kal',
5015 'km': 'khm',
5016 'kn': 'kan',
5017 'ko': 'kor',
5018 'kr': 'kau',
5019 'ks': 'kas',
5020 'ku': 'kur',
5021 'kv': 'kom',
5022 'kw': 'cor',
5023 'ky': 'kir',
5024 'la': 'lat',
5025 'lb': 'ltz',
5026 'lg': 'lug',
5027 'li': 'lim',
5028 'ln': 'lin',
5029 'lo': 'lao',
5030 'lt': 'lit',
5031 'lu': 'lub',
5032 'lv': 'lav',
5033 'mg': 'mlg',
5034 'mh': 'mah',
5035 'mi': 'mri',
5036 'mk': 'mkd',
5037 'ml': 'mal',
5038 'mn': 'mon',
5039 'mr': 'mar',
5040 'ms': 'msa',
5041 'mt': 'mlt',
5042 'my': 'mya',
5043 'na': 'nau',
5044 'nb': 'nob',
5045 'nd': 'nde',
5046 'ne': 'nep',
5047 'ng': 'ndo',
5048 'nl': 'nld',
5049 'nn': 'nno',
5050 'no': 'nor',
5051 'nr': 'nbl',
5052 'nv': 'nav',
5053 'ny': 'nya',
5054 'oc': 'oci',
5055 'oj': 'oji',
5056 'om': 'orm',
5057 'or': 'ori',
5058 'os': 'oss',
5059 'pa': 'pan',
5060 'pi': 'pli',
5061 'pl': 'pol',
5062 'ps': 'pus',
5063 'pt': 'por',
5064 'qu': 'que',
5065 'rm': 'roh',
5066 'rn': 'run',
5067 'ro': 'ron',
5068 'ru': 'rus',
5069 'rw': 'kin',
5070 'sa': 'san',
5071 'sc': 'srd',
5072 'sd': 'snd',
5073 'se': 'sme',
5074 'sg': 'sag',
5075 'si': 'sin',
5076 'sk': 'slk',
5077 'sl': 'slv',
5078 'sm': 'smo',
5079 'sn': 'sna',
5080 'so': 'som',
5081 'sq': 'sqi',
5082 'sr': 'srp',
5083 'ss': 'ssw',
5084 'st': 'sot',
5085 'su': 'sun',
5086 'sv': 'swe',
5087 'sw': 'swa',
5088 'ta': 'tam',
5089 'te': 'tel',
5090 'tg': 'tgk',
5091 'th': 'tha',
5092 'ti': 'tir',
5093 'tk': 'tuk',
5094 'tl': 'tgl',
5095 'tn': 'tsn',
5096 'to': 'ton',
5097 'tr': 'tur',
5098 'ts': 'tso',
5099 'tt': 'tat',
5100 'tw': 'twi',
5101 'ty': 'tah',
5102 'ug': 'uig',
5103 'uk': 'ukr',
5104 'ur': 'urd',
5105 'uz': 'uzb',
5106 've': 'ven',
5107 'vi': 'vie',
5108 'vo': 'vol',
5109 'wa': 'wln',
5110 'wo': 'wol',
5111 'xh': 'xho',
5112 'yi': 'yid',
e9a50fba 5113 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5114 'yo': 'yor',
5115 'za': 'zha',
5116 'zh': 'zho',
5117 'zu': 'zul',
5118 }
5119
5120 @classmethod
5121 def short2long(cls, code):
5122 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5123 return cls._lang_map.get(code[:2])
5124
5125 @classmethod
5126 def long2short(cls, code):
5127 """Convert language code from ISO 639-2/T to ISO 639-1"""
5128 for short_name, long_name in cls._lang_map.items():
5129 if long_name == code:
5130 return short_name
5131
5132
4eb10f66
YCH
5133class ISO3166Utils(object):
5134 # From http://data.okfn.org/data/core/country-list
5135 _country_map = {
5136 'AF': 'Afghanistan',
5137 'AX': 'Åland Islands',
5138 'AL': 'Albania',
5139 'DZ': 'Algeria',
5140 'AS': 'American Samoa',
5141 'AD': 'Andorra',
5142 'AO': 'Angola',
5143 'AI': 'Anguilla',
5144 'AQ': 'Antarctica',
5145 'AG': 'Antigua and Barbuda',
5146 'AR': 'Argentina',
5147 'AM': 'Armenia',
5148 'AW': 'Aruba',
5149 'AU': 'Australia',
5150 'AT': 'Austria',
5151 'AZ': 'Azerbaijan',
5152 'BS': 'Bahamas',
5153 'BH': 'Bahrain',
5154 'BD': 'Bangladesh',
5155 'BB': 'Barbados',
5156 'BY': 'Belarus',
5157 'BE': 'Belgium',
5158 'BZ': 'Belize',
5159 'BJ': 'Benin',
5160 'BM': 'Bermuda',
5161 'BT': 'Bhutan',
5162 'BO': 'Bolivia, Plurinational State of',
5163 'BQ': 'Bonaire, Sint Eustatius and Saba',
5164 'BA': 'Bosnia and Herzegovina',
5165 'BW': 'Botswana',
5166 'BV': 'Bouvet Island',
5167 'BR': 'Brazil',
5168 'IO': 'British Indian Ocean Territory',
5169 'BN': 'Brunei Darussalam',
5170 'BG': 'Bulgaria',
5171 'BF': 'Burkina Faso',
5172 'BI': 'Burundi',
5173 'KH': 'Cambodia',
5174 'CM': 'Cameroon',
5175 'CA': 'Canada',
5176 'CV': 'Cape Verde',
5177 'KY': 'Cayman Islands',
5178 'CF': 'Central African Republic',
5179 'TD': 'Chad',
5180 'CL': 'Chile',
5181 'CN': 'China',
5182 'CX': 'Christmas Island',
5183 'CC': 'Cocos (Keeling) Islands',
5184 'CO': 'Colombia',
5185 'KM': 'Comoros',
5186 'CG': 'Congo',
5187 'CD': 'Congo, the Democratic Republic of the',
5188 'CK': 'Cook Islands',
5189 'CR': 'Costa Rica',
5190 'CI': 'Côte d\'Ivoire',
5191 'HR': 'Croatia',
5192 'CU': 'Cuba',
5193 'CW': 'Curaçao',
5194 'CY': 'Cyprus',
5195 'CZ': 'Czech Republic',
5196 'DK': 'Denmark',
5197 'DJ': 'Djibouti',
5198 'DM': 'Dominica',
5199 'DO': 'Dominican Republic',
5200 'EC': 'Ecuador',
5201 'EG': 'Egypt',
5202 'SV': 'El Salvador',
5203 'GQ': 'Equatorial Guinea',
5204 'ER': 'Eritrea',
5205 'EE': 'Estonia',
5206 'ET': 'Ethiopia',
5207 'FK': 'Falkland Islands (Malvinas)',
5208 'FO': 'Faroe Islands',
5209 'FJ': 'Fiji',
5210 'FI': 'Finland',
5211 'FR': 'France',
5212 'GF': 'French Guiana',
5213 'PF': 'French Polynesia',
5214 'TF': 'French Southern Territories',
5215 'GA': 'Gabon',
5216 'GM': 'Gambia',
5217 'GE': 'Georgia',
5218 'DE': 'Germany',
5219 'GH': 'Ghana',
5220 'GI': 'Gibraltar',
5221 'GR': 'Greece',
5222 'GL': 'Greenland',
5223 'GD': 'Grenada',
5224 'GP': 'Guadeloupe',
5225 'GU': 'Guam',
5226 'GT': 'Guatemala',
5227 'GG': 'Guernsey',
5228 'GN': 'Guinea',
5229 'GW': 'Guinea-Bissau',
5230 'GY': 'Guyana',
5231 'HT': 'Haiti',
5232 'HM': 'Heard Island and McDonald Islands',
5233 'VA': 'Holy See (Vatican City State)',
5234 'HN': 'Honduras',
5235 'HK': 'Hong Kong',
5236 'HU': 'Hungary',
5237 'IS': 'Iceland',
5238 'IN': 'India',
5239 'ID': 'Indonesia',
5240 'IR': 'Iran, Islamic Republic of',
5241 'IQ': 'Iraq',
5242 'IE': 'Ireland',
5243 'IM': 'Isle of Man',
5244 'IL': 'Israel',
5245 'IT': 'Italy',
5246 'JM': 'Jamaica',
5247 'JP': 'Japan',
5248 'JE': 'Jersey',
5249 'JO': 'Jordan',
5250 'KZ': 'Kazakhstan',
5251 'KE': 'Kenya',
5252 'KI': 'Kiribati',
5253 'KP': 'Korea, Democratic People\'s Republic of',
5254 'KR': 'Korea, Republic of',
5255 'KW': 'Kuwait',
5256 'KG': 'Kyrgyzstan',
5257 'LA': 'Lao People\'s Democratic Republic',
5258 'LV': 'Latvia',
5259 'LB': 'Lebanon',
5260 'LS': 'Lesotho',
5261 'LR': 'Liberia',
5262 'LY': 'Libya',
5263 'LI': 'Liechtenstein',
5264 'LT': 'Lithuania',
5265 'LU': 'Luxembourg',
5266 'MO': 'Macao',
5267 'MK': 'Macedonia, the Former Yugoslav Republic of',
5268 'MG': 'Madagascar',
5269 'MW': 'Malawi',
5270 'MY': 'Malaysia',
5271 'MV': 'Maldives',
5272 'ML': 'Mali',
5273 'MT': 'Malta',
5274 'MH': 'Marshall Islands',
5275 'MQ': 'Martinique',
5276 'MR': 'Mauritania',
5277 'MU': 'Mauritius',
5278 'YT': 'Mayotte',
5279 'MX': 'Mexico',
5280 'FM': 'Micronesia, Federated States of',
5281 'MD': 'Moldova, Republic of',
5282 'MC': 'Monaco',
5283 'MN': 'Mongolia',
5284 'ME': 'Montenegro',
5285 'MS': 'Montserrat',
5286 'MA': 'Morocco',
5287 'MZ': 'Mozambique',
5288 'MM': 'Myanmar',
5289 'NA': 'Namibia',
5290 'NR': 'Nauru',
5291 'NP': 'Nepal',
5292 'NL': 'Netherlands',
5293 'NC': 'New Caledonia',
5294 'NZ': 'New Zealand',
5295 'NI': 'Nicaragua',
5296 'NE': 'Niger',
5297 'NG': 'Nigeria',
5298 'NU': 'Niue',
5299 'NF': 'Norfolk Island',
5300 'MP': 'Northern Mariana Islands',
5301 'NO': 'Norway',
5302 'OM': 'Oman',
5303 'PK': 'Pakistan',
5304 'PW': 'Palau',
5305 'PS': 'Palestine, State of',
5306 'PA': 'Panama',
5307 'PG': 'Papua New Guinea',
5308 'PY': 'Paraguay',
5309 'PE': 'Peru',
5310 'PH': 'Philippines',
5311 'PN': 'Pitcairn',
5312 'PL': 'Poland',
5313 'PT': 'Portugal',
5314 'PR': 'Puerto Rico',
5315 'QA': 'Qatar',
5316 'RE': 'Réunion',
5317 'RO': 'Romania',
5318 'RU': 'Russian Federation',
5319 'RW': 'Rwanda',
5320 'BL': 'Saint Barthélemy',
5321 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5322 'KN': 'Saint Kitts and Nevis',
5323 'LC': 'Saint Lucia',
5324 'MF': 'Saint Martin (French part)',
5325 'PM': 'Saint Pierre and Miquelon',
5326 'VC': 'Saint Vincent and the Grenadines',
5327 'WS': 'Samoa',
5328 'SM': 'San Marino',
5329 'ST': 'Sao Tome and Principe',
5330 'SA': 'Saudi Arabia',
5331 'SN': 'Senegal',
5332 'RS': 'Serbia',
5333 'SC': 'Seychelles',
5334 'SL': 'Sierra Leone',
5335 'SG': 'Singapore',
5336 'SX': 'Sint Maarten (Dutch part)',
5337 'SK': 'Slovakia',
5338 'SI': 'Slovenia',
5339 'SB': 'Solomon Islands',
5340 'SO': 'Somalia',
5341 'ZA': 'South Africa',
5342 'GS': 'South Georgia and the South Sandwich Islands',
5343 'SS': 'South Sudan',
5344 'ES': 'Spain',
5345 'LK': 'Sri Lanka',
5346 'SD': 'Sudan',
5347 'SR': 'Suriname',
5348 'SJ': 'Svalbard and Jan Mayen',
5349 'SZ': 'Swaziland',
5350 'SE': 'Sweden',
5351 'CH': 'Switzerland',
5352 'SY': 'Syrian Arab Republic',
5353 'TW': 'Taiwan, Province of China',
5354 'TJ': 'Tajikistan',
5355 'TZ': 'Tanzania, United Republic of',
5356 'TH': 'Thailand',
5357 'TL': 'Timor-Leste',
5358 'TG': 'Togo',
5359 'TK': 'Tokelau',
5360 'TO': 'Tonga',
5361 'TT': 'Trinidad and Tobago',
5362 'TN': 'Tunisia',
5363 'TR': 'Turkey',
5364 'TM': 'Turkmenistan',
5365 'TC': 'Turks and Caicos Islands',
5366 'TV': 'Tuvalu',
5367 'UG': 'Uganda',
5368 'UA': 'Ukraine',
5369 'AE': 'United Arab Emirates',
5370 'GB': 'United Kingdom',
5371 'US': 'United States',
5372 'UM': 'United States Minor Outlying Islands',
5373 'UY': 'Uruguay',
5374 'UZ': 'Uzbekistan',
5375 'VU': 'Vanuatu',
5376 'VE': 'Venezuela, Bolivarian Republic of',
5377 'VN': 'Viet Nam',
5378 'VG': 'Virgin Islands, British',
5379 'VI': 'Virgin Islands, U.S.',
5380 'WF': 'Wallis and Futuna',
5381 'EH': 'Western Sahara',
5382 'YE': 'Yemen',
5383 'ZM': 'Zambia',
5384 'ZW': 'Zimbabwe',
5385 }
5386
5387 @classmethod
5388 def short2full(cls, code):
5389 """Convert an ISO 3166-2 country code to the corresponding full name"""
5390 return cls._country_map.get(code.upper())
5391
5392
773f291d
S
5393class GeoUtils(object):
5394 # Major IPv4 address blocks per country
5395 _country_ip_map = {
53896ca5 5396 'AD': '46.172.224.0/19',
773f291d
S
5397 'AE': '94.200.0.0/13',
5398 'AF': '149.54.0.0/17',
5399 'AG': '209.59.64.0/18',
5400 'AI': '204.14.248.0/21',
5401 'AL': '46.99.0.0/16',
5402 'AM': '46.70.0.0/15',
5403 'AO': '105.168.0.0/13',
53896ca5
S
5404 'AP': '182.50.184.0/21',
5405 'AQ': '23.154.160.0/24',
773f291d
S
5406 'AR': '181.0.0.0/12',
5407 'AS': '202.70.112.0/20',
53896ca5 5408 'AT': '77.116.0.0/14',
773f291d
S
5409 'AU': '1.128.0.0/11',
5410 'AW': '181.41.0.0/18',
53896ca5
S
5411 'AX': '185.217.4.0/22',
5412 'AZ': '5.197.0.0/16',
773f291d
S
5413 'BA': '31.176.128.0/17',
5414 'BB': '65.48.128.0/17',
5415 'BD': '114.130.0.0/16',
5416 'BE': '57.0.0.0/8',
53896ca5 5417 'BF': '102.178.0.0/15',
773f291d
S
5418 'BG': '95.42.0.0/15',
5419 'BH': '37.131.0.0/17',
5420 'BI': '154.117.192.0/18',
5421 'BJ': '137.255.0.0/16',
53896ca5 5422 'BL': '185.212.72.0/23',
773f291d
S
5423 'BM': '196.12.64.0/18',
5424 'BN': '156.31.0.0/16',
5425 'BO': '161.56.0.0/16',
5426 'BQ': '161.0.80.0/20',
53896ca5 5427 'BR': '191.128.0.0/12',
773f291d
S
5428 'BS': '24.51.64.0/18',
5429 'BT': '119.2.96.0/19',
5430 'BW': '168.167.0.0/16',
5431 'BY': '178.120.0.0/13',
5432 'BZ': '179.42.192.0/18',
5433 'CA': '99.224.0.0/11',
5434 'CD': '41.243.0.0/16',
53896ca5
S
5435 'CF': '197.242.176.0/21',
5436 'CG': '160.113.0.0/16',
773f291d 5437 'CH': '85.0.0.0/13',
53896ca5 5438 'CI': '102.136.0.0/14',
773f291d
S
5439 'CK': '202.65.32.0/19',
5440 'CL': '152.172.0.0/14',
53896ca5 5441 'CM': '102.244.0.0/14',
773f291d
S
5442 'CN': '36.128.0.0/10',
5443 'CO': '181.240.0.0/12',
5444 'CR': '201.192.0.0/12',
5445 'CU': '152.206.0.0/15',
5446 'CV': '165.90.96.0/19',
5447 'CW': '190.88.128.0/17',
53896ca5 5448 'CY': '31.153.0.0/16',
773f291d
S
5449 'CZ': '88.100.0.0/14',
5450 'DE': '53.0.0.0/8',
5451 'DJ': '197.241.0.0/17',
5452 'DK': '87.48.0.0/12',
5453 'DM': '192.243.48.0/20',
5454 'DO': '152.166.0.0/15',
5455 'DZ': '41.96.0.0/12',
5456 'EC': '186.68.0.0/15',
5457 'EE': '90.190.0.0/15',
5458 'EG': '156.160.0.0/11',
5459 'ER': '196.200.96.0/20',
5460 'ES': '88.0.0.0/11',
5461 'ET': '196.188.0.0/14',
5462 'EU': '2.16.0.0/13',
5463 'FI': '91.152.0.0/13',
5464 'FJ': '144.120.0.0/16',
53896ca5 5465 'FK': '80.73.208.0/21',
773f291d
S
5466 'FM': '119.252.112.0/20',
5467 'FO': '88.85.32.0/19',
5468 'FR': '90.0.0.0/9',
5469 'GA': '41.158.0.0/15',
5470 'GB': '25.0.0.0/8',
5471 'GD': '74.122.88.0/21',
5472 'GE': '31.146.0.0/16',
5473 'GF': '161.22.64.0/18',
5474 'GG': '62.68.160.0/19',
53896ca5
S
5475 'GH': '154.160.0.0/12',
5476 'GI': '95.164.0.0/16',
773f291d
S
5477 'GL': '88.83.0.0/19',
5478 'GM': '160.182.0.0/15',
5479 'GN': '197.149.192.0/18',
5480 'GP': '104.250.0.0/19',
5481 'GQ': '105.235.224.0/20',
5482 'GR': '94.64.0.0/13',
5483 'GT': '168.234.0.0/16',
5484 'GU': '168.123.0.0/16',
5485 'GW': '197.214.80.0/20',
5486 'GY': '181.41.64.0/18',
5487 'HK': '113.252.0.0/14',
5488 'HN': '181.210.0.0/16',
5489 'HR': '93.136.0.0/13',
5490 'HT': '148.102.128.0/17',
5491 'HU': '84.0.0.0/14',
5492 'ID': '39.192.0.0/10',
5493 'IE': '87.32.0.0/12',
5494 'IL': '79.176.0.0/13',
5495 'IM': '5.62.80.0/20',
5496 'IN': '117.192.0.0/10',
5497 'IO': '203.83.48.0/21',
5498 'IQ': '37.236.0.0/14',
5499 'IR': '2.176.0.0/12',
5500 'IS': '82.221.0.0/16',
5501 'IT': '79.0.0.0/10',
5502 'JE': '87.244.64.0/18',
5503 'JM': '72.27.0.0/17',
5504 'JO': '176.29.0.0/16',
53896ca5 5505 'JP': '133.0.0.0/8',
773f291d
S
5506 'KE': '105.48.0.0/12',
5507 'KG': '158.181.128.0/17',
5508 'KH': '36.37.128.0/17',
5509 'KI': '103.25.140.0/22',
5510 'KM': '197.255.224.0/20',
53896ca5 5511 'KN': '198.167.192.0/19',
773f291d
S
5512 'KP': '175.45.176.0/22',
5513 'KR': '175.192.0.0/10',
5514 'KW': '37.36.0.0/14',
5515 'KY': '64.96.0.0/15',
5516 'KZ': '2.72.0.0/13',
5517 'LA': '115.84.64.0/18',
5518 'LB': '178.135.0.0/16',
53896ca5 5519 'LC': '24.92.144.0/20',
773f291d
S
5520 'LI': '82.117.0.0/19',
5521 'LK': '112.134.0.0/15',
53896ca5 5522 'LR': '102.183.0.0/16',
773f291d
S
5523 'LS': '129.232.0.0/17',
5524 'LT': '78.56.0.0/13',
5525 'LU': '188.42.0.0/16',
5526 'LV': '46.109.0.0/16',
5527 'LY': '41.252.0.0/14',
5528 'MA': '105.128.0.0/11',
5529 'MC': '88.209.64.0/18',
5530 'MD': '37.246.0.0/16',
5531 'ME': '178.175.0.0/17',
5532 'MF': '74.112.232.0/21',
5533 'MG': '154.126.0.0/17',
5534 'MH': '117.103.88.0/21',
5535 'MK': '77.28.0.0/15',
5536 'ML': '154.118.128.0/18',
5537 'MM': '37.111.0.0/17',
5538 'MN': '49.0.128.0/17',
5539 'MO': '60.246.0.0/16',
5540 'MP': '202.88.64.0/20',
5541 'MQ': '109.203.224.0/19',
5542 'MR': '41.188.64.0/18',
5543 'MS': '208.90.112.0/22',
5544 'MT': '46.11.0.0/16',
5545 'MU': '105.16.0.0/12',
5546 'MV': '27.114.128.0/18',
53896ca5 5547 'MW': '102.70.0.0/15',
773f291d
S
5548 'MX': '187.192.0.0/11',
5549 'MY': '175.136.0.0/13',
5550 'MZ': '197.218.0.0/15',
5551 'NA': '41.182.0.0/16',
5552 'NC': '101.101.0.0/18',
5553 'NE': '197.214.0.0/18',
5554 'NF': '203.17.240.0/22',
5555 'NG': '105.112.0.0/12',
5556 'NI': '186.76.0.0/15',
5557 'NL': '145.96.0.0/11',
5558 'NO': '84.208.0.0/13',
5559 'NP': '36.252.0.0/15',
5560 'NR': '203.98.224.0/19',
5561 'NU': '49.156.48.0/22',
5562 'NZ': '49.224.0.0/14',
5563 'OM': '5.36.0.0/15',
5564 'PA': '186.72.0.0/15',
5565 'PE': '186.160.0.0/14',
5566 'PF': '123.50.64.0/18',
5567 'PG': '124.240.192.0/19',
5568 'PH': '49.144.0.0/13',
5569 'PK': '39.32.0.0/11',
5570 'PL': '83.0.0.0/11',
5571 'PM': '70.36.0.0/20',
5572 'PR': '66.50.0.0/16',
5573 'PS': '188.161.0.0/16',
5574 'PT': '85.240.0.0/13',
5575 'PW': '202.124.224.0/20',
5576 'PY': '181.120.0.0/14',
5577 'QA': '37.210.0.0/15',
53896ca5 5578 'RE': '102.35.0.0/16',
773f291d 5579 'RO': '79.112.0.0/13',
53896ca5 5580 'RS': '93.86.0.0/15',
773f291d 5581 'RU': '5.136.0.0/13',
53896ca5 5582 'RW': '41.186.0.0/16',
773f291d
S
5583 'SA': '188.48.0.0/13',
5584 'SB': '202.1.160.0/19',
5585 'SC': '154.192.0.0/11',
53896ca5 5586 'SD': '102.120.0.0/13',
773f291d 5587 'SE': '78.64.0.0/12',
53896ca5 5588 'SG': '8.128.0.0/10',
773f291d
S
5589 'SI': '188.196.0.0/14',
5590 'SK': '78.98.0.0/15',
53896ca5 5591 'SL': '102.143.0.0/17',
773f291d
S
5592 'SM': '89.186.32.0/19',
5593 'SN': '41.82.0.0/15',
53896ca5 5594 'SO': '154.115.192.0/18',
773f291d
S
5595 'SR': '186.179.128.0/17',
5596 'SS': '105.235.208.0/21',
5597 'ST': '197.159.160.0/19',
5598 'SV': '168.243.0.0/16',
5599 'SX': '190.102.0.0/20',
5600 'SY': '5.0.0.0/16',
5601 'SZ': '41.84.224.0/19',
5602 'TC': '65.255.48.0/20',
5603 'TD': '154.68.128.0/19',
5604 'TG': '196.168.0.0/14',
5605 'TH': '171.96.0.0/13',
5606 'TJ': '85.9.128.0/18',
5607 'TK': '27.96.24.0/21',
5608 'TL': '180.189.160.0/20',
5609 'TM': '95.85.96.0/19',
5610 'TN': '197.0.0.0/11',
5611 'TO': '175.176.144.0/21',
5612 'TR': '78.160.0.0/11',
5613 'TT': '186.44.0.0/15',
5614 'TV': '202.2.96.0/19',
5615 'TW': '120.96.0.0/11',
5616 'TZ': '156.156.0.0/14',
53896ca5
S
5617 'UA': '37.52.0.0/14',
5618 'UG': '102.80.0.0/13',
5619 'US': '6.0.0.0/8',
773f291d 5620 'UY': '167.56.0.0/13',
53896ca5 5621 'UZ': '84.54.64.0/18',
773f291d 5622 'VA': '212.77.0.0/19',
53896ca5 5623 'VC': '207.191.240.0/21',
773f291d 5624 'VE': '186.88.0.0/13',
53896ca5 5625 'VG': '66.81.192.0/20',
773f291d
S
5626 'VI': '146.226.0.0/16',
5627 'VN': '14.160.0.0/11',
5628 'VU': '202.80.32.0/20',
5629 'WF': '117.20.32.0/21',
5630 'WS': '202.4.32.0/19',
5631 'YE': '134.35.0.0/16',
5632 'YT': '41.242.116.0/22',
5633 'ZA': '41.0.0.0/11',
53896ca5
S
5634 'ZM': '102.144.0.0/13',
5635 'ZW': '102.177.192.0/18',
773f291d
S
5636 }
5637
5638 @classmethod
5f95927a
S
5639 def random_ipv4(cls, code_or_block):
5640 if len(code_or_block) == 2:
5641 block = cls._country_ip_map.get(code_or_block.upper())
5642 if not block:
5643 return None
5644 else:
5645 block = code_or_block
773f291d
S
5646 addr, preflen = block.split('/')
5647 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5648 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5649 return compat_str(socket.inet_ntoa(
4248dad9 5650 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5651
5652
91410c9b 5653class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5654 def __init__(self, proxies=None):
5655 # Set default handlers
5656 for type in ('http', 'https'):
5657 setattr(self, '%s_open' % type,
5658 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5659 meth(r, proxy, type))
38e87f6c 5660 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5661
91410c9b 5662 def proxy_open(self, req, proxy, type):
2461f79d 5663 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5664 if req_proxy is not None:
5665 proxy = req_proxy
2461f79d
PH
5666 del req.headers['Ytdl-request-proxy']
5667
5668 if proxy == '__noproxy__':
5669 return None # No Proxy
51fb4995 5670 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5671 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5672 # yt-dlp's http/https handlers do the wrapping of the socket with SOCKS
71aff188 5673 return None
91410c9b
PH
5674 return compat_urllib_request.ProxyHandler.proxy_open(
5675 self, req, proxy, type)
5bc880b9
YCH
5676
5677
0a5445dd
YCH
5678# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5679# released into Public Domain
5680# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5681
5682def long_to_bytes(n, blocksize=0):
5683 """long_to_bytes(n:long, blocksize:int) : string
5684 Convert a long integer to a byte string.
5685
5686 If optional blocksize is given and greater than zero, pad the front of the
5687 byte string with binary zeros so that the length is a multiple of
5688 blocksize.
5689 """
5690 # after much testing, this algorithm was deemed to be the fastest
5691 s = b''
5692 n = int(n)
5693 while n > 0:
5694 s = compat_struct_pack('>I', n & 0xffffffff) + s
5695 n = n >> 32
5696 # strip off leading zeros
5697 for i in range(len(s)):
5698 if s[i] != b'\000'[0]:
5699 break
5700 else:
5701 # only happens when n == 0
5702 s = b'\000'
5703 i = 0
5704 s = s[i:]
5705 # add back some pad bytes. this could be done more efficiently w.r.t. the
5706 # de-padding being done above, but sigh...
5707 if blocksize > 0 and len(s) % blocksize:
5708 s = (blocksize - len(s) % blocksize) * b'\000' + s
5709 return s
5710
5711
5712def bytes_to_long(s):
5713 """bytes_to_long(string) : long
5714 Convert a byte string to a long integer.
5715
5716 This is (essentially) the inverse of long_to_bytes().
5717 """
5718 acc = 0
5719 length = len(s)
5720 if length % 4:
5721 extra = (4 - length % 4)
5722 s = b'\000' * extra + s
5723 length = length + extra
5724 for i in range(0, length, 4):
5725 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5726 return acc
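# Editorial sketch (not part of the original module): the two PyCrypto-derived
# helpers above are inverses of each other.
def _long_bytes_examples():
    assert long_to_bytes(65537) == b'\x01\x00\x01'
    assert long_to_bytes(65537, blocksize=4) == b'\x00\x01\x00\x01'  # zero-padded
    assert bytes_to_long(b'\x01\x00\x01') == 65537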
5727
5728
5bc880b9
YCH
5729def ohdave_rsa_encrypt(data, exponent, modulus):
5730 '''
5731 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5732
5733 Input:
5734 data: data to encrypt, bytes-like object
5735 exponent, modulus: parameter e and N of RSA algorithm, both integer
5736 Output: hex string of encrypted data
5737
5738 Limitation: supports one block encryption only
5739 '''
5740
5741 payload = int(binascii.hexlify(data[::-1]), 16)
5742 encrypted = pow(payload, exponent, modulus)
5743 return '%x' % encrypted
81bdc8fd
YCH
5744
5745
f48409c7
YCH
5746def pkcs1pad(data, length):
5747 """
5748 Padding input data with PKCS#1 scheme
5749
5750 @param {int[]} data input data
5751 @param {int} length target length
5752 @returns {int[]} padded data
5753 """
5754 if len(data) > length - 11:
5755 raise ValueError('Input data too long for PKCS#1 padding')
5756
5757 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5758 return [0, 2] + pseudo_random + [0] + data
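# Example (illustrative): padding a 3-byte message to a 16-byte block.
#
#   padded = pkcs1pad([1, 2, 3], 16)
#   # -> [0, 2, <10 random values in 0..254>, 0, 1, 2, 3], always exactly 16 items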
5759
5760
5eb6bdce 5761def encode_base_n(num, n, table=None):
59f898b7 5762 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
5763 if not table:
5764 table = FULL_TABLE[:n]
5765
5766 if n > len(table):
5767 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5768
5769 if num == 0:
5770 return table[0]
5771
5772 ret = ''
5773 while num:
5774 ret = table[num % n] + ret
5775 num = num // n
5776 return ret
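# Example (illustrative): with the default table, encode_base_n() uses digits, then
# lowercase, then uppercase letters.
#
#   >>> encode_base_n(255, 16)
#   'ff'
#   >>> encode_base_n(123456, 62)
#   'w7e'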
5777
5778
5779def decode_packed_codes(code):
06b3fe29 5780 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5781 obfuscated_code, base, count, symbols = mobj.groups()
5782 base = int(base)
5783 count = int(count)
5784 symbols = symbols.split('|')
5785 symbol_table = {}
5786
5787 while count:
5788 count -= 1
5eb6bdce 5789 base_n_count = encode_base_n(count, base)
5790 symbol_table[base_n_count] = symbols[count] or base_n_count
5791
5792 return re.sub(
5793 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5794 obfuscated_code)
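# Example (a rough sketch): for Dean Edwards style "p.a.c.k.e.r" output such as
#   eval(function(p,a,c,k,e,d){...}('0 1',62,2,'hello|world'.split('|'),0,{}))
# PACKED_CODES_RE (defined earlier in this module) captures the payload, base, count and
# symbol list, and the substitution above yields 'hello world'.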
e154c651 5795
5796
5797def caesar(s, alphabet, shift):
5798 if shift == 0:
5799 return s
5800 l = len(alphabet)
5801 return ''.join(
5802 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5803 for c in s)
5804
5805
5806def rot47(s):
5807 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
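# Example (illustrative): rot47 maps the printable ASCII range onto itself and is its own
# inverse.
#
#   >>> rot47('Hello')
#   'w6==@'
#   >>> rot47(rot47('Hello'))
#   'Hello'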
5808
5809
e154c651 5810def parse_m3u8_attributes(attrib):
5811 info = {}
5812 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5813 if val.startswith('"'):
5814 val = val[1:-1]
5815 info[key] = val
5816 return info
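# Example (illustrative): quoted attribute values may contain commas and are unquoted.
#
#   >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
#   {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}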
5817
5818
5819def urshift(val, n):
5820 return val >> n if val >= 0 else (val + 0x100000000) >> n
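# Example (illustrative): urshift() emulates JavaScript's unsigned right shift ('>>>')
# for 32-bit values.
#
#   >>> urshift(-1, 1)
#   2147483647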
5821
5822
5823# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5824# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
5825def decode_png(png_data):
5826 # Reference: https://www.w3.org/TR/PNG/
5827 header = png_data[8:]
5828
5829 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5830 raise IOError('Not a valid PNG file.')
5831
5832 int_map = {1: '>B', 2: '>H', 4: '>I'}
5833 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5834
5835 chunks = []
5836
5837 while header:
5838 length = unpack_integer(header[:4])
5839 header = header[4:]
5840
5841 chunk_type = header[:4]
5842 header = header[4:]
5843
5844 chunk_data = header[:length]
5845 header = header[length:]
5846
5847 header = header[4:] # Skip CRC
5848
5849 chunks.append({
5850 'type': chunk_type,
5851 'length': length,
5852 'data': chunk_data
5853 })
5854
5855 ihdr = chunks[0]['data']
5856
5857 width = unpack_integer(ihdr[:4])
5858 height = unpack_integer(ihdr[4:8])
5859
5860 idat = b''
5861
5862 for chunk in chunks:
5863 if chunk['type'] == b'IDAT':
5864 idat += chunk['data']
5865
5866 if not idat:
5867 raise IOError('Unable to read PNG data.')
5868
5869 decompressed_data = bytearray(zlib.decompress(idat))
5870
5871 stride = width * 3
5872 pixels = []
5873
5874 def _get_pixel(idx):
5875 x = idx % stride
5876 y = idx // stride
5877 return pixels[y][x]
5878
5879 for y in range(height):
5880 basePos = y * (1 + stride)
5881 filter_type = decompressed_data[basePos]
5882
5883 current_row = []
5884
5885 pixels.append(current_row)
5886
5887 for x in range(stride):
5888 color = decompressed_data[1 + basePos + x]
5889 basex = y * stride + x
5890 left = 0
5891 up = 0
5892
5893 if x > 2:
5894 left = _get_pixel(basex - 3)
5895 if y > 0:
5896 up = _get_pixel(basex - stride)
5897
5898 if filter_type == 1: # Sub
5899 color = (color + left) & 0xff
5900 elif filter_type == 2: # Up
5901 color = (color + up) & 0xff
5902 elif filter_type == 3: # Average
5903 color = (color + ((left + up) >> 1)) & 0xff
5904 elif filter_type == 4: # Paeth
5905 a = left
5906 b = up
5907 c = 0
5908
5909 if x > 2 and y > 0:
5910 c = _get_pixel(basex - stride - 3)
5911
5912 p = a + b - c
5913
5914 pa = abs(p - a)
5915 pb = abs(p - b)
5916 pc = abs(p - c)
5917
5918 if pa <= pb and pa <= pc:
5919 color = (color + a) & 0xff
5920 elif pb <= pc:
5921 color = (color + b) & 0xff
5922 else:
5923 color = (color + c) & 0xff
5924
5925 current_row.append(color)
5926
5927 return width, height, pixels
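# Example (a sketch; assumes the PNG is 8-bit-per-channel RGB, which is what the stride and
# filter handling above expect):
#
#   width, height, pixels = decode_png(png_bytes)
#   r, g, b = pixels[0][0], pixels[0][1], pixels[0][2]  # each row is a flat list of width * 3 byte values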
5928
5929
5930def write_xattr(path, key, value):
5931 # This mess below finds the best xattr tool for the job
5932 try:
5933 # try the pyxattr module...
5934 import xattr
5935
5936 if hasattr(xattr, 'set'): # pyxattr
5937 # Unicode arguments are not supported in python-pyxattr until
5938 # version 0.5.0
067aa17e 5939 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5940 pyxattr_required_version = '0.5.0'
5941 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5942 # TODO: fallback to CLI tools
5943 raise XAttrUnavailableError(
5944 'python-pyxattr is detected but is too old. '
7a5c1cfe 5945 'yt-dlp requires %s or above while your version is %s. '
5946 'Falling back to other xattr implementations' % (
5947 pyxattr_required_version, xattr.__version__))
5948
5949 setxattr = xattr.set
5950 else: # xattr
5951 setxattr = xattr.setxattr
5952
5953 try:
53a7e3d2 5954 setxattr(path, key, value)
5955 except EnvironmentError as e:
5956 raise XAttrMetadataError(e.errno, e.strerror)
5957
5958 except ImportError:
5959 if compat_os_name == 'nt':
5960 # Write xattrs to NTFS Alternate Data Streams:
5961 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5962 assert ':' not in key
5963 assert os.path.exists(path)
5964
5965 ads_fn = path + ':' + key
5966 try:
5967 with open(ads_fn, 'wb') as f:
5968 f.write(value)
5969 except EnvironmentError as e:
5970 raise XAttrMetadataError(e.errno, e.strerror)
5971 else:
5972 user_has_setfattr = check_executable('setfattr', ['--version'])
5973 user_has_xattr = check_executable('xattr', ['-h'])
5974
5975 if user_has_setfattr or user_has_xattr:
5976
5977 value = value.decode('utf-8')
5978 if user_has_setfattr:
5979 executable = 'setfattr'
5980 opts = ['-n', key, '-v', value]
5981 elif user_has_xattr:
5982 executable = 'xattr'
5983 opts = ['-w', key, value]
5984
5985 cmd = ([encodeFilename(executable, True)]
5986 + [encodeArgument(o) for o in opts]
5987 + [encodeFilename(path, True)])
5988
5989 try:
5990 p = subprocess.Popen(
5991 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5992 except EnvironmentError as e:
5993 raise XAttrMetadataError(e.errno, e.strerror)
f5b1bca9 5994 stdout, stderr = process_communicate_or_kill(p)
5995 stderr = stderr.decode('utf-8', 'replace')
5996 if p.returncode != 0:
5997 raise XAttrMetadataError(p.returncode, stderr)
5998
5999 else:
6000 # On Unix, but couldn't find pyxattr, setfattr, or xattr.
6001 if sys.platform.startswith('linux'):
6002 raise XAttrUnavailableError(
6003 "Couldn't find a tool to set the xattrs. "
6004 "Install either the python 'pyxattr' or 'xattr' "
6005 "modules, or the GNU 'attr' package "
6006 "(which contains the 'setfattr' tool).")
6007 else:
6008 raise XAttrUnavailableError(
6009 "Couldn't find a tool to set the xattrs. "
6010 "Install either the python 'xattr' module, "
6011 "or the 'xattr' binary.")
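# Example usage (illustrative; the path, key and value are placeholders). The value must be
# a bytes object:
#
#   write_xattr('video.mp4', 'user.xdg.origin.url', b'https://example.com/watch?v=abc')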
6012
6013
6014def random_birthday(year_field, month_field, day_field):
6015 start_date = datetime.date(1950, 1, 1)
6016 end_date = datetime.date(1995, 12, 31)
6017 offset = random.randint(0, (end_date - start_date).days)
6018 random_date = start_date + datetime.timedelta(offset)
0c265486 6019 return {
6020 year_field: str(random_date.year),
6021 month_field: str(random_date.month),
6022 day_field: str(random_date.day),
0c265486 6023 }
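# Example (illustrative; the concrete values change on every call):
#
#   random_birthday('birth_year', 'birth_month', 'birth_day')
#   # -> {'birth_year': '1987', 'birth_month': '6', 'birth_day': '23'}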
732044af 6024
c76eb41b 6025
732044af 6026# Templates for internet shortcut files, which are plain text files.
6027DOT_URL_LINK_TEMPLATE = '''
6028[InternetShortcut]
6029URL=%(url)s
6030'''.lstrip()
6031
6032DOT_WEBLOC_LINK_TEMPLATE = '''
6033<?xml version="1.0" encoding="UTF-8"?>
6034<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6035<plist version="1.0">
6036<dict>
6037\t<key>URL</key>
6038\t<string>%(url)s</string>
6039</dict>
6040</plist>
6041'''.lstrip()
6042
6043DOT_DESKTOP_LINK_TEMPLATE = '''
6044[Desktop Entry]
6045Encoding=UTF-8
6046Name=%(filename)s
6047Type=Link
6048URL=%(url)s
6049Icon=text-html
6050'''.lstrip()
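# Example (illustrative; filename and URL are placeholders): rendering a .url shortcut from
# the template above.
#
#   with open('clip.url', 'w', encoding='utf-8') as f:
#       f.write(DOT_URL_LINK_TEMPLATE % {'url': 'https://example.com/watch?v=abc'})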
6051
6052
6053def iri_to_uri(iri):
6054 """
6055 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6056
6057 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes the remaining characters (using their UTF-8 encoding), leaving anything that is already escaped intact.
6058 """
6059
6060 iri_parts = compat_urllib_parse_urlparse(iri)
6061
6062 if '[' in iri_parts.netloc:
6063 raise ValueError('IPv6 URIs are not yet supported.')
6064 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6065
6066 # The `safe` argument values used by the following code contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6067
6068 net_location = ''
6069 if iri_parts.username:
6070 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6071 if iri_parts.password is not None:
6072 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6073 net_location += '@'
6074
6075 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6076 # The 'idna' encoding produces ASCII text.
6077 if iri_parts.port is not None and iri_parts.port != 80:
6078 net_location += ':' + str(iri_parts.port)
6079
6080 return compat_urllib_parse_urlunparse(
6081 (iri_parts.scheme,
6082 net_location,
6083
6084 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6085
6086 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6087 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6088
6089 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6090 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6091
6092 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6093
6094 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
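# Example (illustrative): non-ASCII path and query characters are percent-encoded as UTF-8,
# while anything that is already escaped is left alone.
#
#   >>> iri_to_uri('https://example.com/搜索?q=视频')
#   'https://example.com/%E6%90%9C%E7%B4%A2?q=%E8%A7%86%E9%A2%91'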
6095
6096
6097def to_high_limit_path(path):
6098 if sys.platform in ['win32', 'cygwin']:
6099 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6100 return r'\\?\ '.rstrip() + os.path.abspath(path)
6101
6102 return path
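# Example (Windows only, illustrative): to_high_limit_path(r'C:\clips\video.mp4') returns the
# literal path \\?\C:\clips\video.mp4, which tells Win32 APIs to skip the MAX_PATH check.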
76d321f6 6103
c76eb41b 6104
76d321f6 6105def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
6106 val = obj.get(field, default)
6107 if func and val not in ignore:
6108 val = func(val)
6109 return template % val if val not in ignore else default
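# Example (illustrative):
#
#   >>> format_field({'width': 1920}, 'width', '%dpx')
#   '1920px'
#   >>> format_field({'width': None}, 'width', '%dpx', default='unknown')
#   'unknown'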
00dd0cd5 6110
6111
6112def clean_podcast_url(url):
6113 return re.sub(r'''(?x)
6114 (?:
6115 (?:
6116 chtbl\.com/track|
6117 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6118 play\.podtrac\.com
6119 )/[^/]+|
6120 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6121 flex\.acast\.com|
6122 pd(?:
6123 cn\.co| # https://podcorn.com/analytics-prefix/
6124 st\.fm # https://podsights.com/docs/
6125 )/e
6126 )/''', '', url)
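# Example (illustrative; the tracking prefix and target URL are made up):
#
#   >>> clean_podcast_url('https://chtbl.com/track/ABC123/traffic.megaphone.fm/episode.mp3')
#   'https://traffic.megaphone.fm/episode.mp3'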
6127
6128
6129_HEX_TABLE = '0123456789abcdef'
6130
6131
6132def random_uuidv4():
6133 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6134
6135
6136def make_dir(path, to_screen=None):
6137 try:
6138 dn = os.path.dirname(path)
6139 if dn and not os.path.exists(dn):
6140 os.makedirs(dn)
6141 return True
6142 except (OSError, IOError) as err:
6143 if callable(to_screen):
6144 to_screen('unable to create directory ' + error_to_compat_str(err))
6145 return False
f74980cb 6146
6147
6148def get_executable_path():
c552ae88 6149 from zipimport import zipimporter
6150 if hasattr(sys, 'frozen'): # Running from PyInstaller
6151 path = os.path.dirname(sys.executable)
6152 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6153 path = os.path.join(os.path.dirname(__file__), '../..')
6154 else:
6155 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6156 return os.path.abspath(path)
6157
6158
2f567473 6159def load_plugins(name, suffix, namespace):
f74980cb 6160 plugin_info = [None]
6161 classes = []
6162 try:
6163 plugin_info = imp.find_module(
6164 name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6165 plugins = imp.load_module(name, *plugin_info)
6166 for name in dir(plugins):
2f567473 6167 if name in namespace:
6168 continue
6169 if not name.endswith(suffix):
f74980cb 6170 continue
6171 klass = getattr(plugins, name)
6172 classes.append(klass)
6173 namespace[name] = klass
6174 except ImportError:
6175 pass
6176 finally:
6177 if plugin_info[0] is not None:
6178 plugin_info[0].close()
6179 return classes
06167fbb 6180
6181
6182def traverse_dict(dictn, keys, casesense=True):
a439a3a4 6183 keys = list(keys)[::-1]
6184 while keys:
6185 key = keys.pop()
6186 if isinstance(dictn, dict):
6187 if not casesense:
6188 dictn = {k.lower(): v for k, v in dictn.items()}
6189 key = key.lower()
6190 dictn = dictn.get(key)
6191 elif isinstance(dictn, (list, tuple, compat_str)):
e625be0d 6192 if ':' in key:
6193 key = slice(*map(int_or_none, key.split(':')))
a439a3a4 6194 else:
e625be0d 6195 key = int_or_none(key)
6196 dictn = try_get(dictn, lambda x: x[key])
a439a3a4 6197 else:
6198 return None
6199 return dictn
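# Example (illustrative): keys are applied left to right; string keys index into dicts and
# integer-like strings (or 'start:stop' slices) index into sequences.
#
#   >>> traverse_dict({'a': {'B': [10, 20, 30]}}, ['a', 'b', '1'], casesense=False)
#   20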