#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import imp
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
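
# Illustrative usage (not part of the original module; host/port are made up):
# once the SOCKS schemes are registered, urlparse treats them like any other
# netloc-based scheme, e.g.
#   register_socks_protocols()
#   compat_urlparse.urlparse('socks5://127.0.0.1:1080/').netloc == '127.0.0.1:1080'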


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
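
# Example output (illustrative): a desktop Chrome user agent built from one of the
# versions listed above, e.g.
# 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.129 Safari/537.36'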


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
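
# Illustrative usage (assumed call site, filename made up): the object is dumped to a
# temporary file next to the target and then renamed over it, so readers never see a
# half-written JSON file, e.g.
#   write_json_file({'id': 'abc', 'title': 'Test'}, 'test.info.json')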


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
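
# Example (illustrative namespace URI): each prefixed path component is qualified
# with the mapped namespace so ElementTree can match it:
#   xpath_with_ns('media:song/media:url', {'media': 'http://example.com/ns'})
#   == '{http://example.com/ns}song/{http://example.com/ns}url'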


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]
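
# Example (illustrative document): with
#   doc = compat_etree_fromstring('<root><v a="1">text</v></root>')
# the helpers above behave as follows:
#   xpath_text(doc, 'v') == 'text'
#   xpath_attr(doc, 'v', 'a') == '1'
#   xpath_text(doc, 'missing', default=None) is None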


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
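
# Examples (illustrative HTML snippets): class matching is word-based, so an element
# carrying several classes still matches, and attribute lookups return inner content:
#   get_element_by_class('title', '<span class="video title">Foo</span>') == 'Foo'
#   get_elements_by_attribute('data-id', '42', '<div data-id="42">Bar</div>') == ['Bar']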


class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
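
# Example (illustrative): tags are stripped, <br> and paragraph breaks become newlines,
# and entities are decoded:
#   clean_html('<p>First line<br/>second &amp; last</p>') == 'First line\nsecond & last'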


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
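
# Examples (illustrative input strings):
#   sanitize_filename('A/B|C') == 'A_B_C'                       # separators and pipes -> '_'
#   sanitize_filename('Song: Title?', restricted=True) == 'Song_-_Title'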


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url
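
# Examples (illustrative URLs):
#   sanitize_url('//cdn.example.com/v.mp4') == 'http://cdn.example.com/v.mp4'
#   sanitize_url('httpss://example.com/') == 'https://example.com/'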


def sanitized_Request(url, *args, **kwargs):
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res
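
# Example (illustrative): the order of first occurrence is preserved:
#   orderedSet([2, 1, 2, 3, 1]) == [2, 1, 3]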


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
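
# Example (illustrative): named, decimal and hexadecimal references are all decoded:
#   unescapeHTML('&amp; &#34; &#x27;') == '& " \''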


def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


def formatSeconds(secs, delim=':'):
    if secs > 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs > 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
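
# Examples (illustrative):
#   formatSeconds(75) == '1:15'
#   formatSeconds(3661) == '1:01:01'
#   formatSeconds(3661, delim='_') == '1_01_01'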
2316
a0ddb8a2 2317
be4a824d
PH
2318def make_HTTPS_handler(params, **kwargs):
2319 opts_no_check_certificate = params.get('nocheckcertificate', False)
0db261ba 2320 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
be5f2c19 2321 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
0db261ba 2322 if opts_no_check_certificate:
be5f2c19 2323 context.check_hostname = False
0db261ba 2324 context.verify_mode = ssl.CERT_NONE
a2366922 2325 try:
be4a824d 2326 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
a2366922
PH
2327 except TypeError:
2328 # Python 2.7.8
2329 # (create_default_context present but HTTPSHandler has no context=)
2330 pass
2331
2332 if sys.version_info < (3, 2):
d7932313 2333 return YoutubeDLHTTPSHandler(params, **kwargs)
aa37e3d4 2334 else: # Python 3.2 - 3.3
d7932313 2335 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
ea6d901e 2336 context.verify_mode = (ssl.CERT_NONE
dca08720 2337 if opts_no_check_certificate
ea6d901e 2338 else ssl.CERT_REQUIRED)
303b479e 2339 context.set_default_verify_paths()
be4a824d 2340 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2341
732ea2f0 2342
08f2a92c
JMF
2343def bug_reports_message():
2344 if ytdl_is_updateable():
7a5c1cfe 2345 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2346 else:
7a5c1cfe
P
2347 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
2348 msg = '; please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2349 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2350 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
08f2a92c
JMF
2351 return msg
2352
2353
bf5b9d85
PM
2354class YoutubeDLError(Exception):
2355 """Base exception for YoutubeDL errors."""
2356 pass
2357
2358
2359class ExtractorError(YoutubeDLError):
1c256f70 2360 """Error during info extraction."""
5f6a1245 2361
d11271dd 2362 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
9a82b238 2363 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2364 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238
PH
2365 """
2366
2367 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
2368 expected = True
d11271dd
PH
2369 if video_id is not None:
2370 msg = video_id + ': ' + msg
410f3e73 2371 if cause:
28e614de 2372 msg += ' (caused by %r)' % cause
9a82b238 2373 if not expected:
08f2a92c 2374 msg += bug_reports_message()
1c256f70 2375 super(ExtractorError, self).__init__(msg)
d5979c5d 2376
1c256f70 2377 self.traceback = tb
8cc83b8d 2378 self.exc_info = sys.exc_info() # preserve original exception
2eabb802 2379 self.cause = cause
d11271dd 2380 self.video_id = video_id
1c256f70 2381
01951dda
PH
2382 def format_traceback(self):
2383 if self.traceback is None:
2384 return None
28e614de 2385 return ''.join(traceback.format_tb(self.traceback))
01951dda 2386
1c256f70 2387
416c7fcb
PH
2388class UnsupportedError(ExtractorError):
2389 def __init__(self, url):
2390 super(UnsupportedError, self).__init__(
2391 'Unsupported URL: %s' % url, expected=True)
2392 self.url = url
2393
2394
55b3e45b
JMF
2395class RegexNotFoundError(ExtractorError):
2396 """Error when a regex didn't match"""
2397 pass
2398
2399
773f291d
S
2400class GeoRestrictedError(ExtractorError):
2401 """Geographic restriction Error exception.
2402
2403 This exception may be thrown when a video is not available from your
2404 geographic location due to geographic restrictions imposed by a website.
2405 """
b6e0c7d2 2406
773f291d
S
2407 def __init__(self, msg, countries=None):
2408 super(GeoRestrictedError, self).__init__(msg, expected=True)
2409 self.msg = msg
2410 self.countries = countries
2411
2412
bf5b9d85 2413class DownloadError(YoutubeDLError):
59ae15a5 2414 """Download Error exception.
d77c3dfd 2415
59ae15a5
PH
2416 This exception may be thrown by FileDownloader objects if they are not
2417 configured to continue on errors. They will contain the appropriate
2418 error message.
2419 """
5f6a1245 2420
8cc83b8d
FV
2421 def __init__(self, msg, exc_info=None):
2422 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2423 super(DownloadError, self).__init__(msg)
2424 self.exc_info = exc_info
d77c3dfd
FV
2425
2426
498f5606 2427class EntryNotInPlaylist(YoutubeDLError):
2428 """Entry not in playlist exception.
2429
2430 This exception will be thrown by YoutubeDL when a requested entry
2431 is not found in the playlist info_dict
2432 """
2433 pass
2434
2435
bf5b9d85 2436class SameFileError(YoutubeDLError):
59ae15a5 2437 """Same File exception.
d77c3dfd 2438
59ae15a5
PH
2439 This exception will be thrown by FileDownloader objects if they detect
2440 multiple files would have to be downloaded to the same file on disk.
2441 """
2442 pass
d77c3dfd
FV
2443
2444
bf5b9d85 2445class PostProcessingError(YoutubeDLError):
59ae15a5 2446 """Post Processing exception.
d77c3dfd 2447
59ae15a5
PH
2448 This exception may be raised by PostProcessor's .run() method to
2449 indicate an error in the postprocessing task.
2450 """
5f6a1245 2451
7851b379 2452 def __init__(self, msg):
bf5b9d85 2453 super(PostProcessingError, self).__init__(msg)
7851b379 2454 self.msg = msg
d77c3dfd 2455
5f6a1245 2456
8b0d7497 2457class ExistingVideoReached(YoutubeDLError):
2458 """ --max-downloads limit has been reached. """
2459 pass
2460
2461
2462class RejectedVideoReached(YoutubeDLError):
2463 """ --max-downloads limit has been reached. """
2464 pass
2465
2466
bf5b9d85 2467class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2468 """ --max-downloads limit has been reached. """
2469 pass
d77c3dfd
FV
2470
2471
bf5b9d85 2472class UnavailableVideoError(YoutubeDLError):
59ae15a5 2473 """Unavailable Format exception.
d77c3dfd 2474
59ae15a5
PH
2475 This exception will be thrown when a video is requested
2476 in a format that is not available for that video.
2477 """
2478 pass
d77c3dfd
FV
2479
2480
bf5b9d85 2481class ContentTooShortError(YoutubeDLError):
59ae15a5 2482 """Content Too Short exception.
d77c3dfd 2483
59ae15a5
PH
2484 This exception may be raised by FileDownloader objects when a file they
2485 download is too small for what the server announced first, indicating
2486 the connection was probably interrupted.
2487 """
d77c3dfd 2488
59ae15a5 2489 def __init__(self, downloaded, expected):
bf5b9d85
PM
2490 super(ContentTooShortError, self).__init__(
2491 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2492 )
2c7ed247 2493 # Both in bytes
59ae15a5
PH
2494 self.downloaded = downloaded
2495 self.expected = expected
d77c3dfd 2496
5f6a1245 2497
bf5b9d85 2498class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2499 def __init__(self, code=None, msg='Unknown error'):
2500 super(XAttrMetadataError, self).__init__(msg)
2501 self.code = code
bd264412 2502 self.msg = msg
efa97bdc
YCH
2503
2504 # Parsing code and msg
3089bc74 2505 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2506 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2507 self.reason = 'NO_SPACE'
2508 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2509 self.reason = 'VALUE_TOO_LONG'
2510 else:
2511 self.reason = 'NOT_SUPPORTED'
2512
2513
bf5b9d85 2514class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2515 pass
2516
2517
c5a59d93 2518def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2519 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2520 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2521 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2522 if sys.version_info < (3, 0):
65220c3b
S
2523 kwargs['strict'] = True
2524 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2525 source_address = ydl_handler._params.get('source_address')
8959018a 2526
be4a824d 2527 if source_address is not None:
8959018a
AU
2528 # This is a workaround for _create_connection() from socket, which tries all
2529 # address data from getaddrinfo(), including IPv6. This filters the result from
2530 # getaddrinfo() based on the source_address value.
2531 # This is based on the cpython socket.create_connection() function.
2532 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2533 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2534 host, port = address
2535 err = None
2536 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2537 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2538 ip_addrs = [addr for addr in addrs if addr[0] == af]
2539 if addrs and not ip_addrs:
2540 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2541 raise socket.error(
2542 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2543 % (ip_version, source_address[0]))
8959018a
AU
2544 for res in ip_addrs:
2545 af, socktype, proto, canonname, sa = res
2546 sock = None
2547 try:
2548 sock = socket.socket(af, socktype, proto)
2549 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2550 sock.settimeout(timeout)
2551 sock.bind(source_address)
2552 sock.connect(sa)
2553 err = None # Explicitly break reference cycle
2554 return sock
2555 except socket.error as _:
2556 err = _
2557 if sock is not None:
2558 sock.close()
2559 if err is not None:
2560 raise err
2561 else:
9e21e6d9
S
2562 raise socket.error('getaddrinfo returns an empty list')
2563 if hasattr(hc, '_create_connection'):
2564 hc._create_connection = _create_connection
be4a824d
PH
2565 sa = (source_address, 0)
2566 if hasattr(hc, 'source_address'): # Python 2.7+
2567 hc.source_address = sa
2568 else: # Python 2.6
2569 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2570 sock = _create_connection(
be4a824d
PH
2571 (self.host, self.port), self.timeout, sa)
2572 if is_https:
d7932313
PH
2573 self.sock = ssl.wrap_socket(
2574 sock, self.key_file, self.cert_file,
2575 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2576 else:
2577 self.sock = sock
2578 hc.connect = functools.partial(_hc_connect, hc)
2579
2580 return hc
2581
2582
87f0e62d 2583def handle_youtubedl_headers(headers):
992fc9d6
YCH
2584 filtered_headers = headers
2585
2586 if 'Youtubedl-no-compression' in filtered_headers:
2587 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2588 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2589
992fc9d6 2590 return filtered_headers
87f0e62d
YCH
2591
2592
acebc9cd 2593class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2594 """Handler for HTTP requests and responses.
2595
2596 This class, when installed with an OpenerDirector, automatically adds
2597 the standard headers to every HTTP request and handles gzipped and
2598 deflated responses from web servers. If compression is to be avoided in
2599 a particular request, the original request in the program code only has
0424ec30 2600 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2601 removed before making the real request.
2602
2603 Part of this code was copied from:
2604
2605 http://techknack.net/python-urllib2-handlers/
2606
2607 Andrew Rowls, the author of that code, agreed to release it to the
2608 public domain.
2609 """
2610
be4a824d
PH
2611 def __init__(self, params, *args, **kwargs):
2612 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2613 self._params = params
2614
2615 def http_open(self, req):
71aff188
YCH
2616 conn_class = compat_http_client.HTTPConnection
2617
2618 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2619 if socks_proxy:
2620 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2621 del req.headers['Ytdl-socks-proxy']
2622
be4a824d 2623 return self.do_open(functools.partial(
71aff188 2624 _create_http_connection, self, conn_class, False),
be4a824d
PH
2625 req)
2626
59ae15a5
PH
2627 @staticmethod
2628 def deflate(data):
fc2119f2 2629 if not data:
2630 return data
59ae15a5
PH
2631 try:
2632 return zlib.decompress(data, -zlib.MAX_WBITS)
2633 except zlib.error:
2634 return zlib.decompress(data)
2635
acebc9cd 2636 def http_request(self, req):
51f267d9
S
2637 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
2638 # always respected by websites: some tend to give out URLs with non-percent-encoded
2639 # non-ASCII characters (see telemb.py, ard.py [#3412])
2640 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2641 # To work around aforementioned issue we will replace request's original URL with
2642 # percent-encoded one
2643 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2644 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2645 url = req.get_full_url()
2646 url_escaped = escape_url(url)
2647
2648 # Substitute URL if any change after escaping
2649 if url != url_escaped:
15d260eb 2650 req = update_Request(req, url=url_escaped)
51f267d9 2651
33ac271b 2652 for h, v in std_headers.items():
3d5f7a39
JK
2653 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2654 # The dict keys are capitalized because of this bug by urllib
2655 if h.capitalize() not in req.headers:
33ac271b 2656 req.add_header(h, v)
87f0e62d
YCH
2657
2658 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2659
2660 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2661 # Python 2.6 is brain-dead when it comes to fragments
2662 req._Request__original = req._Request__original.partition('#')[0]
2663 req._Request__r_type = req._Request__r_type.partition('#')[0]
2664
59ae15a5
PH
2665 return req
2666
acebc9cd 2667 def http_response(self, req, resp):
59ae15a5
PH
2668 old_resp = resp
2669 # gzip
2670 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2671 content = resp.read()
2672 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2673 try:
2674 uncompressed = io.BytesIO(gz.read())
2675 except IOError as original_ioerror:
2676 # There may be junk at the end of the file
2677 # See http://stackoverflow.com/q/4928560/35070 for details
2678 for i in range(1, 1024):
2679 try:
2680 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2681 uncompressed = io.BytesIO(gz.read())
2682 except IOError:
2683 continue
2684 break
2685 else:
2686 raise original_ioerror
b407d853 2687 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2688 resp.msg = old_resp.msg
c047270c 2689 del resp.headers['Content-encoding']
59ae15a5
PH
2690 # deflate
2691 if resp.headers.get('Content-encoding', '') == 'deflate':
2692 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2693 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2694 resp.msg = old_resp.msg
c047270c 2695 del resp.headers['Content-encoding']
ad729172 2696 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2697 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2698 if 300 <= resp.code < 400:
2699 location = resp.headers.get('Location')
2700 if location:
2701 # As of RFC 2616 the default charset is iso-8859-1, which is respected by Python 3
2702 if sys.version_info >= (3, 0):
2703 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2704 else:
2705 location = location.decode('utf-8')
5a4d9ddb
S
2706 location_escaped = escape_url(location)
2707 if location != location_escaped:
2708 del resp.headers['Location']
9a4aec8b
YCH
2709 if sys.version_info < (3, 0):
2710 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2711 resp.headers['Location'] = location_escaped
59ae15a5 2712 return resp
0f8d03f8 2713
acebc9cd
PH
2714 https_request = http_request
2715 https_response = http_response
bf50b038 2716
5de90176 2717
71aff188
YCH
2718def make_socks_conn_class(base_class, socks_proxy):
2719 assert issubclass(base_class, (
2720 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2721
2722 url_components = compat_urlparse.urlparse(socks_proxy)
2723 if url_components.scheme.lower() == 'socks5':
2724 socks_type = ProxyType.SOCKS5
2725 elif url_components.scheme.lower() in ('socks', 'socks4'):
2726 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2727 elif url_components.scheme.lower() == 'socks4a':
2728 socks_type = ProxyType.SOCKS4A
71aff188 2729
cdd94c2e
YCH
2730 def unquote_if_non_empty(s):
2731 if not s:
2732 return s
2733 return compat_urllib_parse_unquote_plus(s)
2734
71aff188
YCH
2735 proxy_args = (
2736 socks_type,
2737 url_components.hostname, url_components.port or 1080,
2738 True, # Remote DNS
cdd94c2e
YCH
2739 unquote_if_non_empty(url_components.username),
2740 unquote_if_non_empty(url_components.password),
71aff188
YCH
2741 )
2742
2743 class SocksConnection(base_class):
2744 def connect(self):
2745 self.sock = sockssocket()
2746 self.sock.setproxy(*proxy_args)
2747 if type(self.timeout) in (int, float):
2748 self.sock.settimeout(self.timeout)
2749 self.sock.connect((self.host, self.port))
2750
2751 if isinstance(self, compat_http_client.HTTPSConnection):
2752 if hasattr(self, '_context'): # Python > 2.6
2753 self.sock = self._context.wrap_socket(
2754 self.sock, server_hostname=self.host)
2755 else:
2756 self.sock = ssl.wrap_socket(self.sock)
2757
2758 return SocksConnection
2759
2760
be4a824d
PH
2761class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2762 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2763 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2764 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2765 self._params = params
2766
2767 def https_open(self, req):
4f264c02 2768 kwargs = {}
71aff188
YCH
2769 conn_class = self._https_conn_class
2770
4f264c02
JMF
2771 if hasattr(self, '_context'): # python > 2.6
2772 kwargs['context'] = self._context
2773 if hasattr(self, '_check_hostname'): # python 3.x
2774 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2775
2776 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2777 if socks_proxy:
2778 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2779 del req.headers['Ytdl-socks-proxy']
2780
be4a824d 2781 return self.do_open(functools.partial(
71aff188 2782 _create_http_connection, self, conn_class, True),
4f264c02 2783 req, **kwargs)
be4a824d
PH
2784
2785
1bab3437 2786class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2787 """
2788 See [1] for cookie file format.
2789
2790 1. https://curl.haxx.se/docs/http-cookies.html
2791 """
e7e62441 2792 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2793 _ENTRY_LEN = 7
2794 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2795# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2796
2797'''
2798 _CookieFileEntry = collections.namedtuple(
2799 'CookieFileEntry',
2800 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2801
1bab3437 2802 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2803 """
2804 Save cookies to a file.
2805
2806 Most of the code is taken from CPython 3.8 and slightly adapted
2807 to support cookie files with UTF-8 in both python 2 and 3.
2808 """
2809 if filename is None:
2810 if self.filename is not None:
2811 filename = self.filename
2812 else:
2813 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2814
1bab3437
S
2815 # Store session cookies with `expires` set to 0 instead of an empty
2816 # string
2817 for cookie in self:
2818 if cookie.expires is None:
2819 cookie.expires = 0
c380cc28
S
2820
2821 with io.open(filename, 'w', encoding='utf-8') as f:
2822 f.write(self._HEADER)
2823 now = time.time()
2824 for cookie in self:
2825 if not ignore_discard and cookie.discard:
2826 continue
2827 if not ignore_expires and cookie.is_expired(now):
2828 continue
2829 if cookie.secure:
2830 secure = 'TRUE'
2831 else:
2832 secure = 'FALSE'
2833 if cookie.domain.startswith('.'):
2834 initial_dot = 'TRUE'
2835 else:
2836 initial_dot = 'FALSE'
2837 if cookie.expires is not None:
2838 expires = compat_str(cookie.expires)
2839 else:
2840 expires = ''
2841 if cookie.value is None:
2842 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2843 # with no name, whereas http.cookiejar regards it as a
2844 # cookie with no value.
2845 name = ''
2846 value = cookie.name
2847 else:
2848 name = cookie.name
2849 value = cookie.value
2850 f.write(
2851 '\t'.join([cookie.domain, initial_dot, cookie.path,
2852 secure, expires, name, value]) + '\n')
1bab3437
S
2853
2854 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2855 """Load cookies from a file."""
2856 if filename is None:
2857 if self.filename is not None:
2858 filename = self.filename
2859 else:
2860 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2861
c380cc28
S
2862 def prepare_line(line):
2863 if line.startswith(self._HTTPONLY_PREFIX):
2864 line = line[len(self._HTTPONLY_PREFIX):]
2865 # comments and empty lines are fine
2866 if line.startswith('#') or not line.strip():
2867 return line
2868 cookie_list = line.split('\t')
2869 if len(cookie_list) != self._ENTRY_LEN:
2870 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2871 cookie = self._CookieFileEntry(*cookie_list)
2872 if cookie.expires_at and not cookie.expires_at.isdigit():
2873 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2874 return line
2875
e7e62441 2876 cf = io.StringIO()
c380cc28 2877 with io.open(filename, encoding='utf-8') as f:
e7e62441 2878 for line in f:
c380cc28
S
2879 try:
2880 cf.write(prepare_line(line))
2881 except compat_cookiejar.LoadError as e:
2882 write_string(
2883 'WARNING: skipping cookie file entry due to %s: %r\n'
2884 % (e, line), sys.stderr)
2885 continue
e7e62441 2886 cf.seek(0)
2887 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2888 # Session cookies are denoted by either `expires` field set to
2889 # an empty string or 0. MozillaCookieJar only recognizes the former
2890 # (see [1]). So we need to force the latter to be recognized as session
2891 # cookies on our own.
2892 # Session cookies may be important for cookies-based authentication,
2893 # e.g. usually, when user does not check 'Remember me' check box while
2894 # logging in on a site, some important cookies are stored as session
2895 # cookies so that not recognizing them will result in failed login.
2896 # 1. https://bugs.python.org/issue17164
2897 for cookie in self:
2898 # Treat `expires=0` cookies as session cookies
2899 if cookie.expires == 0:
2900 cookie.expires = None
2901 cookie.discard = True
2902
2903
a6420bf5
S
2904class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2905 def __init__(self, cookiejar=None):
2906 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2907
2908 def http_response(self, request, response):
2909 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2910 # characters in Set-Cookie HTTP header of last response (see
067aa17e 2911 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
2912 # In order to at least prevent crashing we will percent encode Set-Cookie
2913 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
2914 # if sys.version_info < (3, 0) and response.headers:
2915 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2916 # set_cookie = response.headers.get(set_cookie_header)
2917 # if set_cookie:
2918 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2919 # if set_cookie != set_cookie_escaped:
2920 # del response.headers[set_cookie_header]
2921 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
2922 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2923
915f911e 2924 def http_request(self, request):
2925 # If the URL contains non-ASCII characters, the cookies
2926 # are lost before the request reaches YoutubeDLHandler.
2927 # So we percent encode the url before adding cookies
2928 # See: https://github.com/yt-dlp/yt-dlp/issues/263
2929 request = update_Request(request, url=escape_url(request.get_full_url()))
2930 return compat_urllib_request.HTTPCookieProcessor.http_request(self, request)
2931
2932 https_request = http_request
a6420bf5
S
2933 https_response = http_response
2934
2935
fca6dba8 2936class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 2937 """YoutubeDL redirect handler
2938
2939 The code is based on HTTPRedirectHandler implementation from CPython [1].
2940
2941 This redirect handler solves two issues:
2942 - ensures redirect URL is always unicode under python 2
2943 - introduces support for experimental HTTP response status code
2944 308 Permanent Redirect [2] used by some sites [3]
2945
2946 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2947 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2948 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2949 """
2950
2951 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2952
2953 def redirect_request(self, req, fp, code, msg, headers, newurl):
2954 """Return a Request or None in response to a redirect.
2955
2956 This is called by the http_error_30x methods when a
2957 redirection response is received. If a redirection should
2958 take place, return a new Request to allow http_error_30x to
2959 perform the redirect. Otherwise, raise HTTPError if no-one
2960 else should try to handle this url. Return None if you can't
2961 but another Handler might.
2962 """
2963 m = req.get_method()
2964 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
2965 or code in (301, 302, 303) and m == "POST")):
2966 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
2967 # Strictly (according to RFC 2616), 301 or 302 in response to
2968 # a POST MUST NOT cause a redirection without confirmation
2969 # from the user (of urllib.request, in this case). In practice,
2970 # essentially all clients do redirect in this case, so we do
2971 # the same.
2972
2973 # On python 2 urlh.geturl() may sometimes return redirect URL
2974 # as byte string instead of unicode. This workaround allows
2975 # to force it always return unicode.
2976 if sys.version_info[0] < 3:
2977 newurl = compat_str(newurl)
2978
2979 # Be conciliant with URIs containing a space. This is mainly
2980 # redundant with the more complete encoding done in http_error_302(),
2981 # but it is kept for compatibility with other callers.
2982 newurl = newurl.replace(' ', '%20')
2983
2984 CONTENT_HEADERS = ("content-length", "content-type")
2985 # NB: don't use dict comprehension for python 2.6 compatibility
2986 newheaders = dict((k, v) for k, v in req.headers.items()
2987 if k.lower() not in CONTENT_HEADERS)
2988 return compat_urllib_request.Request(
2989 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
2990 unverifiable=True)
fca6dba8
S
2991
2992
46f59e89
S
2993def extract_timezone(date_str):
2994 m = re.search(
2995 r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2996 date_str)
2997 if not m:
2998 timezone = datetime.timedelta()
2999 else:
3000 date_str = date_str[:-len(m.group('tz'))]
3001 if not m.group('sign'):
3002 timezone = datetime.timedelta()
3003 else:
3004 sign = 1 if m.group('sign') == '+' else -1
3005 timezone = datetime.timedelta(
3006 hours=sign * int(m.group('hours')),
3007 minutes=sign * int(m.group('minutes')))
3008 return timezone, date_str
3009
3010
08b38d54 3011def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3012 """ Return a UNIX timestamp from the given date """
3013
3014 if date_str is None:
3015 return None
3016
52c3a6e4
S
3017 date_str = re.sub(r'\.[0-9]+', '', date_str)
3018
08b38d54 3019 if timezone is None:
46f59e89
S
3020 timezone, date_str = extract_timezone(date_str)
3021
52c3a6e4
S
3022 try:
3023 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3024 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3025 return calendar.timegm(dt.timetuple())
3026 except ValueError:
3027 pass
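# Example usage (illustrative only; values assume parse_iso8601 behaves as defined above):
# the return value is a UNIX timestamp in UTC, e.g.
#   parse_iso8601('2020-01-01T00:00:00Z')      -> 1577836800
#   parse_iso8601('2020-01-01T02:00:00+02:00') -> 1577836800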
912b38b4
PH
3028
3029
46f59e89
S
3030def date_formats(day_first=True):
3031 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3032
3033
42bdd9d0 3034def unified_strdate(date_str, day_first=True):
bf50b038 3035 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3036
3037 if date_str is None:
3038 return None
bf50b038 3039 upload_date = None
5f6a1245 3040 # Replace commas
026fcc04 3041 date_str = date_str.replace(',', ' ')
42bdd9d0 3042 # Remove AM/PM + timezone
9bb8e0a3 3043 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3044 _, date_str = extract_timezone(date_str)
42bdd9d0 3045
46f59e89 3046 for expression in date_formats(day_first):
bf50b038
JMF
3047 try:
3048 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3049 except ValueError:
bf50b038 3050 pass
42393ce2
PH
3051 if upload_date is None:
3052 timetuple = email.utils.parsedate_tz(date_str)
3053 if timetuple:
c6b9cf05
S
3054 try:
3055 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3056 except ValueError:
3057 pass
6a750402
JMF
3058 if upload_date is not None:
3059 return compat_str(upload_date)
bf50b038 3060
5f6a1245 3061
46f59e89
S
3062def unified_timestamp(date_str, day_first=True):
3063 if date_str is None:
3064 return None
3065
2ae2ffda 3066 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3067
7dc2a74e 3068 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3069 timezone, date_str = extract_timezone(date_str)
3070
3071 # Remove AM/PM + timezone
3072 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3073
deef3195
S
3074 # Remove unrecognized timezones from ISO 8601-like timestamps
3075 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3076 if m:
3077 date_str = date_str[:-len(m.group('tz'))]
3078
f226880c
PH
3079 # Python only supports microseconds, so remove nanoseconds
3080 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3081 if m:
3082 date_str = m.group(1)
3083
46f59e89
S
3084 for expression in date_formats(day_first):
3085 try:
7dc2a74e 3086 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3087 return calendar.timegm(dt.timetuple())
3088 except ValueError:
3089 pass
3090 timetuple = email.utils.parsedate_tz(date_str)
3091 if timetuple:
7dc2a74e 3092 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3093
3094
28e614de 3095def determine_ext(url, default_ext='unknown_video'):
85750f89 3096 if url is None or '.' not in url:
f4776371 3097 return default_ext
9cb9a5df 3098 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3099 if re.match(r'^[A-Za-z0-9]+$', guess):
3100 return guess
a7aaa398
S
3101 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
3102 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3103 return guess.rstrip('/')
73e79f2a 3104 else:
cbdbb766 3105 return default_ext
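# Example usage (illustrative only; values assume determine_ext behaves as defined above):
#   determine_ext('http://example.com/video.mp4?download=1') -> 'mp4'
#   determine_ext('http://example.com/stream')               -> 'unknown_video'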
73e79f2a 3106
5f6a1245 3107
824fa511
S
3108def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3109 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3110
5f6a1245 3111
9e62f283 3112def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3113 """
3114 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3115 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3116
3117 format: string date format used to return datetime object from
3118 precision: round the time portion of a datetime object.
3119 auto|microsecond|second|minute|hour|day.
3120 auto: round to the unit provided in date_str (if applicable).
3121 """
3122 auto_precision = False
3123 if precision == 'auto':
3124 auto_precision = True
3125 precision = 'microsecond'
3126 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3127 if date_str in ('now', 'today'):
37254abc 3128 return today
f8795e10
PH
3129 if date_str == 'yesterday':
3130 return today - datetime.timedelta(days=1)
9e62f283 3131 match = re.match(
3132 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3133 date_str)
37254abc 3134 if match is not None:
9e62f283 3135 start_time = datetime_from_str(match.group('start'), precision, format)
3136 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3137 unit = match.group('unit')
9e62f283 3138 if unit == 'month' or unit == 'year':
3139 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3140 unit = 'day'
9e62f283 3141 else:
3142 if unit == 'week':
3143 unit = 'day'
3144 time *= 7
3145 delta = datetime.timedelta(**{unit + 's': time})
3146 new_date = start_time + delta
3147 if auto_precision:
3148 return datetime_round(new_date, unit)
3149 return new_date
3150
3151 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3152
3153
3154def date_from_str(date_str, format='%Y%m%d'):
3155 """
3156 Return a datetime object from a string in the format YYYYMMDD or
3157 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3158
3159 format: string date format used to parse date_str
3160 """
3161 return datetime_from_str(date_str, precision='microsecond', format=format).date()
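# Example usage (illustrative only; values assume date_from_str behaves as defined above):
# both absolute and relative date strings are accepted, e.g.
#   date_from_str('20200101')    -> datetime.date(2020, 1, 1)
#   date_from_str('now-1week')   -> the date exactly one week before today
#   date_from_str('today+2days') -> the date two days from today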
3162
3163
3164def datetime_add_months(dt, months):
3165 """Increment/Decrement a datetime object by months."""
3166 month = dt.month + months - 1
3167 year = dt.year + month // 12
3168 month = month % 12 + 1
3169 day = min(dt.day, calendar.monthrange(year, month)[1])
3170 return dt.replace(year, month, day)
3171
3172
3173def datetime_round(dt, precision='day'):
3174 """
3175 Round a datetime object's time to a specific precision
3176 """
3177 if precision == 'microsecond':
3178 return dt
3179
3180 unit_seconds = {
3181 'day': 86400,
3182 'hour': 3600,
3183 'minute': 60,
3184 'second': 1,
3185 }
3186 roundto = lambda x, n: ((x + n / 2) // n) * n
3187 timestamp = calendar.timegm(dt.timetuple())
3188 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3189
3190
e63fc1be 3191def hyphenate_date(date_str):
3192 """
3193 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3194 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3195 if match is not None:
3196 return '-'.join(match.groups())
3197 else:
3198 return date_str
3199
5f6a1245 3200
bd558525
JMF
3201class DateRange(object):
3202 """Represents a time interval between two dates"""
5f6a1245 3203
bd558525
JMF
3204 def __init__(self, start=None, end=None):
3205 """start and end must be strings in the format accepted by date"""
3206 if start is not None:
3207 self.start = date_from_str(start)
3208 else:
3209 self.start = datetime.datetime.min.date()
3210 if end is not None:
3211 self.end = date_from_str(end)
3212 else:
3213 self.end = datetime.datetime.max.date()
37254abc 3214 if self.start > self.end:
bd558525 3215 raise ValueError('Date range: "%s", the start date must be before the end date' % self)
5f6a1245 3216
bd558525
JMF
3217 @classmethod
3218 def day(cls, day):
3219 """Returns a range that only contains the given day"""
5f6a1245
JW
3220 return cls(day, day)
3221
bd558525
JMF
3222 def __contains__(self, date):
3223 """Check if the date is in the range"""
37254abc
JMF
3224 if not isinstance(date, datetime.date):
3225 date = date_from_str(date)
3226 return self.start <= date <= self.end
5f6a1245 3227
bd558525 3228 def __str__(self):
5f6a1245 3229 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
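# Example usage (illustrative only; values assume DateRange behaves as defined above):
#   date_range = DateRange('20200101', '20200131')
#   '20200115' in date_range -> True
#   '20200201' in date_range -> False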
c496ca96
PH
3230
3231
3232def platform_name():
3233 """ Returns the platform name as a compat_str """
3234 res = platform.platform()
3235 if isinstance(res, bytes):
3236 res = res.decode(preferredencoding())
3237
3238 assert isinstance(res, compat_str)
3239 return res
c257baff
PH
3240
3241
b58ddb32
PH
3242def _windows_write_string(s, out):
3243 """ Returns True if the string was written using special methods,
3244 False if it has yet to be written out."""
3245 # Adapted from http://stackoverflow.com/a/3259271/35070
3246
3247 import ctypes
3248 import ctypes.wintypes
3249
3250 WIN_OUTPUT_IDS = {
3251 1: -11,
3252 2: -12,
3253 }
3254
a383a98a
PH
3255 try:
3256 fileno = out.fileno()
3257 except AttributeError:
3258 # If the output stream doesn't have a fileno, it's virtual
3259 return False
aa42e873
PH
3260 except io.UnsupportedOperation:
3261 # Some strange Windows pseudo files?
3262 return False
b58ddb32
PH
3263 if fileno not in WIN_OUTPUT_IDS:
3264 return False
3265
d7cd9a9e 3266 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3267 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3268 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3269 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3270
d7cd9a9e 3271 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3272 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3273 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3274 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3275 written = ctypes.wintypes.DWORD(0)
3276
d7cd9a9e 3277 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3278 FILE_TYPE_CHAR = 0x0002
3279 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3280 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3281 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3282 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3283 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3284 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3285
3286 def not_a_console(handle):
3287 if handle == INVALID_HANDLE_VALUE or handle is None:
3288 return True
3089bc74
S
3289 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3290 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3291
3292 if not_a_console(h):
3293 return False
3294
d1b9c912
PH
3295 def next_nonbmp_pos(s):
3296 try:
3297 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3298 except StopIteration:
3299 return len(s)
3300
3301 while s:
3302 count = min(next_nonbmp_pos(s), 1024)
3303
b58ddb32 3304 ret = WriteConsoleW(
d1b9c912 3305 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3306 if ret == 0:
3307 raise OSError('Failed to write string')
d1b9c912
PH
3308 if not count: # We just wrote a non-BMP character
3309 assert written.value == 2
3310 s = s[1:]
3311 else:
3312 assert written.value > 0
3313 s = s[written.value:]
b58ddb32
PH
3314 return True
3315
3316
734f90bb 3317def write_string(s, out=None, encoding=None):
7459e3a2
PH
3318 if out is None:
3319 out = sys.stderr
8bf48f23 3320 assert type(s) == compat_str
7459e3a2 3321
b58ddb32
PH
3322 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3323 if _windows_write_string(s, out):
3324 return
3325
3089bc74
S
3326 if ('b' in getattr(out, 'mode', '')
3327 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3328 byt = s.encode(encoding or preferredencoding(), 'ignore')
3329 out.write(byt)
3330 elif hasattr(out, 'buffer'):
3331 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3332 byt = s.encode(enc, 'ignore')
3333 out.buffer.write(byt)
3334 else:
8bf48f23 3335 out.write(s)
7459e3a2
PH
3336 out.flush()
3337
3338
48ea9cea
PH
3339def bytes_to_intlist(bs):
3340 if not bs:
3341 return []
3342 if isinstance(bs[0], int): # Python 3
3343 return list(bs)
3344 else:
3345 return [ord(c) for c in bs]
3346
c257baff 3347
cba892fa 3348def intlist_to_bytes(xs):
3349 if not xs:
3350 return b''
edaa23f8 3351 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3352
3353
c1c9a79c
PH
3354# Cross-platform file locking
3355if sys.platform == 'win32':
3356 import ctypes.wintypes
3357 import msvcrt
3358
3359 class OVERLAPPED(ctypes.Structure):
3360 _fields_ = [
3361 ('Internal', ctypes.wintypes.LPVOID),
3362 ('InternalHigh', ctypes.wintypes.LPVOID),
3363 ('Offset', ctypes.wintypes.DWORD),
3364 ('OffsetHigh', ctypes.wintypes.DWORD),
3365 ('hEvent', ctypes.wintypes.HANDLE),
3366 ]
3367
3368 kernel32 = ctypes.windll.kernel32
3369 LockFileEx = kernel32.LockFileEx
3370 LockFileEx.argtypes = [
3371 ctypes.wintypes.HANDLE, # hFile
3372 ctypes.wintypes.DWORD, # dwFlags
3373 ctypes.wintypes.DWORD, # dwReserved
3374 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3375 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3376 ctypes.POINTER(OVERLAPPED) # Overlapped
3377 ]
3378 LockFileEx.restype = ctypes.wintypes.BOOL
3379 UnlockFileEx = kernel32.UnlockFileEx
3380 UnlockFileEx.argtypes = [
3381 ctypes.wintypes.HANDLE, # hFile
3382 ctypes.wintypes.DWORD, # dwReserved
3383 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3384 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3385 ctypes.POINTER(OVERLAPPED) # Overlapped
3386 ]
3387 UnlockFileEx.restype = ctypes.wintypes.BOOL
3388 whole_low = 0xffffffff
3389 whole_high = 0x7fffffff
3390
3391 def _lock_file(f, exclusive):
3392 overlapped = OVERLAPPED()
3393 overlapped.Offset = 0
3394 overlapped.OffsetHigh = 0
3395 overlapped.hEvent = 0
3396 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3397 handle = msvcrt.get_osfhandle(f.fileno())
3398 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3399 whole_low, whole_high, f._lock_file_overlapped_p):
3400 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3401
3402 def _unlock_file(f):
3403 assert f._lock_file_overlapped_p
3404 handle = msvcrt.get_osfhandle(f.fileno())
3405 if not UnlockFileEx(handle, 0,
3406 whole_low, whole_high, f._lock_file_overlapped_p):
3407 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3408
3409else:
399a76e6
YCH
3410 # Some platforms, such as Jython, are missing fcntl
3411 try:
3412 import fcntl
c1c9a79c 3413
399a76e6
YCH
3414 def _lock_file(f, exclusive):
3415 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3416
399a76e6
YCH
3417 def _unlock_file(f):
3418 fcntl.flock(f, fcntl.LOCK_UN)
3419 except ImportError:
3420 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3421
3422 def _lock_file(f, exclusive):
3423 raise IOError(UNSUPPORTED_MSG)
3424
3425 def _unlock_file(f):
3426 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3427
3428
3429class locked_file(object):
3430 def __init__(self, filename, mode, encoding=None):
3431 assert mode in ['r', 'a', 'w']
3432 self.f = io.open(filename, mode, encoding=encoding)
3433 self.mode = mode
3434
3435 def __enter__(self):
3436 exclusive = self.mode != 'r'
3437 try:
3438 _lock_file(self.f, exclusive)
3439 except IOError:
3440 self.f.close()
3441 raise
3442 return self
3443
3444 def __exit__(self, etype, value, traceback):
3445 try:
3446 _unlock_file(self.f)
3447 finally:
3448 self.f.close()
3449
3450 def __iter__(self):
3451 return iter(self.f)
3452
3453 def write(self, *args):
3454 return self.f.write(*args)
3455
3456 def read(self, *args):
3457 return self.f.read(*args)
4eb7f1d1
JMF
3458
3459
4644ac55
S
3460def get_filesystem_encoding():
3461 encoding = sys.getfilesystemencoding()
3462 return encoding if encoding is not None else 'utf-8'
3463
3464
4eb7f1d1 3465def shell_quote(args):
a6a173c2 3466 quoted_args = []
4644ac55 3467 encoding = get_filesystem_encoding()
a6a173c2
JMF
3468 for a in args:
3469 if isinstance(a, bytes):
3470 # We may get a filename encoded with 'encodeFilename'
3471 a = a.decode(encoding)
aefce8e6 3472 quoted_args.append(compat_shlex_quote(a))
28e614de 3473 return ' '.join(quoted_args)
9d4660ca
PH
3474
3475
3476def smuggle_url(url, data):
3477 """ Pass additional data in a URL for internal use. """
3478
81953d1a
RA
3479 url, idata = unsmuggle_url(url, {})
3480 data.update(idata)
15707c7e 3481 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3482 {'__youtubedl_smuggle': json.dumps(data)})
3483 return url + '#' + sdata
9d4660ca
PH
3484
3485
79f82953 3486def unsmuggle_url(smug_url, default=None):
83e865a3 3487 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3488 return smug_url, default
28e614de
PH
3489 url, _, sdata = smug_url.rpartition('#')
3490 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3491 data = json.loads(jsond)
3492 return url, data
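# Example usage (illustrative only; values assume smuggle_url/unsmuggle_url behave as
# defined above): smuggled data round-trips through the URL fragment, e.g.
#   url = smuggle_url('https://example.com/video', {'referer': 'https://example.com/'})
#   unsmuggle_url(url) -> ('https://example.com/video', {'referer': 'https://example.com/'})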
02dbf93f
PH
3493
3494
02dbf93f
PH
3495def format_bytes(bytes):
3496 if bytes is None:
28e614de 3497 return 'N/A'
02dbf93f
PH
3498 if type(bytes) is str:
3499 bytes = float(bytes)
3500 if bytes == 0.0:
3501 exponent = 0
3502 else:
3503 exponent = int(math.log(bytes, 1024.0))
28e614de 3504 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3505 converted = float(bytes) / float(1024 ** exponent)
28e614de 3506 return '%.2f%s' % (converted, suffix)
f53c966a 3507
1c088fa8 3508
fb47597b
S
3509def lookup_unit_table(unit_table, s):
3510 units_re = '|'.join(re.escape(u) for u in unit_table)
3511 m = re.match(
782b1b5b 3512 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3513 if not m:
3514 return None
3515 num_str = m.group('num').replace(',', '.')
3516 mult = unit_table[m.group('unit')]
3517 return int(float(num_str) * mult)
3518
3519
be64b5b0
PH
3520def parse_filesize(s):
3521 if s is None:
3522 return None
3523
dfb1b146 3524 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3525 # but we support those too
3526 _UNIT_TABLE = {
3527 'B': 1,
3528 'b': 1,
70852b47 3529 'bytes': 1,
be64b5b0
PH
3530 'KiB': 1024,
3531 'KB': 1000,
3532 'kB': 1024,
3533 'Kb': 1000,
13585d76 3534 'kb': 1000,
70852b47
YCH
3535 'kilobytes': 1000,
3536 'kibibytes': 1024,
be64b5b0
PH
3537 'MiB': 1024 ** 2,
3538 'MB': 1000 ** 2,
3539 'mB': 1024 ** 2,
3540 'Mb': 1000 ** 2,
13585d76 3541 'mb': 1000 ** 2,
70852b47
YCH
3542 'megabytes': 1000 ** 2,
3543 'mebibytes': 1024 ** 2,
be64b5b0
PH
3544 'GiB': 1024 ** 3,
3545 'GB': 1000 ** 3,
3546 'gB': 1024 ** 3,
3547 'Gb': 1000 ** 3,
13585d76 3548 'gb': 1000 ** 3,
70852b47
YCH
3549 'gigabytes': 1000 ** 3,
3550 'gibibytes': 1024 ** 3,
be64b5b0
PH
3551 'TiB': 1024 ** 4,
3552 'TB': 1000 ** 4,
3553 'tB': 1024 ** 4,
3554 'Tb': 1000 ** 4,
13585d76 3555 'tb': 1000 ** 4,
70852b47
YCH
3556 'terabytes': 1000 ** 4,
3557 'tebibytes': 1024 ** 4,
be64b5b0
PH
3558 'PiB': 1024 ** 5,
3559 'PB': 1000 ** 5,
3560 'pB': 1024 ** 5,
3561 'Pb': 1000 ** 5,
13585d76 3562 'pb': 1000 ** 5,
70852b47
YCH
3563 'petabytes': 1000 ** 5,
3564 'pebibytes': 1024 ** 5,
be64b5b0
PH
3565 'EiB': 1024 ** 6,
3566 'EB': 1000 ** 6,
3567 'eB': 1024 ** 6,
3568 'Eb': 1000 ** 6,
13585d76 3569 'eb': 1000 ** 6,
70852b47
YCH
3570 'exabytes': 1000 ** 6,
3571 'exbibytes': 1024 ** 6,
be64b5b0
PH
3572 'ZiB': 1024 ** 7,
3573 'ZB': 1000 ** 7,
3574 'zB': 1024 ** 7,
3575 'Zb': 1000 ** 7,
13585d76 3576 'zb': 1000 ** 7,
70852b47
YCH
3577 'zettabytes': 1000 ** 7,
3578 'zebibytes': 1024 ** 7,
be64b5b0
PH
3579 'YiB': 1024 ** 8,
3580 'YB': 1000 ** 8,
3581 'yB': 1024 ** 8,
3582 'Yb': 1000 ** 8,
13585d76 3583 'yb': 1000 ** 8,
70852b47
YCH
3584 'yottabytes': 1000 ** 8,
3585 'yobibytes': 1024 ** 8,
be64b5b0
PH
3586 }
3587
fb47597b
S
3588 return lookup_unit_table(_UNIT_TABLE, s)
3589
3590
3591def parse_count(s):
3592 if s is None:
be64b5b0
PH
3593 return None
3594
fb47597b
S
3595 s = s.strip()
3596
3597 if re.match(r'^[\d,.]+$', s):
3598 return str_to_int(s)
3599
3600 _UNIT_TABLE = {
3601 'k': 1000,
3602 'K': 1000,
3603 'm': 1000 ** 2,
3604 'M': 1000 ** 2,
3605 'kk': 1000 ** 2,
3606 'KK': 1000 ** 2,
3607 }
be64b5b0 3608
fb47597b 3609 return lookup_unit_table(_UNIT_TABLE, s)
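# Example usage (illustrative only; values assume parse_count behaves as defined above):
#   parse_count('1,234') -> 1234
#   parse_count('1.5M')  -> 1500000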
be64b5b0 3610
2f7ae819 3611
b871d7e9
S
3612def parse_resolution(s):
3613 if s is None:
3614 return {}
3615
3616 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3617 if mobj:
3618 return {
3619 'width': int(mobj.group('w')),
3620 'height': int(mobj.group('h')),
3621 }
3622
3623 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3624 if mobj:
3625 return {'height': int(mobj.group(1))}
3626
3627 mobj = re.search(r'\b([48])[kK]\b', s)
3628 if mobj:
3629 return {'height': int(mobj.group(1)) * 540}
3630
3631 return {}
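# Example usage (illustrative only; values assume parse_resolution behaves as defined above):
#   parse_resolution('1920x1080') -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')      -> {'height': 720}
#   parse_resolution('4k')        -> {'height': 2160}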
3632
3633
0dc41787
S
3634def parse_bitrate(s):
3635 if not isinstance(s, compat_str):
3636 return
3637 mobj = re.search(r'\b(\d+)\s*kbps', s)
3638 if mobj:
3639 return int(mobj.group(1))
3640
3641
a942d6cb 3642def month_by_name(name, lang='en'):
caefb1de
PH
3643 """ Return the number of a month by (locale-independently) English name """
3644
f6717dec 3645 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3646
caefb1de 3647 try:
f6717dec 3648 return month_names.index(name) + 1
7105440c
YCH
3649 except ValueError:
3650 return None
3651
3652
3653def month_by_abbreviation(abbrev):
3654 """ Return the number of a month by (locale-independently) English
3655 abbreviations """
3656
3657 try:
3658 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3659 except ValueError:
3660 return None
18258362
JMF
3661
3662
5aafe895 3663def fix_xml_ampersands(xml_str):
18258362 3664 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3665 return re.sub(
3666 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3667 '&amp;',
5aafe895 3668 xml_str)
e3946f98
PH
3669
3670
3671def setproctitle(title):
8bf48f23 3672 assert isinstance(title, compat_str)
c1c05c67
YCH
3673
3674 # ctypes in Jython is not complete
3675 # http://bugs.jython.org/issue2148
3676 if sys.platform.startswith('java'):
3677 return
3678
e3946f98 3679 try:
611c1dd9 3680 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3681 except OSError:
3682 return
2f49bcd6
RC
3683 except TypeError:
3684 # LoadLibrary in Windows Python 2.7.13 only expects
3685 # a bytestring, but since unicode_literals turns
3686 # every string into a unicode string, it fails.
3687 return
6eefe533
PH
3688 title_bytes = title.encode('utf-8')
3689 buf = ctypes.create_string_buffer(len(title_bytes))
3690 buf.value = title_bytes
e3946f98 3691 try:
6eefe533 3692 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3693 except AttributeError:
3694 return # Strange libc, just skip this
d7dda168
PH
3695
3696
3697def remove_start(s, start):
46bc9b7d 3698 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3699
3700
2b9faf55 3701def remove_end(s, end):
46bc9b7d 3702 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3703
3704
31b2051e
S
3705def remove_quotes(s):
3706 if s is None or len(s) < 2:
3707 return s
3708 for quote in ('"', "'", ):
3709 if s[0] == quote and s[-1] == quote:
3710 return s[1:-1]
3711 return s
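# Example usage (illustrative only; values assume these string helpers behave as defined above):
#   remove_start('https://www.example.com', 'https://') -> 'www.example.com'
#   remove_end('filename.tmp', '.tmp')                   -> 'filename'
#   remove_quotes('"hello"')                              -> 'hello'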
3712
3713
b6e0c7d2
U
3714def get_domain(url):
3715 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3716 return domain.group('domain') if domain else None
3717
3718
29eb5174 3719def url_basename(url):
9b8aaeed 3720 path = compat_urlparse.urlparse(url).path
28e614de 3721 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3722
3723
02dc0a36
S
3724def base_url(url):
3725 return re.match(r'https?://[^?#&]+/', url).group()
3726
3727
e34c3361 3728def urljoin(base, path):
4b5de77b
S
3729 if isinstance(path, bytes):
3730 path = path.decode('utf-8')
e34c3361
S
3731 if not isinstance(path, compat_str) or not path:
3732 return None
fad4ceb5 3733 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3734 return path
4b5de77b
S
3735 if isinstance(base, bytes):
3736 base = base.decode('utf-8')
3737 if not isinstance(base, compat_str) or not re.match(
3738 r'^(?:https?:)?//', base):
e34c3361
S
3739 return None
3740 return compat_urlparse.urljoin(base, path)
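# Example usage (illustrative only; values assume these URL helpers behave as defined above):
#   url_basename('https://example.com/path/to/file.mp4?x=1') -> 'file.mp4'
#   base_url('https://example.com/path/to/file.mp4')         -> 'https://example.com/path/to/'
#   urljoin('https://example.com/path/', 'file.mp4')          -> 'https://example.com/path/file.mp4'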
3741
3742
aa94a6d3
PH
3743class HEADRequest(compat_urllib_request.Request):
3744 def get_method(self):
611c1dd9 3745 return 'HEAD'
7217e148
PH
3746
3747
95cf60e8
S
3748class PUTRequest(compat_urllib_request.Request):
3749 def get_method(self):
3750 return 'PUT'
3751
3752
9732d77e 3753def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3754 if get_attr:
3755 if v is not None:
3756 v = getattr(v, get_attr, None)
9572013d
PH
3757 if v == '':
3758 v = None
1812afb7
S
3759 if v is None:
3760 return default
3761 try:
3762 return int(v) * invscale // scale
5e1271c5 3763 except (ValueError, TypeError):
af98f8ff 3764 return default
9732d77e 3765
9572013d 3766
40a90862
JMF
3767def str_or_none(v, default=None):
3768 return default if v is None else compat_str(v)
3769
9732d77e
PH
3770
3771def str_to_int(int_str):
48d4681e 3772 """ A more relaxed version of int_or_none """
42db58ec 3773 if isinstance(int_str, compat_integer_types):
348c6bf1 3774 return int_str
42db58ec
S
3775 elif isinstance(int_str, compat_str):
3776 int_str = re.sub(r'[,\.\+]', '', int_str)
3777 return int_or_none(int_str)
608d11f5
PH
3778
3779
9732d77e 3780def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3781 if v is None:
3782 return default
3783 try:
3784 return float(v) * invscale / scale
5e1271c5 3785 except (ValueError, TypeError):
caf80631 3786 return default
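# Example usage (illustrative only; values assume these coercion helpers behave as defined above):
#   int_or_none('42')             -> 42
#   int_or_none('')               -> None
#   str_to_int('1,000')           -> 1000
#   float_or_none('1.5')          -> 1.5
#   float_or_none('x', default=0) -> 0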
43f775e4
PH
3787
3788
c7e327c4
S
3789def bool_or_none(v, default=None):
3790 return v if isinstance(v, bool) else default
3791
3792
53cd37ba
S
3793def strip_or_none(v, default=None):
3794 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3795
3796
af03000a
S
3797def url_or_none(url):
3798 if not url or not isinstance(url, compat_str):
3799 return None
3800 url = url.strip()
29f7c58a 3801 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3802
3803
e29663c6 3804def strftime_or_none(timestamp, date_format, default=None):
3805 datetime_object = None
3806 try:
3807 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3808 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3809 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3810 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3811 return datetime_object.strftime(date_format)
3812 except (ValueError, TypeError, AttributeError):
3813 return default
3814
3815
608d11f5 3816def parse_duration(s):
8f9312c3 3817 if not isinstance(s, compat_basestring):
608d11f5
PH
3818 return None
3819
ca7b3246
S
3820 s = s.strip()
3821
acaff495 3822 days, hours, mins, secs, ms = [None] * 5
15846398 3823 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3824 if m:
3825 days, hours, mins, secs, ms = m.groups()
3826 else:
3827 m = re.match(
056653bb
S
3828 r'''(?ix)(?:P?
3829 (?:
3830 [0-9]+\s*y(?:ears?)?\s*
3831 )?
3832 (?:
3833 [0-9]+\s*m(?:onths?)?\s*
3834 )?
3835 (?:
3836 [0-9]+\s*w(?:eeks?)?\s*
3837 )?
8f4b58d7 3838 (?:
acaff495 3839 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3840 )?
056653bb 3841 T)?
acaff495 3842 (?:
3843 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3844 )?
3845 (?:
3846 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3847 )?
3848 (?:
3849 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3850 )?Z?$''', s)
acaff495 3851 if m:
3852 days, hours, mins, secs, ms = m.groups()
3853 else:
15846398 3854 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3855 if m:
3856 hours, mins = m.groups()
3857 else:
3858 return None
3859
3860 duration = 0
3861 if secs:
3862 duration += float(secs)
3863 if mins:
3864 duration += float(mins) * 60
3865 if hours:
3866 duration += float(hours) * 60 * 60
3867 if days:
3868 duration += float(days) * 24 * 60 * 60
3869 if ms:
3870 duration += float(ms)
3871 return duration
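# Example usage (illustrative only; values assume parse_duration behaves as defined above):
# clock-style, ISO 8601 and free-form duration strings are all handled, e.g.
#   parse_duration('1:02:03') -> 3723.0
#   parse_duration('PT1H30M') -> 5400.0
#   parse_duration('10 min')  -> 600.0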
91d7d0b3
JMF
3872
3873
e65e4c88 3874def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3875 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3876 return (
3877 '{0}.{1}{2}'.format(name, ext, real_ext)
3878 if not expected_real_ext or real_ext[1:] == expected_real_ext
3879 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3880
3881
b3ed15b7
S
3882def replace_extension(filename, ext, expected_real_ext=None):
3883 name, real_ext = os.path.splitext(filename)
3884 return '{0}.{1}'.format(
3885 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3886 ext)
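# Example usage (illustrative only; values assume these extension helpers behave as defined above):
#   prepend_extension('video.mp4', 'temp') -> 'video.temp.mp4'
#   replace_extension('video.mp4', 'mkv')  -> 'video.mkv'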
3887
3888
d70ad093
PH
3889def check_executable(exe, args=[]):
3890 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3891 args can be a list of arguments for a short output (like -version) """
3892 try:
f5b1bca9 3893 process_communicate_or_kill(subprocess.Popen(
3894 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
d70ad093
PH
3895 except OSError:
3896 return False
3897 return exe
3898
3899
95807118 3900def get_exe_version(exe, args=['--version'],
cae97f65 3901 version_re=None, unrecognized='present'):
3902 """ Returns the version of the specified executable,
3903 or False if the executable is not present """
3904 try:
b64d04c1 3905 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 3906 # SIGTTOU if yt-dlp is run in the background.
067aa17e 3907 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
f5b1bca9 3908 out, _ = process_communicate_or_kill(subprocess.Popen(
54116803 3909 [encodeArgument(exe)] + args,
00ca7552 3910 stdin=subprocess.PIPE,
f5b1bca9 3911 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
3912 except OSError:
3913 return False
3914 if isinstance(out, bytes): # Python 2.x
3915 out = out.decode('ascii', 'ignore')
3916 return detect_exe_version(out, version_re, unrecognized)
3917
3918
3919def detect_exe_version(output, version_re=None, unrecognized='present'):
3920 assert isinstance(output, compat_str)
3921 if version_re is None:
3922 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3923 m = re.search(version_re, output)
3924 if m:
3925 return m.group(1)
3926 else:
3927 return unrecognized
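# Example of the version detection (banner strings are made up):
#   >>> detect_exe_version('ffmpeg version 4.3.1 Copyright (c) 2000-2020')
#   '4.3.1'
#   >>> detect_exe_version('some unexpected banner')
#   'present'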
3928
3929
b7ab0590 3930class PagedList(object):
3931 def __len__(self):
3932 # This is only useful for tests
3933 return len(self.getslice())
3934
3935
3936class OnDemandPagedList(PagedList):
6be08ce6 3937 def __init__(self, pagefunc, pagesize, use_cache=True):
3938 self._pagefunc = pagefunc
3939 self._pagesize = pagesize
3940 self._use_cache = use_cache
3941 if use_cache:
3942 self._cache = {}
9c44d242 3943
3944 def getslice(self, start=0, end=None):
3945 res = []
3946 for pagenum in itertools.count(start // self._pagesize):
3947 firstid = pagenum * self._pagesize
3948 nextfirstid = pagenum * self._pagesize + self._pagesize
3949 if start >= nextfirstid:
3950 continue
3951
3952 page_results = None
3953 if self._use_cache:
3954 page_results = self._cache.get(pagenum)
3955 if page_results is None:
3956 page_results = list(self._pagefunc(pagenum))
3957 if self._use_cache:
3958 self._cache[pagenum] = page_results
3959
3960 startv = (
3961 start % self._pagesize
3962 if firstid <= start < nextfirstid
3963 else 0)
3964
3965 endv = (
3966 ((end - 1) % self._pagesize) + 1
3967 if (end is not None and firstid <= end <= nextfirstid)
3968 else None)
3969
3970 if startv != 0 or endv is not None:
3971 page_results = page_results[startv:endv]
3972 res.extend(page_results)
3973
3974 # A little optimization - if the current page is not "full", i.e. does
3975 # not contain page_size videos, then we can assume that this page
3976 # is the last one - there are no more ids on further pages -
3977 # so there is no need to query again.
3978 if len(page_results) + startv < self._pagesize:
3979 break
3980
3981 # If we got the whole page, but the next page is not interesting,
3982 # break out early as well
3983 if end == nextfirstid:
3984 break
3985 return res
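# Sketch of how slicing maps onto pages (the pagefunc here is a toy example):
#   >>> pages = OnDemandPagedList(lambda n: list(range(n * 10, (n + 1) * 10)), 10)
#   >>> pages.getslice(5, 15)   # spans pages 0 and 1, only two page fetches
#   [5, 6, 7, 8, 9, 10, 11, 12, 13, 14]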
3986
3987
3988class InAdvancePagedList(PagedList):
3989 def __init__(self, pagefunc, pagecount, pagesize):
3990 self._pagefunc = pagefunc
3991 self._pagecount = pagecount
3992 self._pagesize = pagesize
3993
3994 def getslice(self, start=0, end=None):
3995 res = []
3996 start_page = start // self._pagesize
3997 end_page = (
3998 self._pagecount if end is None else (end // self._pagesize + 1))
3999 skip_elems = start - start_page * self._pagesize
4000 only_more = None if end is None else end - start
4001 for pagenum in range(start_page, end_page):
4002 page = list(self._pagefunc(pagenum))
4003 if skip_elems:
4004 page = page[skip_elems:]
4005 skip_elems = None
4006 if only_more is not None:
4007 if len(page) < only_more:
4008 only_more -= len(page)
4009 else:
4010 page = page[:only_more]
4011 res.extend(page)
4012 break
4013 res.extend(page)
4014 return res
4015
4016
81c2f20b 4017def uppercase_escape(s):
676eb3f2 4018 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4019 return re.sub(
a612753d 4020 r'\\U[0-9a-fA-F]{8}',
4021 lambda m: unicode_escape(m.group(0))[0],
4022 s)
4023
4024
4025def lowercase_escape(s):
4026 unicode_escape = codecs.getdecoder('unicode_escape')
4027 return re.sub(
4028 r'\\u[0-9a-fA-F]{4}',
4029 lambda m: unicode_escape(m.group(0))[0],
4030 s)
b53466e1 4031
4032
4033def escape_rfc3986(s):
4034 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4035 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4036 s = s.encode('utf-8')
ecc0c5ee 4037 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
4038
4039
4040def escape_url(url):
4041 """Escape URL as suggested by RFC 3986"""
4042 url_parsed = compat_urllib_parse_urlparse(url)
4043 return url_parsed._replace(
efbed08d 4044 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
4045 path=escape_rfc3986(url_parsed.path),
4046 params=escape_rfc3986(url_parsed.params),
4047 query=escape_rfc3986(url_parsed.query),
4048 fragment=escape_rfc3986(url_parsed.fragment)
4049 ).geturl()
4050
4051
4052def read_batch_urls(batch_fd):
4053 def fixup(url):
4054 if not isinstance(url, compat_str):
4055 url = url.decode('utf-8', 'replace')
8c04f0be 4056 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4057 for bom in BOM_UTF8:
4058 if url.startswith(bom):
4059 url = url[len(bom):]
4060 url = url.lstrip()
4061 if not url or url.startswith(('#', ';', ']')):
62e609ab 4062 return False
8c04f0be 4063 # "#" cannot be stripped out since it is part of the URI
4064 # However, it can be safely stripped out if it follows a whitespace
4065 return re.split(r'\s#', url, 1)[0].rstrip()
4066
4067 with contextlib.closing(batch_fd) as fd:
4068 return [url for url in map(fixup, fd) if url]
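# Illustrative batch-file parsing (io is imported at the top of this module; URLs are examples):
#   >>> read_batch_urls(io.StringIO('https://example.com/a\n# a comment\n\nhttps://example.com/b'))
#   ['https://example.com/a', 'https://example.com/b']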
4069
4070
4071def urlencode_postdata(*args, **kargs):
15707c7e 4072 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4073
4074
38f9ef31 4075def update_url_query(url, query):
4076 if not query:
4077 return url
38f9ef31 4078 parsed_url = compat_urlparse.urlparse(url)
4079 qs = compat_parse_qs(parsed_url.query)
4080 qs.update(query)
4081 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4082 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4083
8e60dc75 4084
4085def update_Request(req, url=None, data=None, headers={}, query={}):
4086 req_headers = req.headers.copy()
4087 req_headers.update(headers)
4088 req_data = data or req.data
4089 req_url = update_url_query(url or req.get_full_url(), query)
4090 req_get_method = req.get_method()
4091 if req_get_method == 'HEAD':
4092 req_type = HEADRequest
4093 elif req_get_method == 'PUT':
4094 req_type = PUTRequest
4095 else:
4096 req_type = compat_urllib_request.Request
4097 new_req = req_type(
4098 req_url, data=req_data, headers=req_headers,
4099 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4100 if hasattr(req, 'timeout'):
4101 new_req.timeout = req.timeout
4102 return new_req
4103
4104
10c87c15 4105def _multipart_encode_impl(data, boundary):
4106 content_type = 'multipart/form-data; boundary=%s' % boundary
4107
4108 out = b''
4109 for k, v in data.items():
4110 out += b'--' + boundary.encode('ascii') + b'\r\n'
4111 if isinstance(k, compat_str):
4112 k = k.encode('utf-8')
4113 if isinstance(v, compat_str):
4114 v = v.encode('utf-8')
4115 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4116 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4117 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
4118 if boundary.encode('ascii') in content:
4119 raise ValueError('Boundary overlaps with data')
4120 out += content
4121
4122 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4123
4124 return out, content_type
4125
4126
4127def multipart_encode(data, boundary=None):
4128 '''
4129 Encode a dict to RFC 7578-compliant form-data
4130
4131 data:
4132 A dict where keys and values can be either Unicode or bytes-like
4133 objects.
4134 boundary:
4135 If specified, it must be a Unicode object and is used as the boundary. Otherwise
4136 a random boundary is generated.
4137
4138 Reference: https://tools.ietf.org/html/rfc7578
4139 '''
4140 has_specified_boundary = boundary is not None
4141
4142 while True:
4143 if boundary is None:
4144 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4145
4146 try:
10c87c15 4147 out, content_type = _multipart_encode_impl(data, boundary)
4148 break
4149 except ValueError:
4150 if has_specified_boundary:
4151 raise
4152 boundary = None
4153
4154 return out, content_type
4155
4156
86296ad2 4157def dict_get(d, key_or_keys, default=None, skip_false_values=True):
4158 if isinstance(key_or_keys, (list, tuple)):
4159 for key in key_or_keys:
4160 if key not in d or d[key] is None or skip_false_values and not d[key]:
4161 continue
4162 return d[key]
4163 return default
4164 return d.get(key_or_keys, default)
4165
4166
329ca3be 4167def try_get(src, getter, expected_type=None):
4168 if not isinstance(getter, (list, tuple)):
4169 getter = [getter]
4170 for get in getter:
4171 try:
4172 v = get(src)
4173 except (AttributeError, KeyError, TypeError, IndexError):
4174 pass
4175 else:
4176 if expected_type is None or isinstance(v, expected_type):
4177 return v
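# How dict_get and try_get behave on missing or empty values (toy data):
#   >>> dict_get({'a': None, 'b': '', 'c': 'x'}, ('a', 'b', 'c'))  # None and falsy values are skipped
#   'x'
#   >>> try_get({'a': [{'b': 42}]}, lambda x: x['a'][0]['b'], int)
#   42
#   >>> try_get({}, lambda x: x['a'][0]['b']) is None  # failed lookups fall through to None
#   True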
4178
4179
4180def merge_dicts(*dicts):
4181 merged = {}
4182 for a_dict in dicts:
4183 for k, v in a_dict.items():
4184 if v is None:
4185 continue
4186 if (k not in merged
4187 or (isinstance(v, compat_str) and v
4188 and isinstance(merged[k], compat_str)
4189 and not merged[k])):
4190 merged[k] = v
4191 return merged
4192
4193
4194def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4195 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4196
16392824 4197
4198US_RATINGS = {
4199 'G': 0,
4200 'PG': 10,
4201 'PG-13': 13,
4202 'R': 16,
4203 'NC': 18,
4204}
4205
4206
a8795327 4207TV_PARENTAL_GUIDELINES = {
4208 'TV-Y': 0,
4209 'TV-Y7': 7,
4210 'TV-G': 0,
4211 'TV-PG': 0,
4212 'TV-14': 14,
4213 'TV-MA': 17,
4214}
4215
4216
146c80e2 4217def parse_age_limit(s):
4218 if type(s) == int:
4219 return s if 0 <= s <= 21 else None
4220 if not isinstance(s, compat_basestring):
d838b1bd 4221 return None
146c80e2 4222 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
4223 if m:
4224 return int(m.group('age'))
5c5fae6d 4225 s = s.upper()
4226 if s in US_RATINGS:
4227 return US_RATINGS[s]
5a16c9d9 4228 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4229 if m:
5a16c9d9 4230 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4231 return None
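# Examples of the rating systems understood above (inputs are illustrative):
#   >>> parse_age_limit('18+')
#   18
#   >>> parse_age_limit('PG-13')
#   13
#   >>> parse_age_limit('TV-MA')
#   17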
4232
4233
fac55558 4234def strip_jsonp(code):
609a61e3 4235 return re.sub(
5552c9eb 4236 r'''(?sx)^
e9c671d5 4237 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
4238 (?:\s*&&\s*(?P=func_name))?
4239 \s*\(\s*(?P<callback_data>.*)\);?
4240 \s*?(?://[^\n]*)*$''',
4241 r'\g<callback_data>', code)
4242
4243
5c610515 4244def js_to_json(code, vars={}):
4245 # vars is a dict of var, val pairs to substitute
4246 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4247 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4248 INTEGER_TABLE = (
4249 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4250 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4251 )
4252
e05f6939 4253 def fix_kv(m):
4254 v = m.group(0)
4255 if v in ('true', 'false', 'null'):
4256 return v
8bdd16b4 4257 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4258 return ""
4259
4260 if v[0] in ("'", '"'):
4261 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4262 '"': '\\"',
bd1e4844 4263 "\\'": "'",
4264 '\\\n': '',
4265 '\\x': '\\u00',
4266 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4267 else:
4268 for regex, base in INTEGER_TABLE:
4269 im = re.match(regex, v)
4270 if im:
4271 i = int(im.group(1), base)
4272 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4273
5c610515 4274 if v in vars:
4275 return vars[v]
4276
e7b6d122 4277 return '"%s"' % v
e05f6939 4278
bd1e4844 4279 return re.sub(r'''(?sx)
4280 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4281 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4282 {comment}|,(?={skip}[\]}}])|
c384d537 4283 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4195096e 4284 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4285 [0-9]+(?={skip}:)|
4286 !+
4195096e 4287 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
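# A small sketch of the JS-to-JSON conversion (the object literal is made up):
#   >>> js_to_json("{abc: 'def', key: 0x1A}")
#   '{"abc": "def", "key": 26}'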
4288
4289
4290def qualities(quality_ids):
4291 """ Get a numeric quality value out of a list of possible values """
4292 def q(qid):
4293 try:
4294 return quality_ids.index(qid)
4295 except ValueError:
4296 return -1
4297 return q
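# Illustrative use of qualities() for ranking (the quality ids are examples):
#   >>> q = qualities(['small', 'medium', 'hd'])
#   >>> q('medium'), q('hd'), q('unknown')
#   (1, 2, -1)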
4298
acd69589 4299
de6000d9 4300DEFAULT_OUTTMPL = {
4301 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4302 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4303}
4304OUTTMPL_TYPES = {
72755351 4305 'chapter': None,
de6000d9 4306 'subtitle': None,
4307 'thumbnail': None,
4308 'description': 'description',
4309 'annotation': 'annotations.xml',
4310 'infojson': 'info.json',
4311 'pl_description': 'description',
4312 'pl_infojson': 'info.json',
4313}
0a871f68 4314
143db31d 4315# As of [1] format syntax is:
4316# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4317# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4318FORMAT_RE = r'''(?x)
4319 (?<!%)
4320 %
4321 \({0}\) # mapping key
4322 (?:[#0\-+ ]+)? # conversion flags (optional)
4323 (?:\d+)? # minimum field width (optional)
4324 (?:\.\d+)? # precision (optional)
4325 [hlL]? # length modifier (optional)
4326 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
4327'''
4328
a020a0dc
PH
4329
4330def limit_length(s, length):
4331 """ Add ellipses to overly long strings """
4332 if s is None:
4333 return None
4334 ELLIPSES = '...'
4335 if len(s) > length:
4336 return s[:length - len(ELLIPSES)] + ELLIPSES
4337 return s
48844745
PH
4338
4339
4340def version_tuple(v):
5f9b8394 4341 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4342
4343
4344def is_outdated_version(version, limit, assume_new=True):
4345 if not version:
4346 return not assume_new
4347 try:
4348 return version_tuple(version) < version_tuple(limit)
4349 except ValueError:
4350 return not assume_new
732ea2f0
PH
4351
4352
4353def ytdl_is_updateable():
7a5c1cfe 4354 """ Returns whether yt-dlp can be updated with -U """
735d865e 4355 return False
4356
732ea2f0
PH
4357 from zipimport import zipimporter
4358
4359 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
4360
4361
4362def args_to_str(args):
4363 # Get a short string representation for a subprocess command
702ccf2d 4364 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4365
4366
9b9c5355 4367def error_to_compat_str(err):
fdae2358
S
4368 err_str = str(err)
4369 # On Python 2, error byte strings must be decoded with the proper
4370 # encoding rather than ascii
4371 if sys.version_info[0] < 3:
4372 err_str = err_str.decode(preferredencoding())
4373 return err_str
4374
4375
c460bdd5 4376def mimetype2ext(mt):
eb9ee194
S
4377 if mt is None:
4378 return None
4379
765ac263
JMF
4380 ext = {
4381 'audio/mp4': 'm4a',
6c33d24b
YCH
4382 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4383 # it's the most popular one
4384 'audio/mpeg': 'mp3',
ba39289d 4385 'audio/x-wav': 'wav',
765ac263
JMF
4386 }.get(mt)
4387 if ext is not None:
4388 return ext
4389
c460bdd5 4390 _, _, res = mt.rpartition('/')
6562d34a 4391 res = res.split(';')[0].strip().lower()
c460bdd5
PH
4392
4393 return {
f6861ec9 4394 '3gpp': '3gp',
cafcf657 4395 'smptett+xml': 'tt',
cafcf657 4396 'ttaf+xml': 'dfxp',
a0d8d704 4397 'ttml+xml': 'ttml',
f6861ec9 4398 'x-flv': 'flv',
a0d8d704 4399 'x-mp4-fragmented': 'mp4',
d4f05d47 4400 'x-ms-sami': 'sami',
a0d8d704 4401 'x-ms-wmv': 'wmv',
b4173f15
RA
4402 'mpegurl': 'm3u8',
4403 'x-mpegurl': 'm3u8',
4404 'vnd.apple.mpegurl': 'm3u8',
4405 'dash+xml': 'mpd',
b4173f15 4406 'f4m+xml': 'f4m',
f164b971 4407 'hds+xml': 'f4m',
e910fe2f 4408 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4409 'quicktime': 'mov',
98ce1a3f 4410 'mp2t': 'ts',
39e7107d 4411 'x-wav': 'wav',
c460bdd5
PH
4412 }.get(res, res)
4413
4414
4f3c5e06 4415def parse_codecs(codecs_str):
4416 # http://tools.ietf.org/html/rfc6381
4417 if not codecs_str:
4418 return {}
a0566bbf 4419 split_codecs = list(filter(None, map(
4f3c5e06 4420 lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4421 vcodec, acodec = None, None
a0566bbf 4422 for full_codec in split_codecs:
4f3c5e06 4423 codec = full_codec.split('.')[0]
28cc2241 4424 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4425 if not vcodec:
4426 vcodec = full_codec
60f5c9fb 4427 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4428 if not acodec:
4429 acodec = full_codec
4430 else:
60f5c9fb 4431 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4432 if not vcodec and not acodec:
a0566bbf 4433 if len(split_codecs) == 2:
4f3c5e06 4434 return {
a0566bbf 4435 'vcodec': split_codecs[0],
4436 'acodec': split_codecs[1],
4f3c5e06 4437 }
4438 else:
4439 return {
4440 'vcodec': vcodec or 'none',
4441 'acodec': acodec or 'none',
4442 }
4443 return {}
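# Typical inputs and results (codec strings are examples; dict key order may vary):
#   parse_codecs('avc1.64001f, mp4a.40.2')  ->  {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
#   parse_codecs('vp9')                     ->  {'vcodec': 'vp9', 'acodec': 'none'}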
4444
4445
2ccd1b10 4446def urlhandle_detect_ext(url_handle):
79298173 4447 getheader = url_handle.headers.get
2ccd1b10 4448
b55ee18f
PH
4449 cd = getheader('Content-Disposition')
4450 if cd:
4451 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4452 if m:
4453 e = determine_ext(m.group('filename'), default_ext=None)
4454 if e:
4455 return e
4456
c460bdd5 4457 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4458
4459
1e399778
YCH
4460def encode_data_uri(data, mime_type):
4461 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4462
4463
05900629 4464def age_restricted(content_limit, age_limit):
6ec6cb4e 4465 """ Returns True iff the content should be blocked """
05900629
PH
4466
4467 if age_limit is None: # No limit set
4468 return False
4469 if content_limit is None:
4470 return False # Content available for everyone
4471 return age_limit < content_limit
61ca9a80
PH
4472
4473
4474def is_html(first_bytes):
4475 """ Detect whether a file contains HTML by examining its first bytes. """
4476
4477 BOMS = [
4478 (b'\xef\xbb\xbf', 'utf-8'),
4479 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4480 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4481 (b'\xff\xfe', 'utf-16-le'),
4482 (b'\xfe\xff', 'utf-16-be'),
4483 ]
4484 for bom, enc in BOMS:
4485 if first_bytes.startswith(bom):
4486 s = first_bytes[len(bom):].decode(enc, 'replace')
4487 break
4488 else:
4489 s = first_bytes.decode('utf-8', 'replace')
4490
4491 return re.match(r'^\s*<', s)
a055469f
PH
4492
4493
4494def determine_protocol(info_dict):
4495 protocol = info_dict.get('protocol')
4496 if protocol is not None:
4497 return protocol
4498
4499 url = info_dict['url']
4500 if url.startswith('rtmp'):
4501 return 'rtmp'
4502 elif url.startswith('mms'):
4503 return 'mms'
4504 elif url.startswith('rtsp'):
4505 return 'rtsp'
4506
4507 ext = determine_ext(url)
4508 if ext == 'm3u8':
4509 return 'm3u8'
4510 elif ext == 'f4m':
4511 return 'f4m'
4512
4513 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4514
4515
76d321f6 4516def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4517 """ Render a list of rows, each as a list of values """
76d321f6 4518
4519 def get_max_lens(table):
4520 return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4521
4522 def filter_using_list(row, filterArray):
4523 return [col for (take, col) in zip(filterArray, row) if take]
4524
4525 if hideEmpty:
4526 max_lens = get_max_lens(data)
4527 header_row = filter_using_list(header_row, max_lens)
4528 data = [filter_using_list(row, max_lens) for row in data]
4529
cfb56d1a 4530 table = [header_row] + data
76d321f6 4531 max_lens = get_max_lens(table)
4532 if delim:
4533 table = [header_row] + [['-' * ml for ml in max_lens]] + data
4534 format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
cfb56d1a 4535 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4536
4537
4538def _match_one(filter_part, dct):
4539 COMPARISON_OPERATORS = {
4540 '<': operator.lt,
4541 '<=': operator.le,
4542 '>': operator.gt,
4543 '>=': operator.ge,
4544 '=': operator.eq,
4545 '!=': operator.ne,
4546 }
4547 operator_rex = re.compile(r'''(?x)\s*
4548 (?P<key>[a-z_]+)
4549 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4550 (?:
4551 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4552 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4553 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4554 )
4555 \s*$
4556 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4557 m = operator_rex.search(filter_part)
4558 if m:
4559 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4560 actual_value = dct.get(m.group('key'))
4561 if (m.group('quotedstrval') is not None
4562 or m.group('strval') is not None
4563 # If the original field is a string and the matching comparison value is
4564 # a number, we should respect the origin of the original field
4565 # and process the comparison value as a string (see
067aa17e 4566 # https://github.com/ytdl-org/youtube-dl/issues/11082).
4567 or actual_value is not None and m.group('intval') is not None
4568 and isinstance(actual_value, compat_str)):
4569 if m.group('op') not in ('=', '!='):
4570 raise ValueError(
4571 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4572 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4573 quote = m.group('quote')
4574 if quote is not None:
4575 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4576 else:
4577 try:
4578 comparison_value = int(m.group('intval'))
4579 except ValueError:
4580 comparison_value = parse_filesize(m.group('intval'))
4581 if comparison_value is None:
4582 comparison_value = parse_filesize(m.group('intval') + 'B')
4583 if comparison_value is None:
4584 raise ValueError(
4585 'Invalid integer value %r in filter part %r' % (
4586 m.group('intval'), filter_part))
347de493
PH
4587 if actual_value is None:
4588 return m.group('none_inclusive')
4589 return op(actual_value, comparison_value)
4590
4591 UNARY_OPERATORS = {
1cc47c66
S
4592 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4593 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4594 }
4595 operator_rex = re.compile(r'''(?x)\s*
4596 (?P<op>%s)\s*(?P<key>[a-z_]+)
4597 \s*$
4598 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4599 m = operator_rex.search(filter_part)
4600 if m:
4601 op = UNARY_OPERATORS[m.group('op')]
4602 actual_value = dct.get(m.group('key'))
4603 return op(actual_value)
4604
4605 raise ValueError('Invalid filter part %r' % filter_part)
4606
4607
4608def match_str(filter_str, dct):
4609 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4610
4611 return all(
4612 _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
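# Illustrative filters (field names and values below are hypothetical):
#   >>> match_str('duration > 60 & like_count > 100', {'duration': 120, 'like_count': 190})
#   True
#   >>> match_str('!is_live', {'is_live': True})
#   False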
4613
4614
4615def match_filter_func(filter_str):
4616 def _match_func(info_dict):
4617 if match_str(filter_str, info_dict):
4618 return None
4619 else:
4620 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4621 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4622 return _match_func
91410c9b
PH
4623
4624
bf6427d2
YCH
4625def parse_dfxp_time_expr(time_expr):
4626 if not time_expr:
d631d5f9 4627 return
bf6427d2
YCH
4628
4629 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4630 if mobj:
4631 return float(mobj.group('time_offset'))
4632
db2fe38b 4633 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4634 if mobj:
db2fe38b 4635 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4636
4637
c1c924ab
YCH
4638def srt_subtitles_timecode(seconds):
4639 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
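# Example timecode conversion:
#   >>> srt_subtitles_timecode(3661.5)
#   '01:01:01,500'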
bf6427d2
YCH
4640
4641
4642def dfxp2srt(dfxp_data):
3869028f
YCH
4643 '''
4644 @param dfxp_data A bytes-like object containing DFXP data
4645 @returns A unicode object containing converted SRT data
4646 '''
5b995f71 4647 LEGACY_NAMESPACES = (
3869028f
YCH
4648 (b'http://www.w3.org/ns/ttml', [
4649 b'http://www.w3.org/2004/11/ttaf1',
4650 b'http://www.w3.org/2006/04/ttaf1',
4651 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4652 ]),
3869028f
YCH
4653 (b'http://www.w3.org/ns/ttml#styling', [
4654 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4655 ]),
4656 )
4657
4658 SUPPORTED_STYLING = [
4659 'color',
4660 'fontFamily',
4661 'fontSize',
4662 'fontStyle',
4663 'fontWeight',
4664 'textDecoration'
4665 ]
4666
4e335771 4667 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4668 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4669 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4670 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4671 })
bf6427d2 4672
5b995f71
RA
4673 styles = {}
4674 default_style = {}
4675
87de7069 4676 class TTMLPElementParser(object):
5b995f71
RA
4677 _out = ''
4678 _unclosed_elements = []
4679 _applied_styles = []
bf6427d2 4680
2b14cb56 4681 def start(self, tag, attrib):
5b995f71
RA
4682 if tag in (_x('ttml:br'), 'br'):
4683 self._out += '\n'
4684 else:
4685 unclosed_elements = []
4686 style = {}
4687 element_style_id = attrib.get('style')
4688 if default_style:
4689 style.update(default_style)
4690 if element_style_id:
4691 style.update(styles.get(element_style_id, {}))
4692 for prop in SUPPORTED_STYLING:
4693 prop_val = attrib.get(_x('tts:' + prop))
4694 if prop_val:
4695 style[prop] = prop_val
4696 if style:
4697 font = ''
4698 for k, v in sorted(style.items()):
4699 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4700 continue
4701 if k == 'color':
4702 font += ' color="%s"' % v
4703 elif k == 'fontSize':
4704 font += ' size="%s"' % v
4705 elif k == 'fontFamily':
4706 font += ' face="%s"' % v
4707 elif k == 'fontWeight' and v == 'bold':
4708 self._out += '<b>'
4709 unclosed_elements.append('b')
4710 elif k == 'fontStyle' and v == 'italic':
4711 self._out += '<i>'
4712 unclosed_elements.append('i')
4713 elif k == 'textDecoration' and v == 'underline':
4714 self._out += '<u>'
4715 unclosed_elements.append('u')
4716 if font:
4717 self._out += '<font' + font + '>'
4718 unclosed_elements.append('font')
4719 applied_style = {}
4720 if self._applied_styles:
4721 applied_style.update(self._applied_styles[-1])
4722 applied_style.update(style)
4723 self._applied_styles.append(applied_style)
4724 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4725
2b14cb56 4726 def end(self, tag):
5b995f71
RA
4727 if tag not in (_x('ttml:br'), 'br'):
4728 unclosed_elements = self._unclosed_elements.pop()
4729 for element in reversed(unclosed_elements):
4730 self._out += '</%s>' % element
4731 if unclosed_elements and self._applied_styles:
4732 self._applied_styles.pop()
bf6427d2 4733
2b14cb56 4734 def data(self, data):
5b995f71 4735 self._out += data
2b14cb56 4736
4737 def close(self):
5b995f71 4738 return self._out.strip()
2b14cb56 4739
4740 def parse_node(node):
4741 target = TTMLPElementParser()
4742 parser = xml.etree.ElementTree.XMLParser(target=target)
4743 parser.feed(xml.etree.ElementTree.tostring(node))
4744 return parser.close()
bf6427d2 4745
5b995f71
RA
4746 for k, v in LEGACY_NAMESPACES:
4747 for ns in v:
4748 dfxp_data = dfxp_data.replace(ns, k)
4749
3869028f 4750 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 4751 out = []
5b995f71 4752 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
4753
4754 if not paras:
4755 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 4756
5b995f71
RA
4757 repeat = False
4758 while True:
4759 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
4760 style_id = style.get('id') or style.get(_x('xml:id'))
4761 if not style_id:
4762 continue
5b995f71
RA
4763 parent_style_id = style.get('style')
4764 if parent_style_id:
4765 if parent_style_id not in styles:
4766 repeat = True
4767 continue
4768 styles[style_id] = styles[parent_style_id].copy()
4769 for prop in SUPPORTED_STYLING:
4770 prop_val = style.get(_x('tts:' + prop))
4771 if prop_val:
4772 styles.setdefault(style_id, {})[prop] = prop_val
4773 if repeat:
4774 repeat = False
4775 else:
4776 break
4777
4778 for p in ('body', 'div'):
4779 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4780 if ele is None:
4781 continue
4782 style = styles.get(ele.get('style'))
4783 if not style:
4784 continue
4785 default_style.update(style)
4786
bf6427d2 4787 for para, index in zip(paras, itertools.count(1)):
d631d5f9 4788 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 4789 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
4790 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4791 if begin_time is None:
4792 continue
7dff0363 4793 if not end_time:
d631d5f9
YCH
4794 if not dur:
4795 continue
4796 end_time = begin_time + dur
bf6427d2
YCH
4797 out.append('%d\n%s --> %s\n%s\n\n' % (
4798 index,
c1c924ab
YCH
4799 srt_subtitles_timecode(begin_time),
4800 srt_subtitles_timecode(end_time),
bf6427d2
YCH
4801 parse_node(para)))
4802
4803 return ''.join(out)
4804
4805
66e289ba
S
4806def cli_option(params, command_option, param):
4807 param = params.get(param)
98e698f1
RA
4808 if param:
4809 param = compat_str(param)
66e289ba
S
4810 return [command_option, param] if param is not None else []
4811
4812
4813def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4814 param = params.get(param)
5b232f46
S
4815 if param is None:
4816 return []
66e289ba
S
4817 assert isinstance(param, bool)
4818 if separator:
4819 return [command_option + separator + (true_value if param else false_value)]
4820 return [command_option, true_value if param else false_value]
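# Sketch of how cli_bool_option maps params to flags (option names are examples):
#   >>> cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#   ['--no-check-certificate', 'true']
#   >>> cli_bool_option({'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
#   ['--check-certificate=true']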
4821
4822
4823def cli_valueless_option(params, command_option, param, expected_value=True):
4824 param = params.get(param)
4825 return [command_option] if param == expected_value else []
4826
4827
e92caff5 4828def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 4829 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 4830 if use_compat:
5b1ecbb3 4831 return argdict
4832 else:
4833 argdict = None
eab9b2bc 4834 if argdict is None:
5b1ecbb3 4835 return default
eab9b2bc 4836 assert isinstance(argdict, dict)
4837
e92caff5 4838 assert isinstance(keys, (list, tuple))
4839 for key_list in keys:
4840 if isinstance(key_list, compat_str):
4841 key_list = (key_list,)
4842 arg_list = list(filter(
4843 lambda x: x is not None,
4844 [argdict.get(key.lower()) for key in key_list]))
4845 if arg_list:
4846 return [arg for args in arg_list for arg in args]
4847 return default
66e289ba
S
4848
4849
39672624
YCH
4850class ISO639Utils(object):
4851 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4852 _lang_map = {
4853 'aa': 'aar',
4854 'ab': 'abk',
4855 'ae': 'ave',
4856 'af': 'afr',
4857 'ak': 'aka',
4858 'am': 'amh',
4859 'an': 'arg',
4860 'ar': 'ara',
4861 'as': 'asm',
4862 'av': 'ava',
4863 'ay': 'aym',
4864 'az': 'aze',
4865 'ba': 'bak',
4866 'be': 'bel',
4867 'bg': 'bul',
4868 'bh': 'bih',
4869 'bi': 'bis',
4870 'bm': 'bam',
4871 'bn': 'ben',
4872 'bo': 'bod',
4873 'br': 'bre',
4874 'bs': 'bos',
4875 'ca': 'cat',
4876 'ce': 'che',
4877 'ch': 'cha',
4878 'co': 'cos',
4879 'cr': 'cre',
4880 'cs': 'ces',
4881 'cu': 'chu',
4882 'cv': 'chv',
4883 'cy': 'cym',
4884 'da': 'dan',
4885 'de': 'deu',
4886 'dv': 'div',
4887 'dz': 'dzo',
4888 'ee': 'ewe',
4889 'el': 'ell',
4890 'en': 'eng',
4891 'eo': 'epo',
4892 'es': 'spa',
4893 'et': 'est',
4894 'eu': 'eus',
4895 'fa': 'fas',
4896 'ff': 'ful',
4897 'fi': 'fin',
4898 'fj': 'fij',
4899 'fo': 'fao',
4900 'fr': 'fra',
4901 'fy': 'fry',
4902 'ga': 'gle',
4903 'gd': 'gla',
4904 'gl': 'glg',
4905 'gn': 'grn',
4906 'gu': 'guj',
4907 'gv': 'glv',
4908 'ha': 'hau',
4909 'he': 'heb',
b7acc835 4910 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4911 'hi': 'hin',
4912 'ho': 'hmo',
4913 'hr': 'hrv',
4914 'ht': 'hat',
4915 'hu': 'hun',
4916 'hy': 'hye',
4917 'hz': 'her',
4918 'ia': 'ina',
4919 'id': 'ind',
b7acc835 4920 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4921 'ie': 'ile',
4922 'ig': 'ibo',
4923 'ii': 'iii',
4924 'ik': 'ipk',
4925 'io': 'ido',
4926 'is': 'isl',
4927 'it': 'ita',
4928 'iu': 'iku',
4929 'ja': 'jpn',
4930 'jv': 'jav',
4931 'ka': 'kat',
4932 'kg': 'kon',
4933 'ki': 'kik',
4934 'kj': 'kua',
4935 'kk': 'kaz',
4936 'kl': 'kal',
4937 'km': 'khm',
4938 'kn': 'kan',
4939 'ko': 'kor',
4940 'kr': 'kau',
4941 'ks': 'kas',
4942 'ku': 'kur',
4943 'kv': 'kom',
4944 'kw': 'cor',
4945 'ky': 'kir',
4946 'la': 'lat',
4947 'lb': 'ltz',
4948 'lg': 'lug',
4949 'li': 'lim',
4950 'ln': 'lin',
4951 'lo': 'lao',
4952 'lt': 'lit',
4953 'lu': 'lub',
4954 'lv': 'lav',
4955 'mg': 'mlg',
4956 'mh': 'mah',
4957 'mi': 'mri',
4958 'mk': 'mkd',
4959 'ml': 'mal',
4960 'mn': 'mon',
4961 'mr': 'mar',
4962 'ms': 'msa',
4963 'mt': 'mlt',
4964 'my': 'mya',
4965 'na': 'nau',
4966 'nb': 'nob',
4967 'nd': 'nde',
4968 'ne': 'nep',
4969 'ng': 'ndo',
4970 'nl': 'nld',
4971 'nn': 'nno',
4972 'no': 'nor',
4973 'nr': 'nbl',
4974 'nv': 'nav',
4975 'ny': 'nya',
4976 'oc': 'oci',
4977 'oj': 'oji',
4978 'om': 'orm',
4979 'or': 'ori',
4980 'os': 'oss',
4981 'pa': 'pan',
4982 'pi': 'pli',
4983 'pl': 'pol',
4984 'ps': 'pus',
4985 'pt': 'por',
4986 'qu': 'que',
4987 'rm': 'roh',
4988 'rn': 'run',
4989 'ro': 'ron',
4990 'ru': 'rus',
4991 'rw': 'kin',
4992 'sa': 'san',
4993 'sc': 'srd',
4994 'sd': 'snd',
4995 'se': 'sme',
4996 'sg': 'sag',
4997 'si': 'sin',
4998 'sk': 'slk',
4999 'sl': 'slv',
5000 'sm': 'smo',
5001 'sn': 'sna',
5002 'so': 'som',
5003 'sq': 'sqi',
5004 'sr': 'srp',
5005 'ss': 'ssw',
5006 'st': 'sot',
5007 'su': 'sun',
5008 'sv': 'swe',
5009 'sw': 'swa',
5010 'ta': 'tam',
5011 'te': 'tel',
5012 'tg': 'tgk',
5013 'th': 'tha',
5014 'ti': 'tir',
5015 'tk': 'tuk',
5016 'tl': 'tgl',
5017 'tn': 'tsn',
5018 'to': 'ton',
5019 'tr': 'tur',
5020 'ts': 'tso',
5021 'tt': 'tat',
5022 'tw': 'twi',
5023 'ty': 'tah',
5024 'ug': 'uig',
5025 'uk': 'ukr',
5026 'ur': 'urd',
5027 'uz': 'uzb',
5028 've': 'ven',
5029 'vi': 'vie',
5030 'vo': 'vol',
5031 'wa': 'wln',
5032 'wo': 'wol',
5033 'xh': 'xho',
5034 'yi': 'yid',
e9a50fba 5035 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5036 'yo': 'yor',
5037 'za': 'zha',
5038 'zh': 'zho',
5039 'zu': 'zul',
5040 }
5041
5042 @classmethod
5043 def short2long(cls, code):
5044 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5045 return cls._lang_map.get(code[:2])
5046
5047 @classmethod
5048 def long2short(cls, code):
5049 """Convert language code from ISO 639-2/T to ISO 639-1"""
5050 for short_name, long_name in cls._lang_map.items():
5051 if long_name == code:
5052 return short_name
5053
5054
4eb10f66
YCH
5055class ISO3166Utils(object):
5056 # From http://data.okfn.org/data/core/country-list
5057 _country_map = {
5058 'AF': 'Afghanistan',
5059 'AX': 'Åland Islands',
5060 'AL': 'Albania',
5061 'DZ': 'Algeria',
5062 'AS': 'American Samoa',
5063 'AD': 'Andorra',
5064 'AO': 'Angola',
5065 'AI': 'Anguilla',
5066 'AQ': 'Antarctica',
5067 'AG': 'Antigua and Barbuda',
5068 'AR': 'Argentina',
5069 'AM': 'Armenia',
5070 'AW': 'Aruba',
5071 'AU': 'Australia',
5072 'AT': 'Austria',
5073 'AZ': 'Azerbaijan',
5074 'BS': 'Bahamas',
5075 'BH': 'Bahrain',
5076 'BD': 'Bangladesh',
5077 'BB': 'Barbados',
5078 'BY': 'Belarus',
5079 'BE': 'Belgium',
5080 'BZ': 'Belize',
5081 'BJ': 'Benin',
5082 'BM': 'Bermuda',
5083 'BT': 'Bhutan',
5084 'BO': 'Bolivia, Plurinational State of',
5085 'BQ': 'Bonaire, Sint Eustatius and Saba',
5086 'BA': 'Bosnia and Herzegovina',
5087 'BW': 'Botswana',
5088 'BV': 'Bouvet Island',
5089 'BR': 'Brazil',
5090 'IO': 'British Indian Ocean Territory',
5091 'BN': 'Brunei Darussalam',
5092 'BG': 'Bulgaria',
5093 'BF': 'Burkina Faso',
5094 'BI': 'Burundi',
5095 'KH': 'Cambodia',
5096 'CM': 'Cameroon',
5097 'CA': 'Canada',
5098 'CV': 'Cape Verde',
5099 'KY': 'Cayman Islands',
5100 'CF': 'Central African Republic',
5101 'TD': 'Chad',
5102 'CL': 'Chile',
5103 'CN': 'China',
5104 'CX': 'Christmas Island',
5105 'CC': 'Cocos (Keeling) Islands',
5106 'CO': 'Colombia',
5107 'KM': 'Comoros',
5108 'CG': 'Congo',
5109 'CD': 'Congo, the Democratic Republic of the',
5110 'CK': 'Cook Islands',
5111 'CR': 'Costa Rica',
5112 'CI': 'Côte d\'Ivoire',
5113 'HR': 'Croatia',
5114 'CU': 'Cuba',
5115 'CW': 'Curaçao',
5116 'CY': 'Cyprus',
5117 'CZ': 'Czech Republic',
5118 'DK': 'Denmark',
5119 'DJ': 'Djibouti',
5120 'DM': 'Dominica',
5121 'DO': 'Dominican Republic',
5122 'EC': 'Ecuador',
5123 'EG': 'Egypt',
5124 'SV': 'El Salvador',
5125 'GQ': 'Equatorial Guinea',
5126 'ER': 'Eritrea',
5127 'EE': 'Estonia',
5128 'ET': 'Ethiopia',
5129 'FK': 'Falkland Islands (Malvinas)',
5130 'FO': 'Faroe Islands',
5131 'FJ': 'Fiji',
5132 'FI': 'Finland',
5133 'FR': 'France',
5134 'GF': 'French Guiana',
5135 'PF': 'French Polynesia',
5136 'TF': 'French Southern Territories',
5137 'GA': 'Gabon',
5138 'GM': 'Gambia',
5139 'GE': 'Georgia',
5140 'DE': 'Germany',
5141 'GH': 'Ghana',
5142 'GI': 'Gibraltar',
5143 'GR': 'Greece',
5144 'GL': 'Greenland',
5145 'GD': 'Grenada',
5146 'GP': 'Guadeloupe',
5147 'GU': 'Guam',
5148 'GT': 'Guatemala',
5149 'GG': 'Guernsey',
5150 'GN': 'Guinea',
5151 'GW': 'Guinea-Bissau',
5152 'GY': 'Guyana',
5153 'HT': 'Haiti',
5154 'HM': 'Heard Island and McDonald Islands',
5155 'VA': 'Holy See (Vatican City State)',
5156 'HN': 'Honduras',
5157 'HK': 'Hong Kong',
5158 'HU': 'Hungary',
5159 'IS': 'Iceland',
5160 'IN': 'India',
5161 'ID': 'Indonesia',
5162 'IR': 'Iran, Islamic Republic of',
5163 'IQ': 'Iraq',
5164 'IE': 'Ireland',
5165 'IM': 'Isle of Man',
5166 'IL': 'Israel',
5167 'IT': 'Italy',
5168 'JM': 'Jamaica',
5169 'JP': 'Japan',
5170 'JE': 'Jersey',
5171 'JO': 'Jordan',
5172 'KZ': 'Kazakhstan',
5173 'KE': 'Kenya',
5174 'KI': 'Kiribati',
5175 'KP': 'Korea, Democratic People\'s Republic of',
5176 'KR': 'Korea, Republic of',
5177 'KW': 'Kuwait',
5178 'KG': 'Kyrgyzstan',
5179 'LA': 'Lao People\'s Democratic Republic',
5180 'LV': 'Latvia',
5181 'LB': 'Lebanon',
5182 'LS': 'Lesotho',
5183 'LR': 'Liberia',
5184 'LY': 'Libya',
5185 'LI': 'Liechtenstein',
5186 'LT': 'Lithuania',
5187 'LU': 'Luxembourg',
5188 'MO': 'Macao',
5189 'MK': 'Macedonia, the Former Yugoslav Republic of',
5190 'MG': 'Madagascar',
5191 'MW': 'Malawi',
5192 'MY': 'Malaysia',
5193 'MV': 'Maldives',
5194 'ML': 'Mali',
5195 'MT': 'Malta',
5196 'MH': 'Marshall Islands',
5197 'MQ': 'Martinique',
5198 'MR': 'Mauritania',
5199 'MU': 'Mauritius',
5200 'YT': 'Mayotte',
5201 'MX': 'Mexico',
5202 'FM': 'Micronesia, Federated States of',
5203 'MD': 'Moldova, Republic of',
5204 'MC': 'Monaco',
5205 'MN': 'Mongolia',
5206 'ME': 'Montenegro',
5207 'MS': 'Montserrat',
5208 'MA': 'Morocco',
5209 'MZ': 'Mozambique',
5210 'MM': 'Myanmar',
5211 'NA': 'Namibia',
5212 'NR': 'Nauru',
5213 'NP': 'Nepal',
5214 'NL': 'Netherlands',
5215 'NC': 'New Caledonia',
5216 'NZ': 'New Zealand',
5217 'NI': 'Nicaragua',
5218 'NE': 'Niger',
5219 'NG': 'Nigeria',
5220 'NU': 'Niue',
5221 'NF': 'Norfolk Island',
5222 'MP': 'Northern Mariana Islands',
5223 'NO': 'Norway',
5224 'OM': 'Oman',
5225 'PK': 'Pakistan',
5226 'PW': 'Palau',
5227 'PS': 'Palestine, State of',
5228 'PA': 'Panama',
5229 'PG': 'Papua New Guinea',
5230 'PY': 'Paraguay',
5231 'PE': 'Peru',
5232 'PH': 'Philippines',
5233 'PN': 'Pitcairn',
5234 'PL': 'Poland',
5235 'PT': 'Portugal',
5236 'PR': 'Puerto Rico',
5237 'QA': 'Qatar',
5238 'RE': 'Réunion',
5239 'RO': 'Romania',
5240 'RU': 'Russian Federation',
5241 'RW': 'Rwanda',
5242 'BL': 'Saint Barthélemy',
5243 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5244 'KN': 'Saint Kitts and Nevis',
5245 'LC': 'Saint Lucia',
5246 'MF': 'Saint Martin (French part)',
5247 'PM': 'Saint Pierre and Miquelon',
5248 'VC': 'Saint Vincent and the Grenadines',
5249 'WS': 'Samoa',
5250 'SM': 'San Marino',
5251 'ST': 'Sao Tome and Principe',
5252 'SA': 'Saudi Arabia',
5253 'SN': 'Senegal',
5254 'RS': 'Serbia',
5255 'SC': 'Seychelles',
5256 'SL': 'Sierra Leone',
5257 'SG': 'Singapore',
5258 'SX': 'Sint Maarten (Dutch part)',
5259 'SK': 'Slovakia',
5260 'SI': 'Slovenia',
5261 'SB': 'Solomon Islands',
5262 'SO': 'Somalia',
5263 'ZA': 'South Africa',
5264 'GS': 'South Georgia and the South Sandwich Islands',
5265 'SS': 'South Sudan',
5266 'ES': 'Spain',
5267 'LK': 'Sri Lanka',
5268 'SD': 'Sudan',
5269 'SR': 'Suriname',
5270 'SJ': 'Svalbard and Jan Mayen',
5271 'SZ': 'Swaziland',
5272 'SE': 'Sweden',
5273 'CH': 'Switzerland',
5274 'SY': 'Syrian Arab Republic',
5275 'TW': 'Taiwan, Province of China',
5276 'TJ': 'Tajikistan',
5277 'TZ': 'Tanzania, United Republic of',
5278 'TH': 'Thailand',
5279 'TL': 'Timor-Leste',
5280 'TG': 'Togo',
5281 'TK': 'Tokelau',
5282 'TO': 'Tonga',
5283 'TT': 'Trinidad and Tobago',
5284 'TN': 'Tunisia',
5285 'TR': 'Turkey',
5286 'TM': 'Turkmenistan',
5287 'TC': 'Turks and Caicos Islands',
5288 'TV': 'Tuvalu',
5289 'UG': 'Uganda',
5290 'UA': 'Ukraine',
5291 'AE': 'United Arab Emirates',
5292 'GB': 'United Kingdom',
5293 'US': 'United States',
5294 'UM': 'United States Minor Outlying Islands',
5295 'UY': 'Uruguay',
5296 'UZ': 'Uzbekistan',
5297 'VU': 'Vanuatu',
5298 'VE': 'Venezuela, Bolivarian Republic of',
5299 'VN': 'Viet Nam',
5300 'VG': 'Virgin Islands, British',
5301 'VI': 'Virgin Islands, U.S.',
5302 'WF': 'Wallis and Futuna',
5303 'EH': 'Western Sahara',
5304 'YE': 'Yemen',
5305 'ZM': 'Zambia',
5306 'ZW': 'Zimbabwe',
5307 }
5308
5309 @classmethod
5310 def short2full(cls, code):
5311 """Convert an ISO 3166-2 country code to the corresponding full name"""
5312 return cls._country_map.get(code.upper())
5313
5314
773f291d
S
5315class GeoUtils(object):
5316 # Major IPv4 address blocks per country
5317 _country_ip_map = {
53896ca5 5318 'AD': '46.172.224.0/19',
773f291d
S
5319 'AE': '94.200.0.0/13',
5320 'AF': '149.54.0.0/17',
5321 'AG': '209.59.64.0/18',
5322 'AI': '204.14.248.0/21',
5323 'AL': '46.99.0.0/16',
5324 'AM': '46.70.0.0/15',
5325 'AO': '105.168.0.0/13',
53896ca5
S
5326 'AP': '182.50.184.0/21',
5327 'AQ': '23.154.160.0/24',
773f291d
S
5328 'AR': '181.0.0.0/12',
5329 'AS': '202.70.112.0/20',
53896ca5 5330 'AT': '77.116.0.0/14',
773f291d
S
5331 'AU': '1.128.0.0/11',
5332 'AW': '181.41.0.0/18',
53896ca5
S
5333 'AX': '185.217.4.0/22',
5334 'AZ': '5.197.0.0/16',
773f291d
S
5335 'BA': '31.176.128.0/17',
5336 'BB': '65.48.128.0/17',
5337 'BD': '114.130.0.0/16',
5338 'BE': '57.0.0.0/8',
53896ca5 5339 'BF': '102.178.0.0/15',
773f291d
S
5340 'BG': '95.42.0.0/15',
5341 'BH': '37.131.0.0/17',
5342 'BI': '154.117.192.0/18',
5343 'BJ': '137.255.0.0/16',
53896ca5 5344 'BL': '185.212.72.0/23',
773f291d
S
5345 'BM': '196.12.64.0/18',
5346 'BN': '156.31.0.0/16',
5347 'BO': '161.56.0.0/16',
5348 'BQ': '161.0.80.0/20',
53896ca5 5349 'BR': '191.128.0.0/12',
773f291d
S
5350 'BS': '24.51.64.0/18',
5351 'BT': '119.2.96.0/19',
5352 'BW': '168.167.0.0/16',
5353 'BY': '178.120.0.0/13',
5354 'BZ': '179.42.192.0/18',
5355 'CA': '99.224.0.0/11',
5356 'CD': '41.243.0.0/16',
53896ca5
S
5357 'CF': '197.242.176.0/21',
5358 'CG': '160.113.0.0/16',
773f291d 5359 'CH': '85.0.0.0/13',
53896ca5 5360 'CI': '102.136.0.0/14',
773f291d
S
5361 'CK': '202.65.32.0/19',
5362 'CL': '152.172.0.0/14',
53896ca5 5363 'CM': '102.244.0.0/14',
773f291d
S
5364 'CN': '36.128.0.0/10',
5365 'CO': '181.240.0.0/12',
5366 'CR': '201.192.0.0/12',
5367 'CU': '152.206.0.0/15',
5368 'CV': '165.90.96.0/19',
5369 'CW': '190.88.128.0/17',
53896ca5 5370 'CY': '31.153.0.0/16',
773f291d
S
5371 'CZ': '88.100.0.0/14',
5372 'DE': '53.0.0.0/8',
5373 'DJ': '197.241.0.0/17',
5374 'DK': '87.48.0.0/12',
5375 'DM': '192.243.48.0/20',
5376 'DO': '152.166.0.0/15',
5377 'DZ': '41.96.0.0/12',
5378 'EC': '186.68.0.0/15',
5379 'EE': '90.190.0.0/15',
5380 'EG': '156.160.0.0/11',
5381 'ER': '196.200.96.0/20',
5382 'ES': '88.0.0.0/11',
5383 'ET': '196.188.0.0/14',
5384 'EU': '2.16.0.0/13',
5385 'FI': '91.152.0.0/13',
5386 'FJ': '144.120.0.0/16',
53896ca5 5387 'FK': '80.73.208.0/21',
773f291d
S
5388 'FM': '119.252.112.0/20',
5389 'FO': '88.85.32.0/19',
5390 'FR': '90.0.0.0/9',
5391 'GA': '41.158.0.0/15',
5392 'GB': '25.0.0.0/8',
5393 'GD': '74.122.88.0/21',
5394 'GE': '31.146.0.0/16',
5395 'GF': '161.22.64.0/18',
5396 'GG': '62.68.160.0/19',
53896ca5
S
5397 'GH': '154.160.0.0/12',
5398 'GI': '95.164.0.0/16',
773f291d
S
5399 'GL': '88.83.0.0/19',
5400 'GM': '160.182.0.0/15',
5401 'GN': '197.149.192.0/18',
5402 'GP': '104.250.0.0/19',
5403 'GQ': '105.235.224.0/20',
5404 'GR': '94.64.0.0/13',
5405 'GT': '168.234.0.0/16',
5406 'GU': '168.123.0.0/16',
5407 'GW': '197.214.80.0/20',
5408 'GY': '181.41.64.0/18',
5409 'HK': '113.252.0.0/14',
5410 'HN': '181.210.0.0/16',
5411 'HR': '93.136.0.0/13',
5412 'HT': '148.102.128.0/17',
5413 'HU': '84.0.0.0/14',
5414 'ID': '39.192.0.0/10',
5415 'IE': '87.32.0.0/12',
5416 'IL': '79.176.0.0/13',
5417 'IM': '5.62.80.0/20',
5418 'IN': '117.192.0.0/10',
5419 'IO': '203.83.48.0/21',
5420 'IQ': '37.236.0.0/14',
5421 'IR': '2.176.0.0/12',
5422 'IS': '82.221.0.0/16',
5423 'IT': '79.0.0.0/10',
5424 'JE': '87.244.64.0/18',
5425 'JM': '72.27.0.0/17',
5426 'JO': '176.29.0.0/16',
53896ca5 5427 'JP': '133.0.0.0/8',
773f291d
S
5428 'KE': '105.48.0.0/12',
5429 'KG': '158.181.128.0/17',
5430 'KH': '36.37.128.0/17',
5431 'KI': '103.25.140.0/22',
5432 'KM': '197.255.224.0/20',
53896ca5 5433 'KN': '198.167.192.0/19',
773f291d
S
5434 'KP': '175.45.176.0/22',
5435 'KR': '175.192.0.0/10',
5436 'KW': '37.36.0.0/14',
5437 'KY': '64.96.0.0/15',
5438 'KZ': '2.72.0.0/13',
5439 'LA': '115.84.64.0/18',
5440 'LB': '178.135.0.0/16',
53896ca5 5441 'LC': '24.92.144.0/20',
773f291d
S
5442 'LI': '82.117.0.0/19',
5443 'LK': '112.134.0.0/15',
53896ca5 5444 'LR': '102.183.0.0/16',
773f291d
S
5445 'LS': '129.232.0.0/17',
5446 'LT': '78.56.0.0/13',
5447 'LU': '188.42.0.0/16',
5448 'LV': '46.109.0.0/16',
5449 'LY': '41.252.0.0/14',
5450 'MA': '105.128.0.0/11',
5451 'MC': '88.209.64.0/18',
5452 'MD': '37.246.0.0/16',
5453 'ME': '178.175.0.0/17',
5454 'MF': '74.112.232.0/21',
5455 'MG': '154.126.0.0/17',
5456 'MH': '117.103.88.0/21',
5457 'MK': '77.28.0.0/15',
5458 'ML': '154.118.128.0/18',
5459 'MM': '37.111.0.0/17',
5460 'MN': '49.0.128.0/17',
5461 'MO': '60.246.0.0/16',
5462 'MP': '202.88.64.0/20',
5463 'MQ': '109.203.224.0/19',
5464 'MR': '41.188.64.0/18',
5465 'MS': '208.90.112.0/22',
5466 'MT': '46.11.0.0/16',
5467 'MU': '105.16.0.0/12',
5468 'MV': '27.114.128.0/18',
53896ca5 5469 'MW': '102.70.0.0/15',
773f291d
S
5470 'MX': '187.192.0.0/11',
5471 'MY': '175.136.0.0/13',
5472 'MZ': '197.218.0.0/15',
5473 'NA': '41.182.0.0/16',
5474 'NC': '101.101.0.0/18',
5475 'NE': '197.214.0.0/18',
5476 'NF': '203.17.240.0/22',
5477 'NG': '105.112.0.0/12',
5478 'NI': '186.76.0.0/15',
5479 'NL': '145.96.0.0/11',
5480 'NO': '84.208.0.0/13',
5481 'NP': '36.252.0.0/15',
5482 'NR': '203.98.224.0/19',
5483 'NU': '49.156.48.0/22',
5484 'NZ': '49.224.0.0/14',
5485 'OM': '5.36.0.0/15',
5486 'PA': '186.72.0.0/15',
5487 'PE': '186.160.0.0/14',
5488 'PF': '123.50.64.0/18',
5489 'PG': '124.240.192.0/19',
5490 'PH': '49.144.0.0/13',
5491 'PK': '39.32.0.0/11',
5492 'PL': '83.0.0.0/11',
5493 'PM': '70.36.0.0/20',
5494 'PR': '66.50.0.0/16',
5495 'PS': '188.161.0.0/16',
5496 'PT': '85.240.0.0/13',
5497 'PW': '202.124.224.0/20',
5498 'PY': '181.120.0.0/14',
5499 'QA': '37.210.0.0/15',
53896ca5 5500 'RE': '102.35.0.0/16',
773f291d 5501 'RO': '79.112.0.0/13',
53896ca5 5502 'RS': '93.86.0.0/15',
773f291d 5503 'RU': '5.136.0.0/13',
53896ca5 5504 'RW': '41.186.0.0/16',
773f291d
S
5505 'SA': '188.48.0.0/13',
5506 'SB': '202.1.160.0/19',
5507 'SC': '154.192.0.0/11',
53896ca5 5508 'SD': '102.120.0.0/13',
773f291d 5509 'SE': '78.64.0.0/12',
53896ca5 5510 'SG': '8.128.0.0/10',
773f291d
S
5511 'SI': '188.196.0.0/14',
5512 'SK': '78.98.0.0/15',
53896ca5 5513 'SL': '102.143.0.0/17',
773f291d
S
5514 'SM': '89.186.32.0/19',
5515 'SN': '41.82.0.0/15',
53896ca5 5516 'SO': '154.115.192.0/18',
773f291d
S
5517 'SR': '186.179.128.0/17',
5518 'SS': '105.235.208.0/21',
5519 'ST': '197.159.160.0/19',
5520 'SV': '168.243.0.0/16',
5521 'SX': '190.102.0.0/20',
5522 'SY': '5.0.0.0/16',
5523 'SZ': '41.84.224.0/19',
5524 'TC': '65.255.48.0/20',
5525 'TD': '154.68.128.0/19',
5526 'TG': '196.168.0.0/14',
5527 'TH': '171.96.0.0/13',
5528 'TJ': '85.9.128.0/18',
5529 'TK': '27.96.24.0/21',
5530 'TL': '180.189.160.0/20',
5531 'TM': '95.85.96.0/19',
5532 'TN': '197.0.0.0/11',
5533 'TO': '175.176.144.0/21',
5534 'TR': '78.160.0.0/11',
5535 'TT': '186.44.0.0/15',
5536 'TV': '202.2.96.0/19',
5537 'TW': '120.96.0.0/11',
5538 'TZ': '156.156.0.0/14',
53896ca5
S
5539 'UA': '37.52.0.0/14',
5540 'UG': '102.80.0.0/13',
5541 'US': '6.0.0.0/8',
773f291d 5542 'UY': '167.56.0.0/13',
53896ca5 5543 'UZ': '84.54.64.0/18',
773f291d 5544 'VA': '212.77.0.0/19',
53896ca5 5545 'VC': '207.191.240.0/21',
773f291d 5546 'VE': '186.88.0.0/13',
53896ca5 5547 'VG': '66.81.192.0/20',
773f291d
S
5548 'VI': '146.226.0.0/16',
5549 'VN': '14.160.0.0/11',
5550 'VU': '202.80.32.0/20',
5551 'WF': '117.20.32.0/21',
5552 'WS': '202.4.32.0/19',
5553 'YE': '134.35.0.0/16',
5554 'YT': '41.242.116.0/22',
5555 'ZA': '41.0.0.0/11',
53896ca5
S
5556 'ZM': '102.144.0.0/13',
5557 'ZW': '102.177.192.0/18',
773f291d
S
5558 }
5559
5560 @classmethod
5f95927a
S
5561 def random_ipv4(cls, code_or_block):
5562 if len(code_or_block) == 2:
5563 block = cls._country_ip_map.get(code_or_block.upper())
5564 if not block:
5565 return None
5566 else:
5567 block = code_or_block
773f291d
S
5568 addr, preflen = block.split('/')
5569 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5570 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5571 return compat_str(socket.inet_ntoa(
4248dad9 5572 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5573
5574
91410c9b 5575class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5576 def __init__(self, proxies=None):
5577 # Set default handlers
5578 for type in ('http', 'https'):
5579 setattr(self, '%s_open' % type,
5580 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5581 meth(r, proxy, type))
38e87f6c 5582 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5583
91410c9b 5584 def proxy_open(self, req, proxy, type):
2461f79d 5585 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5586 if req_proxy is not None:
5587 proxy = req_proxy
2461f79d
PH
5588 del req.headers['Ytdl-request-proxy']
5589
5590 if proxy == '__noproxy__':
5591 return None # No Proxy
51fb4995 5592 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5593 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5594 # yt-dlp's http/https handlers do the actual wrapping of the socket with SOCKS
71aff188 5595 return None
91410c9b
PH
5596 return compat_urllib_request.ProxyHandler.proxy_open(
5597 self, req, proxy, type)
5bc880b9
YCH
5598
5599
0a5445dd
YCH
5600# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5601# released into Public Domain
5602# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5603
5604def long_to_bytes(n, blocksize=0):
5605 """long_to_bytes(n:long, blocksize:int) : string
5606 Convert a long integer to a byte string.
5607
5608 If optional blocksize is given and greater than zero, pad the front of the
5609 byte string with binary zeros so that the length is a multiple of
5610 blocksize.
5611 """
5612 # after much testing, this algorithm was deemed to be the fastest
5613 s = b''
5614 n = int(n)
5615 while n > 0:
5616 s = compat_struct_pack('>I', n & 0xffffffff) + s
5617 n = n >> 32
5618 # strip off leading zeros
5619 for i in range(len(s)):
5620 if s[i] != b'\000'[0]:
5621 break
5622 else:
5623 # only happens when n == 0
5624 s = b'\000'
5625 i = 0
5626 s = s[i:]
5627 # add back some pad bytes. this could be done more efficiently w.r.t. the
5628 # de-padding being done above, but sigh...
5629 if blocksize > 0 and len(s) % blocksize:
5630 s = (blocksize - len(s) % blocksize) * b'\000' + s
5631 return s
5632
5633
5634def bytes_to_long(s):
5635 """bytes_to_long(string) : long
5636 Convert a byte string to a long integer.
5637
5638 This is (essentially) the inverse of long_to_bytes().
5639 """
5640 acc = 0
5641 length = len(s)
5642 if length % 4:
5643 extra = (4 - length % 4)
5644 s = b'\000' * extra + s
5645 length = length + extra
5646 for i in range(0, length, 4):
5647 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5648 return acc
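# The two helpers are inverses of each other; e.g. (illustrative, Python 3 repr):
#   long_to_bytes(65537, 4)             ->  b'\x00\x01\x00\x01'
#   bytes_to_long(b'\x00\x01\x00\x01')  ->  65537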
5649
5650
5bc880b9
YCH
5651def ohdave_rsa_encrypt(data, exponent, modulus):
5652 '''
5653 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5654
5655 Input:
5656 data: data to encrypt, bytes-like object
5657 exponent, modulus: parameter e and N of RSA algorithm, both integer
5658 Output: hex string of encrypted data
5659
5660 Limitation: supports one block encryption only
5661 '''
5662
5663 payload = int(binascii.hexlify(data[::-1]), 16)
5664 encrypted = pow(payload, exponent, modulus)
5665 return '%x' % encrypted
81bdc8fd
YCH
5666
5667
f48409c7
YCH
5668def pkcs1pad(data, length):
5669 """
5670 Padding input data with PKCS#1 scheme
5671
5672 @param {int[]} data input data
5673 @param {int} length target length
5674 @returns {int[]} padded data
5675 """
5676 if len(data) > length - 11:
5677 raise ValueError('Input data too long for PKCS#1 padding')
5678
5679 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5680 return [0, 2] + pseudo_random + [0] + data
5681
5682
5eb6bdce 5683def encode_base_n(num, n, table=None):
59f898b7 5684 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
5685 if not table:
5686 table = FULL_TABLE[:n]
5687
5eb6bdce
YCH
5688 if n > len(table):
5689 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5690
5691 if num == 0:
5692 return table[0]
5693
81bdc8fd
YCH
5694 ret = ''
5695 while num:
5696 ret = table[num % n] + ret
5697 num = num // n
5698 return ret
f52354a8
YCH
5699
5700
5701def decode_packed_codes(code):
06b3fe29 5702 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5703 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
5704 base = int(base)
5705 count = int(count)
5706 symbols = symbols.split('|')
5707 symbol_table = {}
5708
5709 while count:
5710 count -= 1
5eb6bdce 5711 base_n_count = encode_base_n(count, base)
f52354a8
YCH
5712 symbol_table[base_n_count] = symbols[count] or base_n_count
5713
5714 return re.sub(
5715 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5716 obfuscated_code)
e154c651 5717
5718
1ced2221
S
5719def caesar(s, alphabet, shift):
5720 if shift == 0:
5721 return s
5722 l = len(alphabet)
5723 return ''.join(
5724 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5725 for c in s)
5726
5727
5728def rot47(s):
5729 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
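# caesar() shifts only characters that occur in the given alphabet; rot47 is the
# classic shift-by-47 over printable ASCII. Illustrative values:
#   caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 3)  ->  'def'
#   rot47('Hello')                                  ->  'w6==@'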
5730
5731
e154c651 5732def parse_m3u8_attributes(attrib):
5733 info = {}
5734 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5735 if val.startswith('"'):
5736 val = val[1:-1]
5737 info[key] = val
5738 return info
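# Example attribute line from an m3u8 playlist (values are made up; dict key order may vary):
#   parse_m3u8_attributes('BANDWIDTH=1280000,RESOLUTION=640x360,CODECS="mp4a.40.2,avc1.4d401e"')
#   ->  {'BANDWIDTH': '1280000', 'RESOLUTION': '640x360', 'CODECS': 'mp4a.40.2,avc1.4d401e'}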
1143535d
YCH
5739
5740
5741def urshift(val, n):
5742 return val >> n if val >= 0 else (val + 0x100000000) >> n
d3f8e038
YCH
5743
5744
5745# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5746# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
5747def decode_png(png_data):
5748 # Reference: https://www.w3.org/TR/PNG/
5749 header = png_data[8:]
5750
5751 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5752 raise IOError('Not a valid PNG file.')
5753
5754 int_map = {1: '>B', 2: '>H', 4: '>I'}
5755 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5756
5757 chunks = []
5758
5759 while header:
5760 length = unpack_integer(header[:4])
5761 header = header[4:]
5762
5763 chunk_type = header[:4]
5764 header = header[4:]
5765
5766 chunk_data = header[:length]
5767 header = header[length:]
5768
5769 header = header[4:] # Skip CRC
5770
5771 chunks.append({
5772 'type': chunk_type,
5773 'length': length,
5774 'data': chunk_data
5775 })
5776
5777 ihdr = chunks[0]['data']
5778
5779 width = unpack_integer(ihdr[:4])
5780 height = unpack_integer(ihdr[4:8])
5781
5782 idat = b''
5783
5784 for chunk in chunks:
5785 if chunk['type'] == b'IDAT':
5786 idat += chunk['data']
5787
5788 if not idat:
5789 raise IOError('Unable to read PNG data.')
5790
5791 decompressed_data = bytearray(zlib.decompress(idat))
5792
5793 stride = width * 3
5794 pixels = []
5795
5796 def _get_pixel(idx):
5797 x = idx % stride
5798 y = idx // stride
5799 return pixels[y][x]
5800
5801 for y in range(height):
5802 basePos = y * (1 + stride)
5803 filter_type = decompressed_data[basePos]
5804
5805 current_row = []
5806
5807 pixels.append(current_row)
5808
5809 for x in range(stride):
5810 color = decompressed_data[1 + basePos + x]
5811 basex = y * stride + x
5812 left = 0
5813 up = 0
5814
5815 if x > 2:
5816 left = _get_pixel(basex - 3)
5817 if y > 0:
5818 up = _get_pixel(basex - stride)
5819
5820 if filter_type == 1: # Sub
5821 color = (color + left) & 0xff
5822 elif filter_type == 2: # Up
5823 color = (color + up) & 0xff
5824 elif filter_type == 3: # Average
5825 color = (color + ((left + up) >> 1)) & 0xff
5826 elif filter_type == 4: # Paeth
5827 a = left
5828 b = up
5829 c = 0
5830
5831 if x > 2 and y > 0:
5832 c = _get_pixel(basex - stride - 3)
5833
5834 p = a + b - c
5835
5836 pa = abs(p - a)
5837 pb = abs(p - b)
5838 pc = abs(p - c)
5839
5840 if pa <= pb and pa <= pc:
5841 color = (color + a) & 0xff
5842 elif pb <= pc:
5843 color = (color + b) & 0xff
5844 else:
5845 color = (color + c) & 0xff
5846
5847 current_row.append(color)
5848
5849 return width, height, pixels
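# Illustrative sketch (hypothetical data, built only to exercise the decoder above):
# a minimal 1x1 RGB PNG assembled in memory. Dummy CRC bytes suffice because
# decode_png() skips CRC validation.
#   >>> ihdr = b'\x00\x00\x00\x01' * 2 + b'\x08\x02\x00\x00\x00'  # 1x1 px, 8-bit RGB
#   >>> idat = zlib.compress(b'\x00\xff\x00\x00')  # filter byte 0 + one red pixel
#   >>> png = (b'\x89PNG\r\n\x1a\n'
#   ...        + b'\x00\x00\x00\x0d' + b'IHDR' + ihdr + b'\x00' * 4
#   ...        + compat_struct_pack('>I', len(idat)) + b'IDAT' + idat + b'\x00' * 4)
#   >>> decode_png(png)
#   (1, 1, [[255, 0, 0]])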
5850
5851
5852def write_xattr(path, key, value):
5853 # This mess below finds the best xattr tool for the job
5854 try:
5855 # try the pyxattr module...
5856 import xattr
5857
5858 if hasattr(xattr, 'set'): # pyxattr
5859 # Unicode arguments are not supported in python-pyxattr until
5860 # version 0.5.0
067aa17e 5861 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5862 pyxattr_required_version = '0.5.0'
5863 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5864 # TODO: fallback to CLI tools
5865 raise XAttrUnavailableError(
5866 'python-pyxattr is detected but is too old. '
7a5c1cfe 5867 'yt-dlp requires %s or above while your version is %s. '
5868 'Falling back to other xattr implementations' % (
5869 pyxattr_required_version, xattr.__version__))
5870
5871 setxattr = xattr.set
5872 else: # xattr
5873 setxattr = xattr.setxattr
5874
5875 try:
53a7e3d2 5876 setxattr(path, key, value)
5877 except EnvironmentError as e:
5878 raise XAttrMetadataError(e.errno, e.strerror)
5879
5880 except ImportError:
5881 if compat_os_name == 'nt':
5882 # Write xattrs to NTFS Alternate Data Streams:
5883 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5884 assert ':' not in key
5885 assert os.path.exists(path)
5886
5887 ads_fn = path + ':' + key
5888 try:
5889 with open(ads_fn, 'wb') as f:
5890 f.write(value)
5891 except EnvironmentError as e:
5892 raise XAttrMetadataError(e.errno, e.strerror)
5893 else:
5894 user_has_setfattr = check_executable('setfattr', ['--version'])
5895 user_has_xattr = check_executable('xattr', ['-h'])
5896
5897 if user_has_setfattr or user_has_xattr:
5898
5899 value = value.decode('utf-8')
5900 if user_has_setfattr:
5901 executable = 'setfattr'
5902 opts = ['-n', key, '-v', value]
5903 elif user_has_xattr:
5904 executable = 'xattr'
5905 opts = ['-w', key, value]
5906
5907 cmd = ([encodeFilename(executable, True)]
5908 + [encodeArgument(o) for o in opts]
5909 + [encodeFilename(path, True)])
5910
5911 try:
5912 p = subprocess.Popen(
5913 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5914 except EnvironmentError as e:
5915 raise XAttrMetadataError(e.errno, e.strerror)
f5b1bca9 5916 stdout, stderr = process_communicate_or_kill(p)
5917 stderr = stderr.decode('utf-8', 'replace')
5918 if p.returncode != 0:
5919 raise XAttrMetadataError(p.returncode, stderr)
5920
5921 else:
5922 # On Unix, but we couldn't find pyxattr, setfattr, or xattr.
5923 if sys.platform.startswith('linux'):
5924 raise XAttrUnavailableError(
5925 "Couldn't find a tool to set the xattrs. "
5926 "Install either the python 'pyxattr' or 'xattr' "
5927 "modules, or the GNU 'attr' package "
5928 "(which contains the 'setfattr' tool).")
5929 else:
5930 raise XAttrUnavailableError(
5931 "Couldn't find a tool to set the xattrs. "
5932 "Install either the python 'xattr' module, "
5933 "or the 'xattr' binary.")
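# Typical call shape (a sketch; the path, key and value below are arbitrary examples, and the
# value must be bytes). Which backend is used (pyxattr, xattr, the setfattr/xattr CLI tools,
# or NTFS Alternate Data Streams) depends on the platform and what is installed.
#   >>> write_xattr('video.mp4', 'user.xdg.origin.url', b'https://example.com/video')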
5934
5935
5936def random_birthday(year_field, month_field, day_field):
5937 start_date = datetime.date(1950, 1, 1)
5938 end_date = datetime.date(1995, 12, 31)
5939 offset = random.randint(0, (end_date - start_date).days)
5940 random_date = start_date + datetime.timedelta(offset)
0c265486 5941 return {
5942 year_field: str(random_date.year),
5943 month_field: str(random_date.month),
5944 day_field: str(random_date.day),
0c265486 5945 }
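# Illustrative usage (sketch; the output is random by design, the values shown are just one possibility):
#   >>> random_birthday('birth_year', 'birth_month', 'birth_day')
#   {'birth_year': '1984', 'birth_month': '7', 'birth_day': '21'}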
732044af 5946
c76eb41b 5947
732044af 5948# Templates for internet shortcut files, which are plain text files.
5949DOT_URL_LINK_TEMPLATE = '''
5950[InternetShortcut]
5951URL=%(url)s
5952'''.lstrip()
5953
5954DOT_WEBLOC_LINK_TEMPLATE = '''
5955<?xml version="1.0" encoding="UTF-8"?>
5956<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5957<plist version="1.0">
5958<dict>
5959\t<key>URL</key>
5960\t<string>%(url)s</string>
5961</dict>
5962</plist>
5963'''.lstrip()
5964
5965DOT_DESKTOP_LINK_TEMPLATE = '''
5966[Desktop Entry]
5967Encoding=UTF-8
5968Name=%(filename)s
5969Type=Link
5970URL=%(url)s
5971Icon=text-html
5972'''.lstrip()
5973
5974
5975def iri_to_uri(iri):
5976 """
5977 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5978
5979 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5980 """
5981
5982 iri_parts = compat_urllib_parse_urlparse(iri)
5983
5984 if '[' in iri_parts.netloc:
5985 raise ValueError('IPv6 URIs are not yet supported.')
5986 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5987
5988 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5989
5990 net_location = ''
5991 if iri_parts.username:
5992 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
5993 if iri_parts.password is not None:
5994 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
5995 net_location += '@'
5996
5997 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
5998 # The 'idna' encoding produces ASCII text.
5999 if iri_parts.port is not None and iri_parts.port != 80:
6000 net_location += ':' + str(iri_parts.port)
6001
6002 return compat_urllib_parse_urlunparse(
6003 (iri_parts.scheme,
6004 net_location,
6005
6006 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6007
6008 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6009 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6010
6011 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6012 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6013
6014 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6015
6016 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
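# Illustrative usage (sketch): non-ASCII characters are percent-encoded as UTF-8,
# while the already-ASCII parts are left intact.
#   >>> iri_to_uri('http://example.com/ümlaut')
#   'http://example.com/%C3%BCmlaut'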
6017
6018
6019def to_high_limit_path(path):
6020 if sys.platform in ['win32', 'cygwin']:
6021 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6022 return r'\\?\ '.rstrip() + os.path.abspath(path)
6023
6024 return path
76d321f6 6025
c76eb41b 6026
76d321f6 6027def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
6028 val = obj.get(field, default)
6029 if func and val not in ignore:
6030 val = func(val)
6031 return template % val if val not in ignore else default
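# Illustrative usage (sketch with made-up fields):
#   >>> format_field({'height': 720}, 'height', '%sp')
#   '720p'
#   >>> format_field({'height': None}, 'height', '%sp')
#   ''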
00dd0cd5 6032
6033
6034def clean_podcast_url(url):
6035 return re.sub(r'''(?x)
6036 (?:
6037 (?:
6038 chtbl\.com/track|
6039 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6040 play\.podtrac\.com
6041 )/[^/]+|
6042 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6043 flex\.acast\.com|
6044 pd(?:
6045 cn\.co| # https://podcorn.com/analytics-prefix/
6046 st\.fm # https://podsights.com/docs/
6047 )/e
6048 )/''', '', url)
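# Illustrative usage (sketch; the tracking prefix and target URL are made up):
#   >>> clean_podcast_url('https://chtbl.com/track/ABC123/example.com/episode.mp3')
#   'https://example.com/episode.mp3'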
6049
6050
6051_HEX_TABLE = '0123456789abcdef'
6052
6053
6054def random_uuidv4():
6055 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6056
6057
6058def make_dir(path, to_screen=None):
6059 try:
6060 dn = os.path.dirname(path)
6061 if dn and not os.path.exists(dn):
6062 os.makedirs(dn)
6063 return True
6064 except (OSError, IOError) as err:
6065 if callable(to_screen):
6066 to_screen('unable to create directory ' + error_to_compat_str(err))
6067 return False
f74980cb 6068
6069
6070def get_executable_path():
c552ae88 6071 from zipimport import zipimporter
6072 if hasattr(sys, 'frozen'): # Running from PyInstaller
6073 path = os.path.dirname(sys.executable)
6074 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6075 path = os.path.join(os.path.dirname(__file__), '../..')
6076 else:
6077 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6078 return os.path.abspath(path)
6079
6080
6081def load_plugins(name, type, namespace):
6082 plugin_info = [None]
6083 classes = []
6084 try:
6085 plugin_info = imp.find_module(
6086 name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6087 plugins = imp.load_module(name, *plugin_info)
6088 for name in dir(plugins):
6089 if not name.endswith(type):
6090 continue
6091 klass = getattr(plugins, name)
6092 classes.append(klass)
6093 namespace[name] = klass
6094 except ImportError:
6095 pass
6096 finally:
6097 if plugin_info[0] is not None:
6098 plugin_info[0].close()
6099 return classes
06167fbb 6100
6101
6102def traverse_dict(dictn, keys, casesense=True):
a439a3a4 6103 keys = list(keys)[::-1]
6104 while keys:
6105 key = keys.pop()
6106 if isinstance(dictn, dict):
6107 if not casesense:
6108 dictn = {k.lower(): v for k, v in dictn.items()}
6109 key = key.lower()
6110 dictn = dictn.get(key)
6111 elif isinstance(dictn, (list, tuple, compat_str)):
6112 key, n = int_or_none(key), len(dictn)
6113 if key is not None and -n <= key < n:
6114 dictn = dictn[key]
6115 else:
6116 dictn = None
6117 else:
6118 return None
6119 return dictn
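# Illustrative usage (sketch): keys are applied left to right, mixing dict lookups and
# sequence indices; a failed lookup yields None.
#   >>> traverse_dict({'a': {'b': [10, 20]}}, ('a', 'b', 1))
#   20
#   >>> traverse_dict({'Title': 'x'}, ('title',), casesense=False)
#   'x'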