]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
[cleanup] See desc
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
201c1459 43 compat_HTTPError,
8f9312c3 44 compat_basestring,
8c25f81b 45 compat_chr,
1bab3437 46 compat_cookiejar,
d7cd9a9e 47 compat_ctypes_WINFUNCTYPE,
36e6f62c 48 compat_etree_fromstring,
51098426 49 compat_expanduser,
8c25f81b 50 compat_html_entities,
55b2f099 51 compat_html_entities_html5,
be4a824d 52 compat_http_client,
42db58ec 53 compat_integer_types,
e29663c6 54 compat_numeric_types,
c86b6142 55 compat_kwargs,
efa97bdc 56 compat_os_name,
8c25f81b 57 compat_parse_qs,
702ccf2d 58 compat_shlex_quote,
8c25f81b 59 compat_str,
edaa23f8 60 compat_struct_pack,
d3f8e038 61 compat_struct_unpack,
8c25f81b
PH
62 compat_urllib_error,
63 compat_urllib_parse,
15707c7e 64 compat_urllib_parse_urlencode,
8c25f81b 65 compat_urllib_parse_urlparse,
732044af 66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
7581bfc9 69 compat_urllib_parse_unquote_plus,
8c25f81b
PH
70 compat_urllib_request,
71 compat_urlparse,
810c10ba 72 compat_xpath,
8c25f81b 73)
4644ac55 74
71aff188
YCH
75from .socks import (
76 ProxyType,
77 sockssocket,
78)
79
4644ac55 80
51fb4995
YCH
81def register_socks_protocols():
82 # "Register" SOCKS protocols
d5ae6bb5
YCH
83 # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
84 # URLs with protocols not in urlparse.uses_netloc are not handled correctly
51fb4995
YCH
85 for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
86 if scheme not in compat_urlparse.uses_netloc:
87 compat_urlparse.uses_netloc.append(scheme)
88
89
468e2e92
FV
90# This is not clearly defined otherwise
91compiled_regex_type = type(re.compile(''))
92
f7a147e3
S
93
94def random_user_agent():
95 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
96 _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
1673 )
1674 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
3e669f36 1677std_headers = {
f7a147e3 1678 'User-Agent': random_user_agent(),
59ae15a5
PH
1679 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1680 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1681 'Accept-Encoding': 'gzip, deflate',
1682 'Accept-Language': 'en-us,en;q=0.5',
3e669f36 1683}
f427df17 1684
5f6a1245 1685
fb37eb25
S
1686USER_AGENTS = {
1687 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1688}
1689
1690
bf42a990
S
1691NO_DEFAULT = object()
1692
7105440c
YCH
1693ENGLISH_MONTH_NAMES = [
1694 'January', 'February', 'March', 'April', 'May', 'June',
1695 'July', 'August', 'September', 'October', 'November', 'December']
1696
f6717dec
S
1697MONTH_NAMES = {
1698 'en': ENGLISH_MONTH_NAMES,
1699 'fr': [
3e4185c3
S
1700 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1701 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
f6717dec 1702}
a942d6cb 1703
a7aaa398
S
1704KNOWN_EXTENSIONS = (
1705 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1706 'flv', 'f4v', 'f4a', 'f4b',
1707 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1708 'mkv', 'mka', 'mk3d',
1709 'avi', 'divx',
1710 'mov',
1711 'asf', 'wmv', 'wma',
1712 '3gp', '3g2',
1713 'mp3',
1714 'flac',
1715 'ape',
1716 'wav',
1717 'f4f', 'f4m', 'm3u8', 'smil')
1718
df692c5a 1719REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
1720
c587cbb7 1721# needed for sanitizing filenames in restricted mode
c8827027 1722ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
fd35d8cd
JW
1723 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1724 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1725
46f59e89
S
1726DATE_FORMATS = (
1727 '%d %B %Y',
1728 '%d %b %Y',
1729 '%B %d %Y',
cb655f34
S
1730 '%B %dst %Y',
1731 '%B %dnd %Y',
9d30c213 1732 '%B %drd %Y',
cb655f34 1733 '%B %dth %Y',
46f59e89 1734 '%b %d %Y',
cb655f34
S
1735 '%b %dst %Y',
1736 '%b %dnd %Y',
9d30c213 1737 '%b %drd %Y',
cb655f34 1738 '%b %dth %Y',
46f59e89
S
1739 '%b %dst %Y %I:%M',
1740 '%b %dnd %Y %I:%M',
9d30c213 1741 '%b %drd %Y %I:%M',
46f59e89
S
1742 '%b %dth %Y %I:%M',
1743 '%Y %m %d',
1744 '%Y-%m-%d',
1745 '%Y/%m/%d',
81c13222 1746 '%Y/%m/%d %H:%M',
46f59e89 1747 '%Y/%m/%d %H:%M:%S',
0c1c6f4b 1748 '%Y-%m-%d %H:%M',
46f59e89
S
1749 '%Y-%m-%d %H:%M:%S',
1750 '%Y-%m-%d %H:%M:%S.%f',
1751 '%d.%m.%Y %H:%M',
1752 '%d.%m.%Y %H.%M',
1753 '%Y-%m-%dT%H:%M:%SZ',
1754 '%Y-%m-%dT%H:%M:%S.%fZ',
1755 '%Y-%m-%dT%H:%M:%S.%f0Z',
1756 '%Y-%m-%dT%H:%M:%S',
1757 '%Y-%m-%dT%H:%M:%S.%f',
1758 '%Y-%m-%dT%H:%M',
c6eed6b8
S
1759 '%b %d %Y at %H:%M',
1760 '%b %d %Y at %H:%M:%S',
b555ae9b
S
1761 '%B %d %Y at %H:%M',
1762 '%B %d %Y at %H:%M:%S',
46f59e89
S
1763)
1764
1765DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1766DATE_FORMATS_DAY_FIRST.extend([
1767 '%d-%m-%Y',
1768 '%d.%m.%Y',
1769 '%d.%m.%y',
1770 '%d/%m/%Y',
1771 '%d/%m/%y',
1772 '%d/%m/%Y %H:%M:%S',
1773])
1774
1775DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1776DATE_FORMATS_MONTH_FIRST.extend([
1777 '%m-%d-%Y',
1778 '%m.%d.%Y',
1779 '%m/%d/%Y',
1780 '%m/%d/%y',
1781 '%m/%d/%Y %H:%M:%S',
1782])
1783
06b3fe29 1784PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
22f5f5c6 1785JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1786
7105440c 1787
d77c3dfd 1788def preferredencoding():
59ae15a5 1789 """Get preferred encoding.
d77c3dfd 1790
59ae15a5
PH
1791 Returns the best encoding scheme for the system, based on
1792 locale.getpreferredencoding() and some further tweaks.
1793 """
1794 try:
1795 pref = locale.getpreferredencoding()
28e614de 1796 'TEST'.encode(pref)
70a1165b 1797 except Exception:
59ae15a5 1798 pref = 'UTF-8'
bae611f2 1799
59ae15a5 1800 return pref
d77c3dfd 1801
f4bfd65f 1802
181c8655 1803def write_json_file(obj, fn):
1394646a 1804 """ Encode obj as JSON and write it to fn, atomically if possible """
181c8655 1805
92120217 1806 fn = encodeFilename(fn)
61ee5aeb 1807 if sys.version_info < (3, 0) and sys.platform != 'win32':
ec5f6016
JMF
1808 encoding = get_filesystem_encoding()
1809 # os.path.basename returns a bytes object, but NamedTemporaryFile
1810 # will fail if the filename contains non ascii characters unless we
1811 # use a unicode object
1812 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1813 # the same for os.path.dirname
1814 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1815 else:
1816 path_basename = os.path.basename
1817 path_dirname = os.path.dirname
1818
73159f99
S
1819 args = {
1820 'suffix': '.tmp',
ec5f6016
JMF
1821 'prefix': path_basename(fn) + '.',
1822 'dir': path_dirname(fn),
73159f99
S
1823 'delete': False,
1824 }
1825
181c8655
PH
1826 # In Python 2.x, json.dump expects a bytestream.
1827 # In Python 3.x, it writes to a character stream
1828 if sys.version_info < (3, 0):
73159f99 1829 args['mode'] = 'wb'
181c8655 1830 else:
73159f99
S
1831 args.update({
1832 'mode': 'w',
1833 'encoding': 'utf-8',
1834 })
1835
c86b6142 1836 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
181c8655
PH
1837
1838 try:
1839 with tf:
75d43ca0 1840 json.dump(obj, tf, default=repr)
1394646a
IK
1841 if sys.platform == 'win32':
1842 # Need to remove existing file on Windows, else os.rename raises
1843 # WindowsError or FileExistsError.
1844 try:
1845 os.unlink(fn)
1846 except OSError:
1847 pass
9cd5f54e
R
1848 try:
1849 mask = os.umask(0)
1850 os.umask(mask)
1851 os.chmod(tf.name, 0o666 & ~mask)
1852 except OSError:
1853 pass
181c8655 1854 os.rename(tf.name, fn)
70a1165b 1855 except Exception:
181c8655
PH
1856 try:
1857 os.remove(tf.name)
1858 except OSError:
1859 pass
1860 raise
1861
1862
1863if sys.version_info >= (2, 7):
ee114368 1864 def find_xpath_attr(node, xpath, key, val=None):
59ae56fa 1865 """ Find the xpath xpath[@key=val] """
5d2354f1 1866 assert re.match(r'^[a-zA-Z_-]+$', key)
ee114368 1867 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
59ae56fa
PH
1868 return node.find(expr)
1869else:
ee114368 1870 def find_xpath_attr(node, xpath, key, val=None):
810c10ba 1871 for f in node.findall(compat_xpath(xpath)):
ee114368
S
1872 if key not in f.attrib:
1873 continue
1874 if val is None or f.attrib.get(key) == val:
59ae56fa
PH
1875 return f
1876 return None
1877
d7e66d39
JMF
1878# On python2.6 the xml.etree.ElementTree.Element methods don't support
1879# the namespace parameter
5f6a1245
JW
1880
1881
d7e66d39
JMF
1882def xpath_with_ns(path, ns_map):
1883 components = [c.split(':') for c in path.split('/')]
1884 replaced = []
1885 for c in components:
1886 if len(c) == 1:
1887 replaced.append(c[0])
1888 else:
1889 ns, tag = c
1890 replaced.append('{%s}%s' % (ns_map[ns], tag))
1891 return '/'.join(replaced)
1892
d77c3dfd 1893
a41fb80c 1894def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
578c0745 1895 def _find_xpath(xpath):
810c10ba 1896 return node.find(compat_xpath(xpath))
578c0745
S
1897
1898 if isinstance(xpath, (str, compat_str)):
1899 n = _find_xpath(xpath)
1900 else:
1901 for xp in xpath:
1902 n = _find_xpath(xp)
1903 if n is not None:
1904 break
d74bebd5 1905
8e636da4 1906 if n is None:
bf42a990
S
1907 if default is not NO_DEFAULT:
1908 return default
1909 elif fatal:
bf0ff932
PH
1910 name = xpath if name is None else name
1911 raise ExtractorError('Could not find XML element %s' % name)
1912 else:
1913 return None
a41fb80c
S
1914 return n
1915
1916
1917def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
8e636da4
S
1918 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1919 if n is None or n == default:
1920 return n
1921 if n.text is None:
1922 if default is not NO_DEFAULT:
1923 return default
1924 elif fatal:
1925 name = xpath if name is None else name
1926 raise ExtractorError('Could not find XML element\'s text %s' % name)
1927 else:
1928 return None
1929 return n.text
a41fb80c
S
1930
1931
1932def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1933 n = find_xpath_attr(node, xpath, key)
1934 if n is None:
1935 if default is not NO_DEFAULT:
1936 return default
1937 elif fatal:
1938 name = '%s[@%s]' % (xpath, key) if name is None else name
1939 raise ExtractorError('Could not find XML attribute %s' % name)
1940 else:
1941 return None
1942 return n.attrib[key]
bf0ff932
PH
1943
1944
9e6dd238 1945def get_element_by_id(id, html):
43e8fafd 1946 """Return the content of the tag with the specified ID in the passed HTML document"""
611c1dd9 1947 return get_element_by_attribute('id', id, html)
43e8fafd 1948
12ea2f30 1949
84c237fb 1950def get_element_by_class(class_name, html):
2af12ad9
TC
1951 """Return the content of the first tag with the specified class in the passed HTML document"""
1952 retval = get_elements_by_class(class_name, html)
1953 return retval[0] if retval else None
1954
1955
1956def get_element_by_attribute(attribute, value, html, escape_value=True):
1957 retval = get_elements_by_attribute(attribute, value, html, escape_value)
1958 return retval[0] if retval else None
1959
1960
1961def get_elements_by_class(class_name, html):
1962 """Return the content of all tags with the specified class in the passed HTML document as a list"""
1963 return get_elements_by_attribute(
84c237fb
YCH
1964 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1965 html, escape_value=False)
1966
1967
2af12ad9 1968def get_elements_by_attribute(attribute, value, html, escape_value=True):
43e8fafd 1969 """Return the content of the tag with the specified attribute in the passed HTML document"""
9e6dd238 1970
84c237fb
YCH
1971 value = re.escape(value) if escape_value else value
1972
2af12ad9
TC
1973 retlist = []
1974 for m in re.finditer(r'''(?xs)
38285056 1975 <([a-zA-Z0-9:._-]+)
609ff8ca 1976 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056 1977 \s+%s=['"]?%s['"]?
609ff8ca 1978 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056
PH
1979 \s*>
1980 (?P<content>.*?)
1981 </\1>
2af12ad9
TC
1982 ''' % (re.escape(attribute), value), html):
1983 res = m.group('content')
38285056 1984
2af12ad9
TC
1985 if res.startswith('"') or res.startswith("'"):
1986 res = res[1:-1]
38285056 1987
2af12ad9 1988 retlist.append(unescapeHTML(res))
a921f407 1989
2af12ad9 1990 return retlist
a921f407 1991
c5229f39 1992
8bb56eee
BF
1993class HTMLAttributeParser(compat_HTMLParser):
1994 """Trivial HTML parser to gather the attributes for a single element"""
b6e0c7d2 1995
8bb56eee 1996 def __init__(self):
c5229f39 1997 self.attrs = {}
8bb56eee
BF
1998 compat_HTMLParser.__init__(self)
1999
2000 def handle_starttag(self, tag, attrs):
2001 self.attrs = dict(attrs)
2002
c5229f39 2003
8bb56eee
BF
2004def extract_attributes(html_element):
2005 """Given a string for an HTML element such as
2006 <el
2007 a="foo" B="bar" c="&98;az" d=boz
2008 empty= noval entity="&amp;"
2009 sq='"' dq="'"
2010 >
2011 Decode and return a dictionary of attributes.
2012 {
2013 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
2014 'empty': '', 'noval': None, 'entity': '&',
2015 'sq': '"', 'dq': '\''
2016 }.
2017 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2018 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2019 """
2020 parser = HTMLAttributeParser()
b4a3d461
S
2021 try:
2022 parser.feed(html_element)
2023 parser.close()
2024 # Older Python may throw HTMLParseError in case of malformed HTML
2025 except compat_HTMLParseError:
2026 pass
8bb56eee 2027 return parser.attrs
9e6dd238 2028
c5229f39 2029
9e6dd238 2030def clean_html(html):
59ae15a5 2031 """Clean an HTML snippet into a readable string"""
dd622d7c
PH
2032
2033 if html is None: # Convenience for sanitizing descriptions etc.
2034 return html
2035
59ae15a5
PH
2036 # Newline vs <br />
2037 html = html.replace('\n', ' ')
edd9221c
TF
2038 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2039 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
59ae15a5
PH
2040 # Strip html tags
2041 html = re.sub('<.*?>', '', html)
2042 # Replace html entities
2043 html = unescapeHTML(html)
7decf895 2044 return html.strip()
9e6dd238
FV
2045
2046
d77c3dfd 2047def sanitize_open(filename, open_mode):
59ae15a5
PH
2048 """Try to open the given filename, and slightly tweak it if this fails.
2049
2050 Attempts to open the given filename. If this fails, it tries to change
2051 the filename slightly, step by step, until it's either able to open it
2052 or it fails and raises a final exception, like the standard open()
2053 function.
2054
2055 It returns the tuple (stream, definitive_file_name).
2056 """
2057 try:
28e614de 2058 if filename == '-':
59ae15a5
PH
2059 if sys.platform == 'win32':
2060 import msvcrt
2061 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
898280a0 2062 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
59ae15a5
PH
2063 stream = open(encodeFilename(filename), open_mode)
2064 return (stream, filename)
2065 except (IOError, OSError) as err:
f45c185f
PH
2066 if err.errno in (errno.EACCES,):
2067 raise
59ae15a5 2068
f45c185f 2069 # In case of error, try to remove win32 forbidden chars
d55de57b 2070 alt_filename = sanitize_path(filename)
f45c185f
PH
2071 if alt_filename == filename:
2072 raise
2073 else:
2074 # An exception here should be caught in the caller
d55de57b 2075 stream = open(encodeFilename(alt_filename), open_mode)
f45c185f 2076 return (stream, alt_filename)
d77c3dfd
FV
2077
2078
2079def timeconvert(timestr):
59ae15a5
PH
2080 """Convert RFC 2822 defined time string into system timestamp"""
2081 timestamp = None
2082 timetuple = email.utils.parsedate_tz(timestr)
2083 if timetuple is not None:
2084 timestamp = email.utils.mktime_tz(timetuple)
2085 return timestamp
1c469a94 2086
5f6a1245 2087
796173d0 2088def sanitize_filename(s, restricted=False, is_id=False):
59ae15a5
PH
2089 """Sanitizes a string so it could be used as part of a filename.
2090 If restricted is set, use a stricter subset of allowed characters.
158af524
S
2091 Set is_id if this is not an arbitrary string, but an ID that should be kept
2092 if possible.
59ae15a5
PH
2093 """
2094 def replace_insane(char):
c587cbb7
AT
2095 if restricted and char in ACCENT_CHARS:
2096 return ACCENT_CHARS[char]
59ae15a5
PH
2097 if char == '?' or ord(char) < 32 or ord(char) == 127:
2098 return ''
2099 elif char == '"':
2100 return '' if restricted else '\''
2101 elif char == ':':
2102 return '_-' if restricted else ' -'
2103 elif char in '\\/|*<>':
2104 return '_'
627dcfff 2105 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
59ae15a5
PH
2106 return '_'
2107 if restricted and ord(char) > 127:
2108 return '_'
2109 return char
2110
2aeb06d6
PH
2111 # Handle timestamps
2112 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
28e614de 2113 result = ''.join(map(replace_insane, s))
796173d0
PH
2114 if not is_id:
2115 while '__' in result:
2116 result = result.replace('__', '_')
2117 result = result.strip('_')
2118 # Common case of "Foreign band name - English song title"
2119 if restricted and result.startswith('-_'):
2120 result = result[2:]
5a42414b
PH
2121 if result.startswith('-'):
2122 result = '_' + result[len('-'):]
a7440261 2123 result = result.lstrip('.')
796173d0
PH
2124 if not result:
2125 result = '_'
59ae15a5 2126 return result
d77c3dfd 2127
5f6a1245 2128
c2934512 2129def sanitize_path(s, force=False):
a2aaf4db 2130 """Sanitizes and normalizes path on Windows"""
c2934512 2131 if sys.platform == 'win32':
c4218ac3 2132 force = False
c2934512 2133 drive_or_unc, _ = os.path.splitdrive(s)
2134 if sys.version_info < (2, 7) and not drive_or_unc:
2135 drive_or_unc, _ = os.path.splitunc(s)
2136 elif force:
2137 drive_or_unc = ''
2138 else:
a2aaf4db 2139 return s
c2934512 2140
be531ef1
S
2141 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2142 if drive_or_unc:
a2aaf4db
S
2143 norm_path.pop(0)
2144 sanitized_path = [
ec85ded8 2145 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
a2aaf4db 2146 for path_part in norm_path]
be531ef1
S
2147 if drive_or_unc:
2148 sanitized_path.insert(0, drive_or_unc + os.path.sep)
c4218ac3 2149 elif force and s[0] == os.path.sep:
2150 sanitized_path.insert(0, os.path.sep)
a2aaf4db
S
2151 return os.path.join(*sanitized_path)
2152
2153
17bcc626 2154def sanitize_url(url):
befa4708
S
2155 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2156 # the number of unwanted failures due to missing protocol
2157 if url.startswith('//'):
2158 return 'http:%s' % url
2159 # Fix some common typos seen so far
2160 COMMON_TYPOS = (
067aa17e 2161 # https://github.com/ytdl-org/youtube-dl/issues/15649
befa4708
S
2162 (r'^httpss://', r'https://'),
2163 # https://bx1.be/lives/direct-tv/
2164 (r'^rmtp([es]?)://', r'rtmp\1://'),
2165 )
2166 for mistake, fixup in COMMON_TYPOS:
2167 if re.match(mistake, url):
2168 return re.sub(mistake, fixup, url)
d2558234 2169 return escape_url(url)
17bcc626
S
2170
2171
67dda517 2172def sanitized_Request(url, *args, **kwargs):
17bcc626 2173 return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
67dda517
S
2174
2175
51098426
S
2176def expand_path(s):
2177 """Expand shell variables and ~"""
2178 return os.path.expandvars(compat_expanduser(s))
2179
2180
d77c3dfd 2181def orderedSet(iterable):
59ae15a5
PH
2182 """ Remove all duplicates from the input iterable """
2183 res = []
2184 for el in iterable:
2185 if el not in res:
2186 res.append(el)
2187 return res
d77c3dfd 2188
912b38b4 2189
55b2f099 2190def _htmlentity_transform(entity_with_semicolon):
4e408e47 2191 """Transforms an HTML entity to a character."""
55b2f099
YCH
2192 entity = entity_with_semicolon[:-1]
2193
4e408e47
PH
2194 # Known non-numeric HTML entity
2195 if entity in compat_html_entities.name2codepoint:
2196 return compat_chr(compat_html_entities.name2codepoint[entity])
2197
55b2f099
YCH
2198 # TODO: HTML5 allows entities without a semicolon. For example,
2199 # '&Eacuteric' should be decoded as 'Éric'.
2200 if entity_with_semicolon in compat_html_entities_html5:
2201 return compat_html_entities_html5[entity_with_semicolon]
2202
91757b0f 2203 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
4e408e47
PH
2204 if mobj is not None:
2205 numstr = mobj.group(1)
28e614de 2206 if numstr.startswith('x'):
4e408e47 2207 base = 16
28e614de 2208 numstr = '0%s' % numstr
4e408e47
PH
2209 else:
2210 base = 10
067aa17e 2211 # See https://github.com/ytdl-org/youtube-dl/issues/7518
7aefc49c
S
2212 try:
2213 return compat_chr(int(numstr, base))
2214 except ValueError:
2215 pass
4e408e47
PH
2216
2217 # Unknown entity in name, return its literal representation
7a3f0c00 2218 return '&%s;' % entity
4e408e47
PH
2219
2220
d77c3dfd 2221def unescapeHTML(s):
912b38b4
PH
2222 if s is None:
2223 return None
2224 assert type(s) == compat_str
d77c3dfd 2225
4e408e47 2226 return re.sub(
95f3f7c2 2227 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
d77c3dfd 2228
8bf48f23 2229
f5b1bca9 2230def process_communicate_or_kill(p, *args, **kwargs):
2231 try:
2232 return p.communicate(*args, **kwargs)
2233 except BaseException: # Including KeyboardInterrupt
2234 p.kill()
2235 p.wait()
2236 raise
2237
2238
aa49acd1
S
2239def get_subprocess_encoding():
2240 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2241 # For subprocess calls, encode with locale encoding
2242 # Refer to http://stackoverflow.com/a/9951851/35070
2243 encoding = preferredencoding()
2244 else:
2245 encoding = sys.getfilesystemencoding()
2246 if encoding is None:
2247 encoding = 'utf-8'
2248 return encoding
2249
2250
8bf48f23 2251def encodeFilename(s, for_subprocess=False):
59ae15a5
PH
2252 """
2253 @param s The name of the file
2254 """
d77c3dfd 2255
8bf48f23 2256 assert type(s) == compat_str
d77c3dfd 2257
59ae15a5
PH
2258 # Python 3 has a Unicode API
2259 if sys.version_info >= (3, 0):
2260 return s
0f00efed 2261
aa49acd1
S
2262 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2263 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2264 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2265 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2266 return s
2267
8ee239e9
YCH
2268 # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
2269 if sys.platform.startswith('java'):
2270 return s
2271
aa49acd1
S
2272 return s.encode(get_subprocess_encoding(), 'ignore')
2273
2274
2275def decodeFilename(b, for_subprocess=False):
2276
2277 if sys.version_info >= (3, 0):
2278 return b
2279
2280 if not isinstance(b, bytes):
2281 return b
2282
2283 return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2284
f07b74fc
PH
2285
2286def encodeArgument(s):
2287 if not isinstance(s, compat_str):
2288 # Legacy code that uses byte strings
2289 # Uncomment the following line after fixing all post processors
7af808a5 2290 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2291 s = s.decode('ascii')
2292 return encodeFilename(s, True)
2293
2294
aa49acd1
S
2295def decodeArgument(b):
2296 return decodeFilename(b, True)
2297
2298
8271226a
PH
2299def decodeOption(optval):
2300 if optval is None:
2301 return optval
2302 if isinstance(optval, bytes):
2303 optval = optval.decode(preferredencoding())
2304
2305 assert isinstance(optval, compat_str)
2306 return optval
1c256f70 2307
5f6a1245 2308
dbbbe555 2309def formatSeconds(secs, delim=':'):
4539dd30 2310 if secs > 3600:
dbbbe555 2311 return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
4539dd30 2312 elif secs > 60:
dbbbe555 2313 return '%d%s%02d' % (secs // 60, delim, secs % 60)
4539dd30
PH
2314 else:
2315 return '%d' % secs
2316
a0ddb8a2 2317
be4a824d
PH
2318def make_HTTPS_handler(params, **kwargs):
2319 opts_no_check_certificate = params.get('nocheckcertificate', False)
0db261ba 2320 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
be5f2c19 2321 context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
0db261ba 2322 if opts_no_check_certificate:
be5f2c19 2323 context.check_hostname = False
0db261ba 2324 context.verify_mode = ssl.CERT_NONE
a2366922 2325 try:
be4a824d 2326 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
a2366922
PH
2327 except TypeError:
2328 # Python 2.7.8
2329 # (create_default_context present but HTTPSHandler has no context=)
2330 pass
2331
2332 if sys.version_info < (3, 2):
d7932313 2333 return YoutubeDLHTTPSHandler(params, **kwargs)
aa37e3d4 2334 else: # Python 3.2 and 3.3
d7932313 2335 context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
ea6d901e 2336 context.verify_mode = (ssl.CERT_NONE
dca08720 2337 if opts_no_check_certificate
ea6d901e 2338 else ssl.CERT_REQUIRED)
303b479e 2339 context.set_default_verify_paths()
be4a824d 2340 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2341
732ea2f0 2342
5873d4cc 2343def bug_reports_message(before=';'):
08f2a92c 2344 if ytdl_is_updateable():
7a5c1cfe 2345 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2346 else:
7a5c1cfe 2347 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2348 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2349 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2350 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2351
2352 before = before.rstrip()
2353 if not before or before.endswith(('.', '!', '?')):
2354 msg = msg[0].title() + msg[1:]
2355
2356 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2357
2358
bf5b9d85
PM
2359class YoutubeDLError(Exception):
2360 """Base exception for YoutubeDL errors."""
2361 pass
2362
2363
3158150c 2364network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2365if hasattr(ssl, 'CertificateError'):
2366 network_exceptions.append(ssl.CertificateError)
2367network_exceptions = tuple(network_exceptions)
2368
2369
bf5b9d85 2370class ExtractorError(YoutubeDLError):
1c256f70 2371 """Error during info extraction."""
5f6a1245 2372
d11271dd 2373 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
9a82b238 2374 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2375 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238
PH
2376 """
2377
3158150c 2378 if sys.exc_info()[0] in network_exceptions:
9a82b238 2379 expected = True
d11271dd
PH
2380 if video_id is not None:
2381 msg = video_id + ': ' + msg
410f3e73 2382 if cause:
28e614de 2383 msg += ' (caused by %r)' % cause
9a82b238 2384 if not expected:
08f2a92c 2385 msg += bug_reports_message()
1c256f70 2386 super(ExtractorError, self).__init__(msg)
d5979c5d 2387
1c256f70 2388 self.traceback = tb
8cc83b8d 2389 self.exc_info = sys.exc_info() # preserve original exception
2eabb802 2390 self.cause = cause
d11271dd 2391 self.video_id = video_id
1c256f70 2392
01951dda
PH
2393 def format_traceback(self):
2394 if self.traceback is None:
2395 return None
28e614de 2396 return ''.join(traceback.format_tb(self.traceback))
01951dda 2397
1c256f70 2398
416c7fcb
PH
2399class UnsupportedError(ExtractorError):
2400 def __init__(self, url):
2401 super(UnsupportedError, self).__init__(
2402 'Unsupported URL: %s' % url, expected=True)
2403 self.url = url
2404
2405
55b3e45b
JMF
2406class RegexNotFoundError(ExtractorError):
2407 """Error when a regex didn't match"""
2408 pass
2409
2410
773f291d
S
2411class GeoRestrictedError(ExtractorError):
2412 """Geographic restriction Error exception.
2413
2414 This exception may be thrown when a video is not available from your
2415 geographic location due to geographic restrictions imposed by a website.
2416 """
b6e0c7d2 2417
773f291d
S
2418 def __init__(self, msg, countries=None):
2419 super(GeoRestrictedError, self).__init__(msg, expected=True)
2420 self.msg = msg
2421 self.countries = countries
2422
2423
bf5b9d85 2424class DownloadError(YoutubeDLError):
59ae15a5 2425 """Download Error exception.
d77c3dfd 2426
59ae15a5
PH
2427 This exception may be thrown by FileDownloader objects if they are not
2428 configured to continue on errors. They will contain the appropriate
2429 error message.
2430 """
5f6a1245 2431
8cc83b8d
FV
2432 def __init__(self, msg, exc_info=None):
2433 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2434 super(DownloadError, self).__init__(msg)
2435 self.exc_info = exc_info
d77c3dfd
FV
2436
2437
498f5606 2438class EntryNotInPlaylist(YoutubeDLError):
2439 """Entry not in playlist exception.
2440
2441 This exception will be thrown by YoutubeDL when a requested entry
2442 is not found in the playlist info_dict
2443 """
2444 pass
2445
2446
bf5b9d85 2447class SameFileError(YoutubeDLError):
59ae15a5 2448 """Same File exception.
d77c3dfd 2449
59ae15a5
PH
2450 This exception will be thrown by FileDownloader objects if they detect
2451 multiple files would have to be downloaded to the same file on disk.
2452 """
2453 pass
d77c3dfd
FV
2454
2455
bf5b9d85 2456class PostProcessingError(YoutubeDLError):
59ae15a5 2457 """Post Processing exception.
d77c3dfd 2458
59ae15a5
PH
2459 This exception may be raised by PostProcessor's .run() method to
2460 indicate an error in the postprocessing task.
2461 """
5f6a1245 2462
7851b379 2463 def __init__(self, msg):
bf5b9d85 2464 super(PostProcessingError, self).__init__(msg)
7851b379 2465 self.msg = msg
d77c3dfd 2466
5f6a1245 2467
8b0d7497 2468class ExistingVideoReached(YoutubeDLError):
2469 """ --max-downloads limit has been reached. """
2470 pass
2471
2472
2473class RejectedVideoReached(YoutubeDLError):
2474 """ --max-downloads limit has been reached. """
2475 pass
2476
2477
bf5b9d85 2478class MaxDownloadsReached(YoutubeDLError):
59ae15a5
PH
2479 """ --max-downloads limit has been reached. """
2480 pass
d77c3dfd
FV
2481
2482
bf5b9d85 2483class UnavailableVideoError(YoutubeDLError):
59ae15a5 2484 """Unavailable Format exception.
d77c3dfd 2485
59ae15a5
PH
2486 This exception will be thrown when a video is requested
2487 in a format that is not available for that video.
2488 """
2489 pass
d77c3dfd
FV
2490
2491
bf5b9d85 2492class ContentTooShortError(YoutubeDLError):
59ae15a5 2493 """Content Too Short exception.
d77c3dfd 2494
59ae15a5
PH
2495 This exception may be raised by FileDownloader objects when a file they
2496 download is too small for what the server announced first, indicating
2497 the connection was probably interrupted.
2498 """
d77c3dfd 2499
59ae15a5 2500 def __init__(self, downloaded, expected):
bf5b9d85
PM
2501 super(ContentTooShortError, self).__init__(
2502 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2503 )
2c7ed247 2504 # Both in bytes
59ae15a5
PH
2505 self.downloaded = downloaded
2506 self.expected = expected
d77c3dfd 2507
5f6a1245 2508
bf5b9d85 2509class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2510 def __init__(self, code=None, msg='Unknown error'):
2511 super(XAttrMetadataError, self).__init__(msg)
2512 self.code = code
bd264412 2513 self.msg = msg
efa97bdc
YCH
2514
2515 # Parsing code and msg
3089bc74 2516 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2517 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2518 self.reason = 'NO_SPACE'
2519 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2520 self.reason = 'VALUE_TOO_LONG'
2521 else:
2522 self.reason = 'NOT_SUPPORTED'
2523
2524
bf5b9d85 2525class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2526 pass
2527
2528
c5a59d93 2529def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2530 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2531 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2532 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2533 if sys.version_info < (3, 0):
65220c3b
S
2534 kwargs['strict'] = True
2535 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2536 source_address = ydl_handler._params.get('source_address')
8959018a 2537
be4a824d 2538 if source_address is not None:
8959018a
AU
 2539 # This is to work around _create_connection() from socket where it will try all
2540 # address data from getaddrinfo() including IPv6. This filters the result from
2541 # getaddrinfo() based on the source_address value.
2542 # This is based on the cpython socket.create_connection() function.
2543 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2544 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2545 host, port = address
2546 err = None
2547 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2548 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2549 ip_addrs = [addr for addr in addrs if addr[0] == af]
2550 if addrs and not ip_addrs:
2551 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2552 raise socket.error(
2553 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2554 % (ip_version, source_address[0]))
8959018a
AU
2555 for res in ip_addrs:
2556 af, socktype, proto, canonname, sa = res
2557 sock = None
2558 try:
2559 sock = socket.socket(af, socktype, proto)
2560 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2561 sock.settimeout(timeout)
2562 sock.bind(source_address)
2563 sock.connect(sa)
2564 err = None # Explicitly break reference cycle
2565 return sock
2566 except socket.error as _:
2567 err = _
2568 if sock is not None:
2569 sock.close()
2570 if err is not None:
2571 raise err
2572 else:
9e21e6d9
S
2573 raise socket.error('getaddrinfo returns an empty list')
2574 if hasattr(hc, '_create_connection'):
2575 hc._create_connection = _create_connection
be4a824d
PH
2576 sa = (source_address, 0)
2577 if hasattr(hc, 'source_address'): # Python 2.7+
2578 hc.source_address = sa
2579 else: # Python 2.6
2580 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2581 sock = _create_connection(
be4a824d
PH
2582 (self.host, self.port), self.timeout, sa)
2583 if is_https:
d7932313
PH
2584 self.sock = ssl.wrap_socket(
2585 sock, self.key_file, self.cert_file,
2586 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2587 else:
2588 self.sock = sock
2589 hc.connect = functools.partial(_hc_connect, hc)
2590
2591 return hc
2592
2593
87f0e62d 2594def handle_youtubedl_headers(headers):
992fc9d6
YCH
2595 filtered_headers = headers
2596
2597 if 'Youtubedl-no-compression' in filtered_headers:
2598 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2599 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2600
992fc9d6 2601 return filtered_headers
87f0e62d
YCH
2602
2603
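# Illustrative example for handle_youtubedl_headers (documentation sketch, not part of the original module):
#   handle_youtubedl_headers({'User-Agent': 'UA', 'Accept-Encoding': 'gzip', 'Youtubedl-no-compression': 'True'})
#   -> {'User-Agent': 'UA'}  (both the marker header and Accept-Encoding are dropped)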
acebc9cd 2604class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2605 """Handler for HTTP requests and responses.
2606
2607 This class, when installed with an OpenerDirector, automatically adds
2608 the standard headers to every HTTP request and handles gzipped and
2609 deflated responses from web servers. If compression is to be avoided in
2610 a particular request, the original request in the program code only has
0424ec30 2611 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2612 removed before making the real request.
2613
2614 Part of this code was copied from:
2615
2616 http://techknack.net/python-urllib2-handlers/
2617
2618 Andrew Rowls, the author of that code, agreed to release it to the
2619 public domain.
2620 """
2621
be4a824d
PH
2622 def __init__(self, params, *args, **kwargs):
2623 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2624 self._params = params
2625
2626 def http_open(self, req):
71aff188
YCH
2627 conn_class = compat_http_client.HTTPConnection
2628
2629 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2630 if socks_proxy:
2631 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2632 del req.headers['Ytdl-socks-proxy']
2633
be4a824d 2634 return self.do_open(functools.partial(
71aff188 2635 _create_http_connection, self, conn_class, False),
be4a824d
PH
2636 req)
2637
59ae15a5
PH
2638 @staticmethod
2639 def deflate(data):
fc2119f2 2640 if not data:
2641 return data
59ae15a5
PH
2642 try:
2643 return zlib.decompress(data, -zlib.MAX_WBITS)
2644 except zlib.error:
2645 return zlib.decompress(data)
2646
acebc9cd 2647 def http_request(self, req):
51f267d9
S
 2648 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
 2649 # always respected by websites; some tend to give out URLs with non-percent-encoded
2650 # non-ASCII characters (see telemb.py, ard.py [#3412])
2651 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2652 # To work around aforementioned issue we will replace request's original URL with
2653 # percent-encoded one
2654 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2655 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2656 url = req.get_full_url()
2657 url_escaped = escape_url(url)
2658
2659 # Substitute URL if any change after escaping
2660 if url != url_escaped:
15d260eb 2661 req = update_Request(req, url=url_escaped)
51f267d9 2662
33ac271b 2663 for h, v in std_headers.items():
3d5f7a39
JK
2664 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2665 # The dict keys are capitalized because of this bug by urllib
2666 if h.capitalize() not in req.headers:
33ac271b 2667 req.add_header(h, v)
87f0e62d
YCH
2668
2669 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2670
2671 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2672 # Python 2.6 is brain-dead when it comes to fragments
2673 req._Request__original = req._Request__original.partition('#')[0]
2674 req._Request__r_type = req._Request__r_type.partition('#')[0]
2675
59ae15a5
PH
2676 return req
2677
acebc9cd 2678 def http_response(self, req, resp):
59ae15a5
PH
2679 old_resp = resp
2680 # gzip
2681 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2682 content = resp.read()
2683 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2684 try:
2685 uncompressed = io.BytesIO(gz.read())
2686 except IOError as original_ioerror:
 2687 # There may be junk at the end of the file
2688 # See http://stackoverflow.com/q/4928560/35070 for details
2689 for i in range(1, 1024):
2690 try:
2691 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2692 uncompressed = io.BytesIO(gz.read())
2693 except IOError:
2694 continue
2695 break
2696 else:
2697 raise original_ioerror
b407d853 2698 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2699 resp.msg = old_resp.msg
c047270c 2700 del resp.headers['Content-encoding']
59ae15a5
PH
2701 # deflate
2702 if resp.headers.get('Content-encoding', '') == 'deflate':
2703 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2704 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2705 resp.msg = old_resp.msg
c047270c 2706 del resp.headers['Content-encoding']
ad729172 2707 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2708 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2709 if 300 <= resp.code < 400:
2710 location = resp.headers.get('Location')
2711 if location:
2712 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2713 if sys.version_info >= (3, 0):
2714 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2715 else:
2716 location = location.decode('utf-8')
5a4d9ddb
S
2717 location_escaped = escape_url(location)
2718 if location != location_escaped:
2719 del resp.headers['Location']
9a4aec8b
YCH
2720 if sys.version_info < (3, 0):
2721 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2722 resp.headers['Location'] = location_escaped
59ae15a5 2723 return resp
0f8d03f8 2724
acebc9cd
PH
2725 https_request = http_request
2726 https_response = http_response
bf50b038 2727
5de90176 2728
71aff188
YCH
2729def make_socks_conn_class(base_class, socks_proxy):
2730 assert issubclass(base_class, (
2731 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2732
2733 url_components = compat_urlparse.urlparse(socks_proxy)
2734 if url_components.scheme.lower() == 'socks5':
2735 socks_type = ProxyType.SOCKS5
2736 elif url_components.scheme.lower() in ('socks', 'socks4'):
2737 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2738 elif url_components.scheme.lower() == 'socks4a':
2739 socks_type = ProxyType.SOCKS4A
71aff188 2740
cdd94c2e
YCH
2741 def unquote_if_non_empty(s):
2742 if not s:
2743 return s
2744 return compat_urllib_parse_unquote_plus(s)
2745
71aff188
YCH
2746 proxy_args = (
2747 socks_type,
2748 url_components.hostname, url_components.port or 1080,
2749 True, # Remote DNS
cdd94c2e
YCH
2750 unquote_if_non_empty(url_components.username),
2751 unquote_if_non_empty(url_components.password),
71aff188
YCH
2752 )
2753
2754 class SocksConnection(base_class):
2755 def connect(self):
2756 self.sock = sockssocket()
2757 self.sock.setproxy(*proxy_args)
2758 if type(self.timeout) in (int, float):
2759 self.sock.settimeout(self.timeout)
2760 self.sock.connect((self.host, self.port))
2761
2762 if isinstance(self, compat_http_client.HTTPSConnection):
2763 if hasattr(self, '_context'): # Python > 2.6
2764 self.sock = self._context.wrap_socket(
2765 self.sock, server_hostname=self.host)
2766 else:
2767 self.sock = ssl.wrap_socket(self.sock)
2768
2769 return SocksConnection
2770
2771
be4a824d
PH
2772class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2773 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2774 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2775 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2776 self._params = params
2777
2778 def https_open(self, req):
4f264c02 2779 kwargs = {}
71aff188
YCH
2780 conn_class = self._https_conn_class
2781
4f264c02
JMF
2782 if hasattr(self, '_context'): # python > 2.6
2783 kwargs['context'] = self._context
2784 if hasattr(self, '_check_hostname'): # python 3.x
2785 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2786
2787 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2788 if socks_proxy:
2789 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2790 del req.headers['Ytdl-socks-proxy']
2791
be4a824d 2792 return self.do_open(functools.partial(
71aff188 2793 _create_http_connection, self, conn_class, True),
4f264c02 2794 req, **kwargs)
be4a824d
PH
2795
2796
1bab3437 2797class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2798 """
2799 See [1] for cookie file format.
2800
2801 1. https://curl.haxx.se/docs/http-cookies.html
2802 """
e7e62441 2803 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2804 _ENTRY_LEN = 7
2805 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2806# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2807
2808'''
2809 _CookieFileEntry = collections.namedtuple(
2810 'CookieFileEntry',
2811 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2812
1bab3437 2813 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2814 """
2815 Save cookies to a file.
2816
2817 Most of the code is taken from CPython 3.8 and slightly adapted
2818 to support cookie files with UTF-8 in both python 2 and 3.
2819 """
2820 if filename is None:
2821 if self.filename is not None:
2822 filename = self.filename
2823 else:
2824 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2825
1bab3437
S
2826 # Store session cookies with `expires` set to 0 instead of an empty
2827 # string
2828 for cookie in self:
2829 if cookie.expires is None:
2830 cookie.expires = 0
c380cc28
S
2831
2832 with io.open(filename, 'w', encoding='utf-8') as f:
2833 f.write(self._HEADER)
2834 now = time.time()
2835 for cookie in self:
2836 if not ignore_discard and cookie.discard:
2837 continue
2838 if not ignore_expires and cookie.is_expired(now):
2839 continue
2840 if cookie.secure:
2841 secure = 'TRUE'
2842 else:
2843 secure = 'FALSE'
2844 if cookie.domain.startswith('.'):
2845 initial_dot = 'TRUE'
2846 else:
2847 initial_dot = 'FALSE'
2848 if cookie.expires is not None:
2849 expires = compat_str(cookie.expires)
2850 else:
2851 expires = ''
2852 if cookie.value is None:
2853 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2854 # with no name, whereas http.cookiejar regards it as a
2855 # cookie with no value.
2856 name = ''
2857 value = cookie.name
2858 else:
2859 name = cookie.name
2860 value = cookie.value
2861 f.write(
2862 '\t'.join([cookie.domain, initial_dot, cookie.path,
2863 secure, expires, name, value]) + '\n')
1bab3437
S
2864
2865 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2866 """Load cookies from a file."""
2867 if filename is None:
2868 if self.filename is not None:
2869 filename = self.filename
2870 else:
2871 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2872
c380cc28
S
2873 def prepare_line(line):
2874 if line.startswith(self._HTTPONLY_PREFIX):
2875 line = line[len(self._HTTPONLY_PREFIX):]
2876 # comments and empty lines are fine
2877 if line.startswith('#') or not line.strip():
2878 return line
2879 cookie_list = line.split('\t')
2880 if len(cookie_list) != self._ENTRY_LEN:
2881 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
2882 cookie = self._CookieFileEntry(*cookie_list)
2883 if cookie.expires_at and not cookie.expires_at.isdigit():
2884 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
2885 return line
2886
e7e62441 2887 cf = io.StringIO()
c380cc28 2888 with io.open(filename, encoding='utf-8') as f:
e7e62441 2889 for line in f:
c380cc28
S
2890 try:
2891 cf.write(prepare_line(line))
2892 except compat_cookiejar.LoadError as e:
2893 write_string(
2894 'WARNING: skipping cookie file entry due to %s: %r\n'
2895 % (e, line), sys.stderr)
2896 continue
e7e62441 2897 cf.seek(0)
2898 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
2899 # Session cookies are denoted by either `expires` field set to
2900 # an empty string or 0. MozillaCookieJar only recognizes the former
 2901 # (see [1]). So we need to force the latter to be recognized as session
2902 # cookies on our own.
2903 # Session cookies may be important for cookies-based authentication,
 2904 # e.g. usually, when a user does not check the 'Remember me' check box while
2905 # logging in on a site, some important cookies are stored as session
2906 # cookies so that not recognizing them will result in failed login.
2907 # 1. https://bugs.python.org/issue17164
2908 for cookie in self:
2909 # Treat `expires=0` cookies as session cookies
2910 if cookie.expires == 0:
2911 cookie.expires = None
2912 cookie.discard = True
2913
2914
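# Illustrative usage of YoutubeDLCookieJar (documentation sketch, not part of the original module;
# the cookies.txt path is hypothetical):
#   jar = YoutubeDLCookieJar('/path/to/cookies.txt')
#   jar.load(ignore_discard=True, ignore_expires=True)
#   # ... use the jar, e.g. via YoutubeDLCookieProcessor(jar), then persist it ...
#   jar.save(ignore_discard=True, ignore_expires=True)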
a6420bf5
S
2915class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
2916 def __init__(self, cookiejar=None):
2917 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
2918
2919 def http_response(self, request, response):
2920 # Python 2 will choke on next HTTP request in row if there are non-ASCII
2921 # characters in Set-Cookie HTTP header of last response (see
067aa17e 2922 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
2923 # In order to at least prevent crashing we will percent encode Set-Cookie
2924 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
2925 # if sys.version_info < (3, 0) and response.headers:
2926 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
2927 # set_cookie = response.headers.get(set_cookie_header)
2928 # if set_cookie:
2929 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
2930 # if set_cookie != set_cookie_escaped:
2931 # del response.headers[set_cookie_header]
2932 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
2933 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
2934
f5fa042c 2935 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
2936 https_response = http_response
2937
2938
fca6dba8 2939class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 2940 """YoutubeDL redirect handler
2941
2942 The code is based on HTTPRedirectHandler implementation from CPython [1].
2943
2944 This redirect handler solves two issues:
2945 - ensures redirect URL is always unicode under python 2
2946 - introduces support for experimental HTTP response status code
2947 308 Permanent Redirect [2] used by some sites [3]
2948
2949 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
2950 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
2951 3. https://github.com/ytdl-org/youtube-dl/issues/28768
2952 """
2953
2954 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
2955
2956 def redirect_request(self, req, fp, code, msg, headers, newurl):
2957 """Return a Request or None in response to a redirect.
2958
2959 This is called by the http_error_30x methods when a
2960 redirection response is received. If a redirection should
2961 take place, return a new Request to allow http_error_30x to
2962 perform the redirect. Otherwise, raise HTTPError if no-one
2963 else should try to handle this url. Return None if you can't
2964 but another Handler might.
2965 """
2966 m = req.get_method()
2967 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
2968 or code in (301, 302, 303) and m == "POST")):
2969 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
2970 # Strictly (according to RFC 2616), 301 or 302 in response to
2971 # a POST MUST NOT cause a redirection without confirmation
2972 # from the user (of urllib.request, in this case). In practice,
2973 # essentially all clients do redirect in this case, so we do
2974 # the same.
2975
2976 # On python 2 urlh.geturl() may sometimes return redirect URL
 2977 # as a byte string instead of unicode. This workaround forces
 2978 # it to always return unicode.
2979 if sys.version_info[0] < 3:
2980 newurl = compat_str(newurl)
2981
2982 # Be conciliant with URIs containing a space. This is mainly
2983 # redundant with the more complete encoding done in http_error_302(),
2984 # but it is kept for compatibility with other callers.
2985 newurl = newurl.replace(' ', '%20')
2986
2987 CONTENT_HEADERS = ("content-length", "content-type")
2988 # NB: don't use dict comprehension for python 2.6 compatibility
2989 newheaders = dict((k, v) for k, v in req.headers.items()
2990 if k.lower() not in CONTENT_HEADERS)
2991 return compat_urllib_request.Request(
2992 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
2993 unverifiable=True)
fca6dba8
S
2994
2995
46f59e89
S
2996def extract_timezone(date_str):
2997 m = re.search(
2998 r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
2999 date_str)
3000 if not m:
3001 timezone = datetime.timedelta()
3002 else:
3003 date_str = date_str[:-len(m.group('tz'))]
3004 if not m.group('sign'):
3005 timezone = datetime.timedelta()
3006 else:
3007 sign = 1 if m.group('sign') == '+' else -1
3008 timezone = datetime.timedelta(
3009 hours=sign * int(m.group('hours')),
3010 minutes=sign * int(m.group('minutes')))
3011 return timezone, date_str
3012
3013
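# Illustrative example for extract_timezone (documentation sketch, not part of the original module):
#   extract_timezone('2014-03-23T23:04:26+0100')
#   -> (a +1 hour timedelta, '2014-03-23T23:04:26')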
08b38d54 3014def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3015 """ Return a UNIX timestamp from the given date """
3016
3017 if date_str is None:
3018 return None
3019
52c3a6e4
S
3020 date_str = re.sub(r'\.[0-9]+', '', date_str)
3021
08b38d54 3022 if timezone is None:
46f59e89
S
3023 timezone, date_str = extract_timezone(date_str)
3024
52c3a6e4
S
3025 try:
3026 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3027 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3028 return calendar.timegm(dt.timetuple())
3029 except ValueError:
3030 pass
912b38b4
PH
3031
3032
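# Illustrative examples for parse_iso8601 (documentation sketch, not part of the original module):
#   parse_iso8601('2014-03-23T23:04:26+0100')  -> 1395612266
#   parse_iso8601('2014-03-23T22:04:26Z')      -> 1395612266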
46f59e89
S
3033def date_formats(day_first=True):
3034 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3035
3036
42bdd9d0 3037def unified_strdate(date_str, day_first=True):
bf50b038 3038 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3039
3040 if date_str is None:
3041 return None
bf50b038 3042 upload_date = None
5f6a1245 3043 # Replace commas
026fcc04 3044 date_str = date_str.replace(',', ' ')
42bdd9d0 3045 # Remove AM/PM + timezone
9bb8e0a3 3046 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3047 _, date_str = extract_timezone(date_str)
42bdd9d0 3048
46f59e89 3049 for expression in date_formats(day_first):
bf50b038
JMF
3050 try:
3051 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3052 except ValueError:
bf50b038 3053 pass
42393ce2
PH
3054 if upload_date is None:
3055 timetuple = email.utils.parsedate_tz(date_str)
3056 if timetuple:
c6b9cf05
S
3057 try:
3058 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3059 except ValueError:
3060 pass
6a750402
JMF
3061 if upload_date is not None:
3062 return compat_str(upload_date)
bf50b038 3063
5f6a1245 3064
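# Illustrative examples for unified_strdate (documentation sketch, not part of the original module):
#   unified_strdate('December 21, 2010')  -> '20101221'
#   unified_strdate('1968-12-10')         -> '19681210'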
46f59e89
S
3065def unified_timestamp(date_str, day_first=True):
3066 if date_str is None:
3067 return None
3068
2ae2ffda 3069 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3070
7dc2a74e 3071 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3072 timezone, date_str = extract_timezone(date_str)
3073
3074 # Remove AM/PM + timezone
3075 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3076
deef3195
S
3077 # Remove unrecognized timezones from ISO 8601 alike timestamps
3078 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3079 if m:
3080 date_str = date_str[:-len(m.group('tz'))]
3081
f226880c
PH
3082 # Python only supports microseconds, so remove nanoseconds
3083 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3084 if m:
3085 date_str = m.group(1)
3086
46f59e89
S
3087 for expression in date_formats(day_first):
3088 try:
7dc2a74e 3089 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3090 return calendar.timegm(dt.timetuple())
3091 except ValueError:
3092 pass
3093 timetuple = email.utils.parsedate_tz(date_str)
3094 if timetuple:
7dc2a74e 3095 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3096
3097
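# Illustrative example for unified_timestamp (documentation sketch, not part of the original module):
#   unified_timestamp('December 21, 2010')  -> 1292889600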
28e614de 3098def determine_ext(url, default_ext='unknown_video'):
85750f89 3099 if url is None or '.' not in url:
f4776371 3100 return default_ext
9cb9a5df 3101 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3102 if re.match(r'^[A-Za-z0-9]+$', guess):
3103 return guess
a7aaa398
S
 3104 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
3105 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3106 return guess.rstrip('/')
73e79f2a 3107 else:
cbdbb766 3108 return default_ext
73e79f2a 3109
5f6a1245 3110
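# Illustrative examples for determine_ext (documentation sketch, not part of the original module):
#   determine_ext('http://example.com/video.mp4')              -> 'mp4'
#   determine_ext('http://example.com/foo/bar.mp4/?download')  -> 'mp4'
#   determine_ext('http://example.com/download')               -> 'unknown_video'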
824fa511
S
3111def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3112 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3113
5f6a1245 3114
9e62f283 3115def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3116 """
3117 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3118 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3119
3120 format: string date format used to return datetime object from
3121 precision: round the time portion of a datetime object.
3122 auto|microsecond|second|minute|hour|day.
3123 auto: round to the unit provided in date_str (if applicable).
3124 """
3125 auto_precision = False
3126 if precision == 'auto':
3127 auto_precision = True
3128 precision = 'microsecond'
3129 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3130 if date_str in ('now', 'today'):
37254abc 3131 return today
f8795e10
PH
3132 if date_str == 'yesterday':
3133 return today - datetime.timedelta(days=1)
9e62f283 3134 match = re.match(
3135 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3136 date_str)
37254abc 3137 if match is not None:
9e62f283 3138 start_time = datetime_from_str(match.group('start'), precision, format)
3139 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3140 unit = match.group('unit')
9e62f283 3141 if unit == 'month' or unit == 'year':
3142 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3143 unit = 'day'
9e62f283 3144 else:
3145 if unit == 'week':
3146 unit = 'day'
3147 time *= 7
3148 delta = datetime.timedelta(**{unit + 's': time})
3149 new_date = start_time + delta
3150 if auto_precision:
3151 return datetime_round(new_date, unit)
3152 return new_date
3153
3154 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3155
3156
3157def date_from_str(date_str, format='%Y%m%d'):
3158 """
3159 Return a datetime object from a string in the format YYYYMMDD or
3160 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3161
3162 format: string date format used to return datetime object from
3163 """
3164 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3165
3166
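# Illustrative examples for date_from_str (documentation sketch, not part of the original module;
# relative forms are evaluated against the current time):
#   date_from_str('20200315')   -> datetime.date(2020, 3, 15)
#   date_from_str('now-1week')  -> the calendar date one week before today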
3167def datetime_add_months(dt, months):
3168 """Increment/Decrement a datetime object by months."""
3169 month = dt.month + months - 1
3170 year = dt.year + month // 12
3171 month = month % 12 + 1
3172 day = min(dt.day, calendar.monthrange(year, month)[1])
3173 return dt.replace(year, month, day)
3174
3175
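# Illustrative example for datetime_add_months (documentation sketch, not part of the original module):
#   datetime_add_months(datetime.datetime(2021, 1, 31), 1)  -> datetime.datetime(2021, 2, 28, 0, 0)
#   (the day is clamped to the length of the target month)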
3176def datetime_round(dt, precision='day'):
3177 """
3178 Round a datetime object's time to a specific precision
3179 """
3180 if precision == 'microsecond':
3181 return dt
3182
3183 unit_seconds = {
3184 'day': 86400,
3185 'hour': 3600,
3186 'minute': 60,
3187 'second': 1,
3188 }
3189 roundto = lambda x, n: ((x + n / 2) // n) * n
3190 timestamp = calendar.timegm(dt.timetuple())
3191 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3192
3193
e63fc1be 3194def hyphenate_date(date_str):
3195 """
3196 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3197 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3198 if match is not None:
3199 return '-'.join(match.groups())
3200 else:
3201 return date_str
3202
5f6a1245 3203
bd558525
JMF
3204class DateRange(object):
3205 """Represents a time interval between two dates"""
5f6a1245 3206
bd558525
JMF
3207 def __init__(self, start=None, end=None):
3208 """start and end must be strings in the format accepted by date"""
3209 if start is not None:
3210 self.start = date_from_str(start)
3211 else:
3212 self.start = datetime.datetime.min.date()
3213 if end is not None:
3214 self.end = date_from_str(end)
3215 else:
3216 self.end = datetime.datetime.max.date()
37254abc 3217 if self.start > self.end:
bd558525 3218 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3219
bd558525
JMF
3220 @classmethod
3221 def day(cls, day):
3222 """Returns a range that only contains the given day"""
5f6a1245
JW
3223 return cls(day, day)
3224
bd558525
JMF
3225 def __contains__(self, date):
3226 """Check if the date is in the range"""
37254abc
JMF
3227 if not isinstance(date, datetime.date):
3228 date = date_from_str(date)
3229 return self.start <= date <= self.end
5f6a1245 3230
bd558525 3231 def __str__(self):
5f6a1245 3232 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3233
3234
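# Illustrative usage of DateRange (documentation sketch, not part of the original module):
#   '20200315' in DateRange('20200101', '20200630')  -> True
#   '20200315' in DateRange.day('20200101')          -> False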
3235def platform_name():
3236 """ Returns the platform name as a compat_str """
3237 res = platform.platform()
3238 if isinstance(res, bytes):
3239 res = res.decode(preferredencoding())
3240
3241 assert isinstance(res, compat_str)
3242 return res
c257baff
PH
3243
3244
b58ddb32
PH
3245def _windows_write_string(s, out):
3246 """ Returns True if the string was written using special methods,
3247 False if it has yet to be written out."""
3248 # Adapted from http://stackoverflow.com/a/3259271/35070
3249
3250 import ctypes
3251 import ctypes.wintypes
3252
3253 WIN_OUTPUT_IDS = {
3254 1: -11,
3255 2: -12,
3256 }
3257
a383a98a
PH
3258 try:
3259 fileno = out.fileno()
3260 except AttributeError:
3261 # If the output stream doesn't have a fileno, it's virtual
3262 return False
aa42e873
PH
3263 except io.UnsupportedOperation:
3264 # Some strange Windows pseudo files?
3265 return False
b58ddb32
PH
3266 if fileno not in WIN_OUTPUT_IDS:
3267 return False
3268
d7cd9a9e 3269 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3270 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3271 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3272 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3273
d7cd9a9e 3274 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3275 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3276 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3277 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3278 written = ctypes.wintypes.DWORD(0)
3279
d7cd9a9e 3280 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3281 FILE_TYPE_CHAR = 0x0002
3282 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3283 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3284 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3285 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3286 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3287 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3288
3289 def not_a_console(handle):
3290 if handle == INVALID_HANDLE_VALUE or handle is None:
3291 return True
3089bc74
S
3292 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3293 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3294
3295 if not_a_console(h):
3296 return False
3297
d1b9c912
PH
3298 def next_nonbmp_pos(s):
3299 try:
3300 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3301 except StopIteration:
3302 return len(s)
3303
3304 while s:
3305 count = min(next_nonbmp_pos(s), 1024)
3306
b58ddb32 3307 ret = WriteConsoleW(
d1b9c912 3308 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3309 if ret == 0:
3310 raise OSError('Failed to write string')
d1b9c912
PH
3311 if not count: # We just wrote a non-BMP character
3312 assert written.value == 2
3313 s = s[1:]
3314 else:
3315 assert written.value > 0
3316 s = s[written.value:]
b58ddb32
PH
3317 return True
3318
3319
734f90bb 3320def write_string(s, out=None, encoding=None):
7459e3a2
PH
3321 if out is None:
3322 out = sys.stderr
8bf48f23 3323 assert type(s) == compat_str
7459e3a2 3324
b58ddb32
PH
3325 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3326 if _windows_write_string(s, out):
3327 return
3328
3089bc74
S
3329 if ('b' in getattr(out, 'mode', '')
3330 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3331 byt = s.encode(encoding or preferredencoding(), 'ignore')
3332 out.write(byt)
3333 elif hasattr(out, 'buffer'):
3334 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3335 byt = s.encode(enc, 'ignore')
3336 out.buffer.write(byt)
3337 else:
8bf48f23 3338 out.write(s)
7459e3a2
PH
3339 out.flush()
3340
3341
48ea9cea
PH
3342def bytes_to_intlist(bs):
3343 if not bs:
3344 return []
3345 if isinstance(bs[0], int): # Python 3
3346 return list(bs)
3347 else:
3348 return [ord(c) for c in bs]
3349
c257baff 3350
cba892fa 3351def intlist_to_bytes(xs):
3352 if not xs:
3353 return b''
edaa23f8 3354 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3355
3356
c1c9a79c
PH
3357# Cross-platform file locking
3358if sys.platform == 'win32':
3359 import ctypes.wintypes
3360 import msvcrt
3361
3362 class OVERLAPPED(ctypes.Structure):
3363 _fields_ = [
3364 ('Internal', ctypes.wintypes.LPVOID),
3365 ('InternalHigh', ctypes.wintypes.LPVOID),
3366 ('Offset', ctypes.wintypes.DWORD),
3367 ('OffsetHigh', ctypes.wintypes.DWORD),
3368 ('hEvent', ctypes.wintypes.HANDLE),
3369 ]
3370
3371 kernel32 = ctypes.windll.kernel32
3372 LockFileEx = kernel32.LockFileEx
3373 LockFileEx.argtypes = [
3374 ctypes.wintypes.HANDLE, # hFile
3375 ctypes.wintypes.DWORD, # dwFlags
3376 ctypes.wintypes.DWORD, # dwReserved
3377 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3378 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3379 ctypes.POINTER(OVERLAPPED) # Overlapped
3380 ]
3381 LockFileEx.restype = ctypes.wintypes.BOOL
3382 UnlockFileEx = kernel32.UnlockFileEx
3383 UnlockFileEx.argtypes = [
3384 ctypes.wintypes.HANDLE, # hFile
3385 ctypes.wintypes.DWORD, # dwReserved
3386 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3387 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3388 ctypes.POINTER(OVERLAPPED) # Overlapped
3389 ]
3390 UnlockFileEx.restype = ctypes.wintypes.BOOL
3391 whole_low = 0xffffffff
3392 whole_high = 0x7fffffff
3393
3394 def _lock_file(f, exclusive):
3395 overlapped = OVERLAPPED()
3396 overlapped.Offset = 0
3397 overlapped.OffsetHigh = 0
3398 overlapped.hEvent = 0
3399 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3400 handle = msvcrt.get_osfhandle(f.fileno())
3401 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3402 whole_low, whole_high, f._lock_file_overlapped_p):
3403 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3404
3405 def _unlock_file(f):
3406 assert f._lock_file_overlapped_p
3407 handle = msvcrt.get_osfhandle(f.fileno())
3408 if not UnlockFileEx(handle, 0,
3409 whole_low, whole_high, f._lock_file_overlapped_p):
3410 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3411
3412else:
399a76e6
YCH
 3413 # Some platforms, such as Jython, are missing fcntl
3414 try:
3415 import fcntl
c1c9a79c 3416
399a76e6
YCH
3417 def _lock_file(f, exclusive):
3418 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3419
399a76e6
YCH
3420 def _unlock_file(f):
3421 fcntl.flock(f, fcntl.LOCK_UN)
3422 except ImportError:
3423 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3424
3425 def _lock_file(f, exclusive):
3426 raise IOError(UNSUPPORTED_MSG)
3427
3428 def _unlock_file(f):
3429 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3430
3431
3432class locked_file(object):
3433 def __init__(self, filename, mode, encoding=None):
3434 assert mode in ['r', 'a', 'w']
3435 self.f = io.open(filename, mode, encoding=encoding)
3436 self.mode = mode
3437
3438 def __enter__(self):
3439 exclusive = self.mode != 'r'
3440 try:
3441 _lock_file(self.f, exclusive)
3442 except IOError:
3443 self.f.close()
3444 raise
3445 return self
3446
3447 def __exit__(self, etype, value, traceback):
3448 try:
3449 _unlock_file(self.f)
3450 finally:
3451 self.f.close()
3452
3453 def __iter__(self):
3454 return iter(self.f)
3455
3456 def write(self, *args):
3457 return self.f.write(*args)
3458
3459 def read(self, *args):
3460 return self.f.read(*args)
4eb7f1d1
JMF
3461
3462
4644ac55
S
3463def get_filesystem_encoding():
3464 encoding = sys.getfilesystemencoding()
3465 return encoding if encoding is not None else 'utf-8'
3466
3467
4eb7f1d1 3468def shell_quote(args):
a6a173c2 3469 quoted_args = []
4644ac55 3470 encoding = get_filesystem_encoding()
a6a173c2
JMF
3471 for a in args:
3472 if isinstance(a, bytes):
3473 # We may get a filename encoded with 'encodeFilename'
3474 a = a.decode(encoding)
aefce8e6 3475 quoted_args.append(compat_shlex_quote(a))
28e614de 3476 return ' '.join(quoted_args)
9d4660ca
PH
3477
3478
3479def smuggle_url(url, data):
3480 """ Pass additional data in a URL for internal use. """
3481
81953d1a
RA
3482 url, idata = unsmuggle_url(url, {})
3483 data.update(idata)
15707c7e 3484 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3485 {'__youtubedl_smuggle': json.dumps(data)})
3486 return url + '#' + sdata
9d4660ca
PH
3487
3488
79f82953 3489def unsmuggle_url(smug_url, default=None):
83e865a3 3490 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3491 return smug_url, default
28e614de
PH
3492 url, _, sdata = smug_url.rpartition('#')
3493 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3494 data = json.loads(jsond)
3495 return url, data
02dbf93f
PH
3496
3497
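# Illustrative round trip for smuggle_url/unsmuggle_url (documentation sketch, not part of the
# original module; the URL and payload are hypothetical):
#   smuggled = smuggle_url('http://example.com/video', {'referer': 'http://example.com/'})
#   unsmuggle_url(smuggled)  -> ('http://example.com/video', {'referer': 'http://example.com/'})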
02dbf93f
PH
3498def format_bytes(bytes):
3499 if bytes is None:
28e614de 3500 return 'N/A'
02dbf93f
PH
3501 if type(bytes) is str:
3502 bytes = float(bytes)
3503 if bytes == 0.0:
3504 exponent = 0
3505 else:
3506 exponent = int(math.log(bytes, 1024.0))
28e614de 3507 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3508 converted = float(bytes) / float(1024 ** exponent)
28e614de 3509 return '%.2f%s' % (converted, suffix)
f53c966a 3510
1c088fa8 3511
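# Illustrative examples for format_bytes (documentation sketch, not part of the original module):
#   format_bytes(1024)  -> '1.00KiB'
#   format_bytes(2500)  -> '2.44KiB'
#   format_bytes(None)  -> 'N/A'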
fb47597b
S
3512def lookup_unit_table(unit_table, s):
3513 units_re = '|'.join(re.escape(u) for u in unit_table)
3514 m = re.match(
782b1b5b 3515 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3516 if not m:
3517 return None
3518 num_str = m.group('num').replace(',', '.')
3519 mult = unit_table[m.group('unit')]
3520 return int(float(num_str) * mult)
3521
3522
be64b5b0
PH
3523def parse_filesize(s):
3524 if s is None:
3525 return None
3526
dfb1b146 3527 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3528 # but we support those too
3529 _UNIT_TABLE = {
3530 'B': 1,
3531 'b': 1,
70852b47 3532 'bytes': 1,
be64b5b0
PH
3533 'KiB': 1024,
3534 'KB': 1000,
3535 'kB': 1024,
3536 'Kb': 1000,
13585d76 3537 'kb': 1000,
70852b47
YCH
3538 'kilobytes': 1000,
3539 'kibibytes': 1024,
be64b5b0
PH
3540 'MiB': 1024 ** 2,
3541 'MB': 1000 ** 2,
3542 'mB': 1024 ** 2,
3543 'Mb': 1000 ** 2,
13585d76 3544 'mb': 1000 ** 2,
70852b47
YCH
3545 'megabytes': 1000 ** 2,
3546 'mebibytes': 1024 ** 2,
be64b5b0
PH
3547 'GiB': 1024 ** 3,
3548 'GB': 1000 ** 3,
3549 'gB': 1024 ** 3,
3550 'Gb': 1000 ** 3,
13585d76 3551 'gb': 1000 ** 3,
70852b47
YCH
3552 'gigabytes': 1000 ** 3,
3553 'gibibytes': 1024 ** 3,
be64b5b0
PH
3554 'TiB': 1024 ** 4,
3555 'TB': 1000 ** 4,
3556 'tB': 1024 ** 4,
3557 'Tb': 1000 ** 4,
13585d76 3558 'tb': 1000 ** 4,
70852b47
YCH
3559 'terabytes': 1000 ** 4,
3560 'tebibytes': 1024 ** 4,
be64b5b0
PH
3561 'PiB': 1024 ** 5,
3562 'PB': 1000 ** 5,
3563 'pB': 1024 ** 5,
3564 'Pb': 1000 ** 5,
13585d76 3565 'pb': 1000 ** 5,
70852b47
YCH
3566 'petabytes': 1000 ** 5,
3567 'pebibytes': 1024 ** 5,
be64b5b0
PH
3568 'EiB': 1024 ** 6,
3569 'EB': 1000 ** 6,
3570 'eB': 1024 ** 6,
3571 'Eb': 1000 ** 6,
13585d76 3572 'eb': 1000 ** 6,
70852b47
YCH
3573 'exabytes': 1000 ** 6,
3574 'exbibytes': 1024 ** 6,
be64b5b0
PH
3575 'ZiB': 1024 ** 7,
3576 'ZB': 1000 ** 7,
3577 'zB': 1024 ** 7,
3578 'Zb': 1000 ** 7,
13585d76 3579 'zb': 1000 ** 7,
70852b47
YCH
3580 'zettabytes': 1000 ** 7,
3581 'zebibytes': 1024 ** 7,
be64b5b0
PH
3582 'YiB': 1024 ** 8,
3583 'YB': 1000 ** 8,
3584 'yB': 1024 ** 8,
3585 'Yb': 1000 ** 8,
13585d76 3586 'yb': 1000 ** 8,
70852b47
YCH
3587 'yottabytes': 1000 ** 8,
3588 'yobibytes': 1024 ** 8,
be64b5b0
PH
3589 }
3590
fb47597b
S
3591 return lookup_unit_table(_UNIT_TABLE, s)
3592
3593
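# Illustrative examples for parse_filesize (documentation sketch, not part of the original module):
#   parse_filesize('1.2MiB')  -> 1258291   (1.2 * 1024 ** 2, truncated to int)
#   parse_filesize('5 GB')    -> 5000000000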
3594def parse_count(s):
3595 if s is None:
be64b5b0
PH
3596 return None
3597
fb47597b
S
3598 s = s.strip()
3599
3600 if re.match(r'^[\d,.]+$', s):
3601 return str_to_int(s)
3602
3603 _UNIT_TABLE = {
3604 'k': 1000,
3605 'K': 1000,
3606 'm': 1000 ** 2,
3607 'M': 1000 ** 2,
3608 'kk': 1000 ** 2,
3609 'KK': 1000 ** 2,
3610 }
be64b5b0 3611
fb47597b 3612 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3613
2f7ae819 3614
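# Illustrative examples for parse_count (documentation sketch, not part of the original module):
#   parse_count('1,000')  -> 1000
#   parse_count('1.5M')   -> 1500000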
b871d7e9
S
3615def parse_resolution(s):
3616 if s is None:
3617 return {}
3618
3619 mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
3620 if mobj:
3621 return {
3622 'width': int(mobj.group('w')),
3623 'height': int(mobj.group('h')),
3624 }
3625
3626 mobj = re.search(r'\b(\d+)[pPiI]\b', s)
3627 if mobj:
3628 return {'height': int(mobj.group(1))}
3629
3630 mobj = re.search(r'\b([48])[kK]\b', s)
3631 if mobj:
3632 return {'height': int(mobj.group(1)) * 540}
3633
3634 return {}
3635
3636
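# Illustrative examples for parse_resolution (documentation sketch, not part of the original module):
#   parse_resolution('1920x1080')  -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')       -> {'height': 720}
#   parse_resolution('4k')         -> {'height': 2160}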
0dc41787
S
3637def parse_bitrate(s):
3638 if not isinstance(s, compat_str):
3639 return
3640 mobj = re.search(r'\b(\d+)\s*kbps', s)
3641 if mobj:
3642 return int(mobj.group(1))
3643
3644
a942d6cb 3645def month_by_name(name, lang='en'):
caefb1de
PH
3646 """ Return the number of a month by (locale-independently) English name """
3647
f6717dec 3648 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3649
caefb1de 3650 try:
f6717dec 3651 return month_names.index(name) + 1
7105440c
YCH
3652 except ValueError:
3653 return None
3654
3655
3656def month_by_abbreviation(abbrev):
3657 """ Return the number of a month by (locale-independently) English
3658 abbreviations """
3659
3660 try:
3661 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3662 except ValueError:
3663 return None
18258362
JMF
3664
3665
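# Illustrative examples (documentation sketch, not part of the original module):
#   month_by_name('December')     -> 12
#   month_by_abbreviation('Mar')  -> 3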
5aafe895 3666def fix_xml_ampersands(xml_str):
18258362 3667 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3668 return re.sub(
3669 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3670 '&amp;',
5aafe895 3671 xml_str)
e3946f98
PH
3672
3673
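# Illustrative example for fix_xml_ampersands (documentation sketch, not part of the original module):
#   fix_xml_ampersands('Dragons & Dungeons &amp; more')  -> 'Dragons &amp; Dungeons &amp; more'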
3674def setproctitle(title):
8bf48f23 3675 assert isinstance(title, compat_str)
c1c05c67
YCH
3676
3677 # ctypes in Jython is not complete
3678 # http://bugs.jython.org/issue2148
3679 if sys.platform.startswith('java'):
3680 return
3681
e3946f98 3682 try:
611c1dd9 3683 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3684 except OSError:
3685 return
2f49bcd6
RC
3686 except TypeError:
3687 # LoadLibrary in Windows Python 2.7.13 only expects
3688 # a bytestring, but since unicode_literals turns
3689 # every string into a unicode string, it fails.
3690 return
6eefe533
PH
3691 title_bytes = title.encode('utf-8')
3692 buf = ctypes.create_string_buffer(len(title_bytes))
3693 buf.value = title_bytes
e3946f98 3694 try:
6eefe533 3695 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3696 except AttributeError:
3697 return # Strange libc, just skip this
d7dda168
PH
3698
3699
3700def remove_start(s, start):
46bc9b7d 3701 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3702
3703
2b9faf55 3704def remove_end(s, end):
46bc9b7d 3705 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3706
3707
31b2051e
S
3708def remove_quotes(s):
3709 if s is None or len(s) < 2:
3710 return s
3711 for quote in ('"', "'", ):
3712 if s[0] == quote and s[-1] == quote:
3713 return s[1:-1]
3714 return s
3715
3716
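# Illustrative examples (documentation sketch, not part of the original module):
#   remove_start('https://www.example.com', 'https://')  -> 'www.example.com'
#   remove_end('video.mp4', '.mp4')                       -> 'video'
#   remove_quotes('"hello"')                              -> 'hello'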
b6e0c7d2
U
3717def get_domain(url):
3718 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3719 return domain.group('domain') if domain else None
3720
3721
29eb5174 3722def url_basename(url):
9b8aaeed 3723 path = compat_urlparse.urlparse(url).path
28e614de 3724 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3725
3726
02dc0a36
S
3727def base_url(url):
3728 return re.match(r'https?://[^?#&]+/', url).group()
3729
3730
e34c3361 3731def urljoin(base, path):
4b5de77b
S
3732 if isinstance(path, bytes):
3733 path = path.decode('utf-8')
e34c3361
S
3734 if not isinstance(path, compat_str) or not path:
3735 return None
fad4ceb5 3736 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3737 return path
4b5de77b
S
3738 if isinstance(base, bytes):
3739 base = base.decode('utf-8')
3740 if not isinstance(base, compat_str) or not re.match(
3741 r'^(?:https?:)?//', base):
e34c3361
S
3742 return None
3743 return compat_urlparse.urljoin(base, path)
3744
3745
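# Illustrative examples (documentation sketch, not part of the original module; URLs are hypothetical):
#   url_basename('https://example.com/a/b/c.mp4?x=1')  -> 'c.mp4'
#   base_url('https://example.com/a/b/c.mp4')          -> 'https://example.com/a/b/'
#   urljoin('https://example.com/a/', 'b/c.mp4')       -> 'https://example.com/a/b/c.mp4'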
aa94a6d3
PH
3746class HEADRequest(compat_urllib_request.Request):
3747 def get_method(self):
611c1dd9 3748 return 'HEAD'
7217e148
PH
3749
3750
95cf60e8
S
3751class PUTRequest(compat_urllib_request.Request):
3752 def get_method(self):
3753 return 'PUT'
3754
3755
9732d77e 3756def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3757 if get_attr:
3758 if v is not None:
3759 v = getattr(v, get_attr, None)
9572013d
PH
3760 if v == '':
3761 v = None
1812afb7
S
3762 if v is None:
3763 return default
3764 try:
3765 return int(v) * invscale // scale
5e1271c5 3766 except (ValueError, TypeError):
af98f8ff 3767 return default
9732d77e 3768
9572013d 3769
40a90862
JMF
3770def str_or_none(v, default=None):
3771 return default if v is None else compat_str(v)
3772
9732d77e
PH
3773
3774def str_to_int(int_str):
48d4681e 3775 """ A more relaxed version of int_or_none """
42db58ec 3776 if isinstance(int_str, compat_integer_types):
348c6bf1 3777 return int_str
42db58ec
S
3778 elif isinstance(int_str, compat_str):
3779 int_str = re.sub(r'[,\.\+]', '', int_str)
3780 return int_or_none(int_str)
608d11f5
PH
3781
3782
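# Illustrative examples (documentation sketch, not part of the original module):
#   int_or_none('42')           -> 42
#   int_or_none('', default=0)  -> 0
#   str_to_int('123,456')       -> 123456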
9732d77e 3783def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3784 if v is None:
3785 return default
3786 try:
3787 return float(v) * invscale / scale
5e1271c5 3788 except (ValueError, TypeError):
caf80631 3789 return default
43f775e4
PH
3790
3791
c7e327c4
S
3792def bool_or_none(v, default=None):
3793 return v if isinstance(v, bool) else default
3794
3795
53cd37ba
S
3796def strip_or_none(v, default=None):
3797 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3798
3799
af03000a
S
3800def url_or_none(url):
3801 if not url or not isinstance(url, compat_str):
3802 return None
3803 url = url.strip()
29f7c58a 3804 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3805
3806
e29663c6 3807def strftime_or_none(timestamp, date_format, default=None):
3808 datetime_object = None
3809 try:
3810 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3811 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3812 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3813 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3814 return datetime_object.strftime(date_format)
3815 except (ValueError, TypeError, AttributeError):
3816 return default
3817
3818
608d11f5 3819def parse_duration(s):
8f9312c3 3820 if not isinstance(s, compat_basestring):
608d11f5
PH
3821 return None
3822
ca7b3246
S
3823 s = s.strip()
3824
acaff495 3825 days, hours, mins, secs, ms = [None] * 5
15846398 3826 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3827 if m:
3828 days, hours, mins, secs, ms = m.groups()
3829 else:
3830 m = re.match(
056653bb
S
3831 r'''(?ix)(?:P?
3832 (?:
3833 [0-9]+\s*y(?:ears?)?\s*
3834 )?
3835 (?:
3836 [0-9]+\s*m(?:onths?)?\s*
3837 )?
3838 (?:
3839 [0-9]+\s*w(?:eeks?)?\s*
3840 )?
8f4b58d7 3841 (?:
acaff495 3842 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3843 )?
056653bb 3844 T)?
acaff495 3845 (?:
3846 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3847 )?
3848 (?:
3849 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3850 )?
3851 (?:
3852 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3853 )?Z?$''', s)
acaff495 3854 if m:
3855 days, hours, mins, secs, ms = m.groups()
3856 else:
15846398 3857 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3858 if m:
3859 hours, mins = m.groups()
3860 else:
3861 return None
3862
3863 duration = 0
3864 if secs:
3865 duration += float(secs)
3866 if mins:
3867 duration += float(mins) * 60
3868 if hours:
3869 duration += float(hours) * 60 * 60
3870 if days:
3871 duration += float(days) * 24 * 60 * 60
3872 if ms:
3873 duration += float(ms)
3874 return duration
91d7d0b3
JMF
3875
3876
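# Illustrative examples for parse_duration (documentation sketch, not part of the original module):
#   parse_duration('9:12:43')  -> 33163.0
#   parse_duration('1h30m')    -> 5400.0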
e65e4c88 3877def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 3878 name, real_ext = os.path.splitext(filename)
e65e4c88
S
3879 return (
3880 '{0}.{1}{2}'.format(name, ext, real_ext)
3881 if not expected_real_ext or real_ext[1:] == expected_real_ext
3882 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
3883
3884
b3ed15b7
S
3885def replace_extension(filename, ext, expected_real_ext=None):
3886 name, real_ext = os.path.splitext(filename)
3887 return '{0}.{1}'.format(
3888 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
3889 ext)
3890
3891
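# Illustrative examples (documentation sketch, not part of the original module):
#   prepend_extension('video.mp4', 'temp')  -> 'video.temp.mp4'
#   replace_extension('video.mp4', 'mkv')   -> 'video.mkv'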
d70ad093
PH
3892def check_executable(exe, args=[]):
3893 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
3894 args can be a list of arguments for a short output (like -version) """
3895 try:
f5b1bca9 3896 process_communicate_or_kill(subprocess.Popen(
3897 [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
d70ad093
PH
3898 except OSError:
3899 return False
3900 return exe
b7ab0590
PH
3901
3902
95807118 3903def get_exe_version(exe, args=['--version'],
cae97f65 3904 version_re=None, unrecognized='present'):
95807118
PH
3905 """ Returns the version of the specified executable,
3906 or False if the executable is not present """
3907 try:
b64d04c1 3908 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 3909 # SIGTTOU if yt-dlp is run in the background.
067aa17e 3910 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
f5b1bca9 3911 out, _ = process_communicate_or_kill(subprocess.Popen(
54116803 3912 [encodeArgument(exe)] + args,
00ca7552 3913 stdin=subprocess.PIPE,
f5b1bca9 3914 stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
95807118
PH
3915 except OSError:
3916 return False
cae97f65
PH
3917 if isinstance(out, bytes): # Python 2.x
3918 out = out.decode('ascii', 'ignore')
3919 return detect_exe_version(out, version_re, unrecognized)
3920
3921
3922def detect_exe_version(output, version_re=None, unrecognized='present'):
3923 assert isinstance(output, compat_str)
3924 if version_re is None:
3925 version_re = r'version\s+([-0-9._a-zA-Z]+)'
3926 m = re.search(version_re, output)
95807118
PH
3927 if m:
3928 return m.group(1)
3929 else:
3930 return unrecognized
3931
3932
b7ab0590 3933class PagedList(object):
dd26ced1
PH
3934 def __len__(self):
3935 # This is only useful for tests
3936 return len(self.getslice())
3937
9c44d242
PH
3938
3939class OnDemandPagedList(PagedList):
6be08ce6 3940 def __init__(self, pagefunc, pagesize, use_cache=True):
9c44d242
PH
3941 self._pagefunc = pagefunc
3942 self._pagesize = pagesize
b95dc034
YCH
3943 self._use_cache = use_cache
3944 if use_cache:
3945 self._cache = {}
9c44d242 3946
b7ab0590
PH
3947 def getslice(self, start=0, end=None):
3948 res = []
3949 for pagenum in itertools.count(start // self._pagesize):
3950 firstid = pagenum * self._pagesize
3951 nextfirstid = pagenum * self._pagesize + self._pagesize
3952 if start >= nextfirstid:
3953 continue
3954
b95dc034
YCH
3955 page_results = None
3956 if self._use_cache:
3957 page_results = self._cache.get(pagenum)
3958 if page_results is None:
3959 page_results = list(self._pagefunc(pagenum))
3960 if self._use_cache:
3961 self._cache[pagenum] = page_results
b7ab0590
PH
3962
3963 startv = (
3964 start % self._pagesize
3965 if firstid <= start < nextfirstid
3966 else 0)
3967
3968 endv = (
3969 ((end - 1) % self._pagesize) + 1
3970 if (end is not None and firstid <= end <= nextfirstid)
3971 else None)
3972
3973 if startv != 0 or endv is not None:
3974 page_results = page_results[startv:endv]
3975 res.extend(page_results)
3976
3977 # A little optimization - if the current page is not "full", i.e. does
3978 # not contain page_size videos, then we can assume that this page
3979 # is the last one - there are no more ids on further pages -
3980 # so there is no need to query again.
3981 if len(page_results) + startv < self._pagesize:
3982 break
3983
3984 # If we got the whole page, but the next page is not interesting,
3985 # break out early as well
3986 if end == nextfirstid:
3987 break
3988 return res
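# Illustrative usage of OnDemandPagedList above (page_func below is a made-up example):
#   def page_func(pagenum):
#       return ['video-%d-%d' % (pagenum, i) for i in range(5)]
#   entries = OnDemandPagedList(page_func, 5)
#   entries.getslice(3, 7)  # -> ['video-0-3', 'video-0-4', 'video-1-0', 'video-1-1']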
81c2f20b
PH
3989
3990
9c44d242
PH
3991class InAdvancePagedList(PagedList):
3992 def __init__(self, pagefunc, pagecount, pagesize):
3993 self._pagefunc = pagefunc
3994 self._pagecount = pagecount
3995 self._pagesize = pagesize
3996
3997 def getslice(self, start=0, end=None):
3998 res = []
3999 start_page = start // self._pagesize
4000 end_page = (
4001 self._pagecount if end is None else (end // self._pagesize + 1))
4002 skip_elems = start - start_page * self._pagesize
4003 only_more = None if end is None else end - start
4004 for pagenum in range(start_page, end_page):
4005 page = list(self._pagefunc(pagenum))
4006 if skip_elems:
4007 page = page[skip_elems:]
4008 skip_elems = None
4009 if only_more is not None:
4010 if len(page) < only_more:
4011 only_more -= len(page)
4012 else:
4013 page = page[:only_more]
4014 res.extend(page)
4015 break
4016 res.extend(page)
4017 return res
4018
4019
81c2f20b 4020def uppercase_escape(s):
676eb3f2 4021 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4022 return re.sub(
a612753d 4023 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4024 lambda m: unicode_escape(m.group(0))[0],
4025 s)
0fe2ff78
YCH
4026
4027
4028def lowercase_escape(s):
4029 unicode_escape = codecs.getdecoder('unicode_escape')
4030 return re.sub(
4031 r'\\u[0-9a-fA-F]{4}',
4032 lambda m: unicode_escape(m.group(0))[0],
4033 s)
b53466e1 4034
d05cfe06
S
4035
4036def escape_rfc3986(s):
4037 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4038 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4039 s = s.encode('utf-8')
ecc0c5ee 4040 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4041
4042
4043def escape_url(url):
4044 """Escape URL as suggested by RFC 3986"""
4045 url_parsed = compat_urllib_parse_urlparse(url)
4046 return url_parsed._replace(
efbed08d 4047 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4048 path=escape_rfc3986(url_parsed.path),
4049 params=escape_rfc3986(url_parsed.params),
4050 query=escape_rfc3986(url_parsed.query),
4051 fragment=escape_rfc3986(url_parsed.fragment)
4052 ).geturl()
4053
62e609ab
PH
4054
4055def read_batch_urls(batch_fd):
4056 def fixup(url):
4057 if not isinstance(url, compat_str):
4058 url = url.decode('utf-8', 'replace')
8c04f0be 4059 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4060 for bom in BOM_UTF8:
4061 if url.startswith(bom):
4062 url = url[len(bom):]
4063 url = url.lstrip()
4064 if not url or url.startswith(('#', ';', ']')):
62e609ab 4065 return False
8c04f0be 4066 # "#" cannot be stripped out since it is part of the URI
4067 # However, it can be safely stripped out if it follows whitespace
4068 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4069
4070 with contextlib.closing(batch_fd) as fd:
4071 return [url for url in map(fixup, fd) if url]
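# Illustrative usage of read_batch_urls above (the batch-file content is made up):
#   import io
#   batch = io.StringIO('# a comment line\nhttps://example.com/a  # trailing note\n\n')
#   read_batch_urls(batch)  # -> ['https://example.com/a']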
b74fa8cd
JMF
4072
4073
4074def urlencode_postdata(*args, **kargs):
15707c7e 4075 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4076
4077
38f9ef31 4078def update_url_query(url, query):
cacd9966
YCH
4079 if not query:
4080 return url
38f9ef31 4081 parsed_url = compat_urlparse.urlparse(url)
4082 qs = compat_parse_qs(parsed_url.query)
4083 qs.update(query)
4084 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4085 query=compat_urllib_parse_urlencode(qs, True)))
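# Illustrative usage of update_url_query above (example values only):
#   update_url_query('https://example.com/path?a=1', {'b': '2'})
#   # -> 'https://example.com/path?a=1&b=2' (parameter order may vary)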
16392824 4086
8e60dc75 4087
ed0291d1
S
4088def update_Request(req, url=None, data=None, headers={}, query={}):
4089 req_headers = req.headers.copy()
4090 req_headers.update(headers)
4091 req_data = data or req.data
4092 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4093 req_get_method = req.get_method()
4094 if req_get_method == 'HEAD':
4095 req_type = HEADRequest
4096 elif req_get_method == 'PUT':
4097 req_type = PUTRequest
4098 else:
4099 req_type = compat_urllib_request.Request
ed0291d1
S
4100 new_req = req_type(
4101 req_url, data=req_data, headers=req_headers,
4102 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4103 if hasattr(req, 'timeout'):
4104 new_req.timeout = req.timeout
4105 return new_req
4106
4107
10c87c15 4108def _multipart_encode_impl(data, boundary):
0c265486
YCH
4109 content_type = 'multipart/form-data; boundary=%s' % boundary
4110
4111 out = b''
4112 for k, v in data.items():
4113 out += b'--' + boundary.encode('ascii') + b'\r\n'
4114 if isinstance(k, compat_str):
4115 k = k.encode('utf-8')
4116 if isinstance(v, compat_str):
4117 v = v.encode('utf-8')
4118 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4119 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4120 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4121 if boundary.encode('ascii') in content:
4122 raise ValueError('Boundary overlaps with data')
4123 out += content
4124
4125 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4126
4127 return out, content_type
4128
4129
4130def multipart_encode(data, boundary=None):
4131 '''
4132 Encode a dict to RFC 7578-compliant form-data
4133
4134 data:
4135 A dict where keys and values can be either Unicode or bytes-like
4136 objects.
4137 boundary:
4138 If specified, it must be a Unicode object and is used as the boundary. Otherwise
4139 a random boundary is generated.
4140
4141 Reference: https://tools.ietf.org/html/rfc7578
4142 '''
4143 has_specified_boundary = boundary is not None
4144
4145 while True:
4146 if boundary is None:
4147 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4148
4149 try:
10c87c15 4150 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4151 break
4152 except ValueError:
4153 if has_specified_boundary:
4154 raise
4155 boundary = None
4156
4157 return out, content_type
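# Illustrative usage of multipart_encode above (boundary chosen explicitly to keep the output deterministic):
#   body, ctype = multipart_encode({'field': 'value'}, boundary='AaB03x')
#   # ctype -> 'multipart/form-data; boundary=AaB03x'
#   # body  -> b'--AaB03x\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AaB03x--\r\n'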
4158
4159
86296ad2 4160def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4161 if isinstance(key_or_keys, (list, tuple)):
4162 for key in key_or_keys:
86296ad2
S
4163 if key not in d or d[key] is None or skip_false_values and not d[key]:
4164 continue
4165 return d[key]
cbecc9b9
S
4166 return default
4167 return d.get(key_or_keys, default)
4168
4169
329ca3be 4170def try_get(src, getter, expected_type=None):
a32a9a7e
S
4171 if not isinstance(getter, (list, tuple)):
4172 getter = [getter]
4173 for get in getter:
4174 try:
4175 v = get(src)
4176 except (AttributeError, KeyError, TypeError, IndexError):
4177 pass
4178 else:
4179 if expected_type is None or isinstance(v, expected_type):
4180 return v
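# Illustrative usage of dict_get and try_get above (example values only):
#   dict_get({'x': '', 'y': 'val'}, ('x', 'y'))   # -> 'val' (falsy values are skipped by default)
#   d = {'a': {'b': [1, 2, 3]}}
#   try_get(d, lambda x: x['a']['b'][2], int)     # -> 3
#   try_get(d, lambda x: x['a']['missing'])       # -> None (the KeyError is swallowed)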
329ca3be
S
4181
4182
6cc62232
S
4183def merge_dicts(*dicts):
4184 merged = {}
4185 for a_dict in dicts:
4186 for k, v in a_dict.items():
4187 if v is None:
4188 continue
3089bc74
S
4189 if (k not in merged
4190 or (isinstance(v, compat_str) and v
4191 and isinstance(merged[k], compat_str)
4192 and not merged[k])):
6cc62232
S
4193 merged[k] = v
4194 return merged
4195
4196
8e60dc75
S
4197def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4198 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4199
16392824 4200
a1a530b0
PH
4201US_RATINGS = {
4202 'G': 0,
4203 'PG': 10,
4204 'PG-13': 13,
4205 'R': 16,
4206 'NC': 18,
4207}
fac55558
PH
4208
4209
a8795327 4210TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4211 'TV-Y': 0,
4212 'TV-Y7': 7,
4213 'TV-G': 0,
4214 'TV-PG': 0,
4215 'TV-14': 14,
4216 'TV-MA': 17,
a8795327
S
4217}
4218
4219
146c80e2 4220def parse_age_limit(s):
a8795327
S
4221 if type(s) == int:
4222 return s if 0 <= s <= 21 else None
4223 if not isinstance(s, compat_basestring):
d838b1bd 4224 return None
146c80e2 4225 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4226 if m:
4227 return int(m.group('age'))
5c5fae6d 4228 s = s.upper()
a8795327
S
4229 if s in US_RATINGS:
4230 return US_RATINGS[s]
5a16c9d9 4231 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4232 if m:
5a16c9d9 4233 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4234 return None
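# Illustrative usage of parse_age_limit above (example values only):
#   parse_age_limit(18)        # -> 18
#   parse_age_limit('16+')     # -> 16
#   parse_age_limit('PG-13')   # -> 13
#   parse_age_limit('TV-MA')   # -> 17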
146c80e2
S
4235
4236
fac55558 4237def strip_jsonp(code):
609a61e3 4238 return re.sub(
5552c9eb 4239 r'''(?sx)^
e9c671d5 4240 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4241 (?:\s*&&\s*(?P=func_name))?
4242 \s*\(\s*(?P<callback_data>.*)\);?
4243 \s*?(?://[^\n]*)*$''',
4244 r'\g<callback_data>', code)
478c2c61
PH
4245
4246
5c610515 4247def js_to_json(code, vars={}):
4248 # vars is a dict of var, val pairs to substitute
4195096e
S
4249 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
4250 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4251 INTEGER_TABLE = (
4252 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4253 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4254 )
4255
e05f6939 4256 def fix_kv(m):
e7b6d122
PH
4257 v = m.group(0)
4258 if v in ('true', 'false', 'null'):
4259 return v
8bdd16b4 4260 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4261 return ""
4262
4263 if v[0] in ("'", '"'):
4264 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4265 '"': '\\"',
bd1e4844 4266 "\\'": "'",
4267 '\\\n': '',
4268 '\\x': '\\u00',
4269 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4270 else:
4271 for regex, base in INTEGER_TABLE:
4272 im = re.match(regex, v)
4273 if im:
4274 i = int(im.group(1), base)
4275 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4276
5c610515 4277 if v in vars:
4278 return vars[v]
4279
e7b6d122 4280 return '"%s"' % v
e05f6939 4281
bd1e4844 4282 return re.sub(r'''(?sx)
4283 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4284 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4285 {comment}|,(?={skip}[\]}}])|
c384d537 4286 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
4195096e 4287 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4288 [0-9]+(?={skip}:)|
4289 !+
4195096e 4290 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
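# Illustrative usage of js_to_json above (example input only):
#   js_to_json("{a: 1, b: '2', c: 0x1F, d: true,}")
#   # -> '{"a": 1, "b": "2", "c": 31, "d": true}'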
e05f6939
PH
4291
4292
478c2c61
PH
4293def qualities(quality_ids):
4294 """ Get a numeric quality value out of a list of possible values """
4295 def q(qid):
4296 try:
4297 return quality_ids.index(qid)
4298 except ValueError:
4299 return -1
4300 return q
4301
acd69589 4302
de6000d9 4303DEFAULT_OUTTMPL = {
4304 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4305 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4306}
4307OUTTMPL_TYPES = {
72755351 4308 'chapter': None,
de6000d9 4309 'subtitle': None,
4310 'thumbnail': None,
4311 'description': 'description',
4312 'annotation': 'annotations.xml',
4313 'infojson': 'info.json',
5112f26a 4314 'pl_thumbnail': None,
de6000d9 4315 'pl_description': 'description',
4316 'pl_infojson': 'info.json',
4317}
0a871f68 4318
143db31d 4319# As of [1], the format syntax is:
4320# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4321# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4322FORMAT_RE = r'''(?x)
4323 (?<!%)
4324 %
4325 \({0}\) # mapping key
4326 (?:[#0\-+ ]+)? # conversion flags (optional)
4327 (?:\d+)? # minimum field width (optional)
4328 (?:\.\d+)? # precision (optional)
4329 [hlL]? # length modifier (optional)
4330 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
4331'''
4332
a020a0dc
PH
4333
4334def limit_length(s, length):
4335 """ Add ellipses to overly long strings """
4336 if s is None:
4337 return None
4338 ELLIPSES = '...'
4339 if len(s) > length:
4340 return s[:length - len(ELLIPSES)] + ELLIPSES
4341 return s
48844745
PH
4342
4343
4344def version_tuple(v):
5f9b8394 4345 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4346
4347
4348def is_outdated_version(version, limit, assume_new=True):
4349 if not version:
4350 return not assume_new
4351 try:
4352 return version_tuple(version) < version_tuple(limit)
4353 except ValueError:
4354 return not assume_new
732ea2f0
PH
4355
4356
4357def ytdl_is_updateable():
7a5c1cfe 4358 """ Returns whether yt-dlp can be updated with -U """
735d865e 4359 return False
4360
732ea2f0
PH
4361 from zipimport import zipimporter
4362
4363 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
4364
4365
4366def args_to_str(args):
4367 # Get a short string representation for a subprocess command
702ccf2d 4368 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4369
4370
9b9c5355 4371def error_to_compat_str(err):
fdae2358
S
4372 err_str = str(err)
4373 # On python 2 error byte string must be decoded with proper
4374 # encoding rather than ascii
4375 if sys.version_info[0] < 3:
4376 err_str = err_str.decode(preferredencoding())
4377 return err_str
4378
4379
c460bdd5 4380def mimetype2ext(mt):
eb9ee194
S
4381 if mt is None:
4382 return None
4383
765ac263
JMF
4384 ext = {
4385 'audio/mp4': 'm4a',
6c33d24b
YCH
4386 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4387 # it's the most popular one
4388 'audio/mpeg': 'mp3',
ba39289d 4389 'audio/x-wav': 'wav',
765ac263
JMF
4390 }.get(mt)
4391 if ext is not None:
4392 return ext
4393
c460bdd5 4394 _, _, res = mt.rpartition('/')
6562d34a 4395 res = res.split(';')[0].strip().lower()
c460bdd5
PH
4396
4397 return {
f6861ec9 4398 '3gpp': '3gp',
cafcf657 4399 'smptett+xml': 'tt',
cafcf657 4400 'ttaf+xml': 'dfxp',
a0d8d704 4401 'ttml+xml': 'ttml',
f6861ec9 4402 'x-flv': 'flv',
a0d8d704 4403 'x-mp4-fragmented': 'mp4',
d4f05d47 4404 'x-ms-sami': 'sami',
a0d8d704 4405 'x-ms-wmv': 'wmv',
b4173f15
RA
4406 'mpegurl': 'm3u8',
4407 'x-mpegurl': 'm3u8',
4408 'vnd.apple.mpegurl': 'm3u8',
4409 'dash+xml': 'mpd',
b4173f15 4410 'f4m+xml': 'f4m',
f164b971 4411 'hds+xml': 'f4m',
e910fe2f 4412 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4413 'quicktime': 'mov',
98ce1a3f 4414 'mp2t': 'ts',
39e7107d 4415 'x-wav': 'wav',
c460bdd5
PH
4416 }.get(res, res)
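# Illustrative usage of mimetype2ext above (example values only):
#   mimetype2ext('audio/mp4')                 # -> 'm4a'
#   mimetype2ext('application/x-mpegURL')     # -> 'm3u8'
#   mimetype2ext('video/mp4; codecs="avc1"')  # -> 'mp4'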
4417
4418
4f3c5e06 4419def parse_codecs(codecs_str):
4420 # http://tools.ietf.org/html/rfc6381
4421 if not codecs_str:
4422 return {}
a0566bbf 4423 split_codecs = list(filter(None, map(
4f3c5e06 4424 lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4425 vcodec, acodec = None, None
a0566bbf 4426 for full_codec in split_codecs:
4f3c5e06 4427 codec = full_codec.split('.')[0]
28cc2241 4428 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4429 if not vcodec:
4430 vcodec = full_codec
60f5c9fb 4431 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4432 if not acodec:
4433 acodec = full_codec
4434 else:
60f5c9fb 4435 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4436 if not vcodec and not acodec:
a0566bbf 4437 if len(split_codecs) == 2:
4f3c5e06 4438 return {
a0566bbf 4439 'vcodec': split_codecs[0],
4440 'acodec': split_codecs[1],
4f3c5e06 4441 }
4442 else:
4443 return {
4444 'vcodec': vcodec or 'none',
4445 'acodec': acodec or 'none',
4446 }
4447 return {}
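# Illustrative usage of parse_codecs above (example values only):
#   parse_codecs('avc1.64001F, mp4a.40.2')  # -> {'vcodec': 'avc1.64001F', 'acodec': 'mp4a.40.2'}
#   parse_codecs('vp9')                     # -> {'vcodec': 'vp9', 'acodec': 'none'}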
4448
4449
2ccd1b10 4450def urlhandle_detect_ext(url_handle):
79298173 4451 getheader = url_handle.headers.get
2ccd1b10 4452
b55ee18f
PH
4453 cd = getheader('Content-Disposition')
4454 if cd:
4455 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4456 if m:
4457 e = determine_ext(m.group('filename'), default_ext=None)
4458 if e:
4459 return e
4460
c460bdd5 4461 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4462
4463
1e399778
YCH
4464def encode_data_uri(data, mime_type):
4465 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4466
4467
05900629 4468def age_restricted(content_limit, age_limit):
6ec6cb4e 4469 """ Returns True iff the content should be blocked """
05900629
PH
4470
4471 if age_limit is None: # No limit set
4472 return False
4473 if content_limit is None:
4474 return False # Content available for everyone
4475 return age_limit < content_limit
61ca9a80
PH
4476
4477
4478def is_html(first_bytes):
4479 """ Detect whether a file contains HTML by examining its first bytes. """
4480
4481 BOMS = [
4482 (b'\xef\xbb\xbf', 'utf-8'),
4483 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4484 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4485 (b'\xff\xfe', 'utf-16-le'),
4486 (b'\xfe\xff', 'utf-16-be'),
4487 ]
4488 for bom, enc in BOMS:
4489 if first_bytes.startswith(bom):
4490 s = first_bytes[len(bom):].decode(enc, 'replace')
4491 break
4492 else:
4493 s = first_bytes.decode('utf-8', 'replace')
4494
4495 return re.match(r'^\s*<', s)
a055469f
PH
4496
4497
4498def determine_protocol(info_dict):
4499 protocol = info_dict.get('protocol')
4500 if protocol is not None:
4501 return protocol
4502
4503 url = info_dict['url']
4504 if url.startswith('rtmp'):
4505 return 'rtmp'
4506 elif url.startswith('mms'):
4507 return 'mms'
4508 elif url.startswith('rtsp'):
4509 return 'rtsp'
4510
4511 ext = determine_ext(url)
4512 if ext == 'm3u8':
4513 return 'm3u8'
4514 elif ext == 'f4m':
4515 return 'f4m'
4516
4517 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4518
4519
76d321f6 4520def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4521 """ Render a list of rows, each as a list of values """
76d321f6 4522
4523 def get_max_lens(table):
4524 return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
4525
4526 def filter_using_list(row, filterArray):
4527 return [col for (take, col) in zip(filterArray, row) if take]
4528
4529 if hideEmpty:
4530 max_lens = get_max_lens(data)
4531 header_row = filter_using_list(header_row, max_lens)
4532 data = [filter_using_list(row, max_lens) for row in data]
4533
cfb56d1a 4534 table = [header_row] + data
76d321f6 4535 max_lens = get_max_lens(table)
4536 if delim:
4537 table = [header_row] + [['-' * ml for ml in max_lens]] + data
4538 format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
cfb56d1a 4539 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4540
4541
4542def _match_one(filter_part, dct):
4543 COMPARISON_OPERATORS = {
4544 '<': operator.lt,
4545 '<=': operator.le,
4546 '>': operator.gt,
4547 '>=': operator.ge,
4548 '=': operator.eq,
4549 '!=': operator.ne,
4550 }
4551 operator_rex = re.compile(r'''(?x)\s*
4552 (?P<key>[a-z_]+)
4553 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4554 (?:
4555 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4556 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4557 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4558 )
4559 \s*$
4560 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4561 m = operator_rex.search(filter_part)
4562 if m:
4563 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4564 actual_value = dct.get(m.group('key'))
3089bc74
S
4565 if (m.group('quotedstrval') is not None
4566 or m.group('strval') is not None
e5a088dc
S
4567 # If the original field is a string and the matching comparison value is
4568 # a number, we should respect the origin of the original field
4569 # and process the comparison value as a string (see
067aa17e 4570 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4571 or actual_value is not None and m.group('intval') is not None
4572 and isinstance(actual_value, compat_str)):
347de493
PH
4573 if m.group('op') not in ('=', '!='):
4574 raise ValueError(
4575 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4576 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4577 quote = m.group('quote')
4578 if quote is not None:
4579 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4580 else:
4581 try:
4582 comparison_value = int(m.group('intval'))
4583 except ValueError:
4584 comparison_value = parse_filesize(m.group('intval'))
4585 if comparison_value is None:
4586 comparison_value = parse_filesize(m.group('intval') + 'B')
4587 if comparison_value is None:
4588 raise ValueError(
4589 'Invalid integer value %r in filter part %r' % (
4590 m.group('intval'), filter_part))
347de493
PH
4591 if actual_value is None:
4592 return m.group('none_inclusive')
4593 return op(actual_value, comparison_value)
4594
4595 UNARY_OPERATORS = {
1cc47c66
S
4596 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4597 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4598 }
4599 operator_rex = re.compile(r'''(?x)\s*
4600 (?P<op>%s)\s*(?P<key>[a-z_]+)
4601 \s*$
4602 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4603 m = operator_rex.search(filter_part)
4604 if m:
4605 op = UNARY_OPERATORS[m.group('op')]
4606 actual_value = dct.get(m.group('key'))
4607 return op(actual_value)
4608
4609 raise ValueError('Invalid filter part %r' % filter_part)
4610
4611
4612def match_str(filter_str, dct):
4613 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
4614
4615 return all(
4616 _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
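# Illustrative usage of match_str above (example values only):
#   match_str('like_count > 100 & dislike_count <? 50', {'like_count': 190, 'dislike_count': 10})  # -> True
#   match_str('duration < 30', {'duration': 45})                                                   # -> False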
4617
4618
4619def match_filter_func(filter_str):
4620 def _match_func(info_dict):
4621 if match_str(filter_str, info_dict):
4622 return None
4623 else:
4624 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4625 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4626 return _match_func
91410c9b
PH
4627
4628
bf6427d2
YCH
4629def parse_dfxp_time_expr(time_expr):
4630 if not time_expr:
d631d5f9 4631 return
bf6427d2
YCH
4632
4633 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4634 if mobj:
4635 return float(mobj.group('time_offset'))
4636
db2fe38b 4637 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4638 if mobj:
db2fe38b 4639 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4640
4641
c1c924ab
YCH
4642def srt_subtitles_timecode(seconds):
4643 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
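# Illustrative usage of the two helpers above (example values only):
#   parse_dfxp_time_expr('00:01:02.5')  # -> 62.5
#   srt_subtitles_timecode(62.5)        # -> '00:01:02,500'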
bf6427d2
YCH
4644
4645
4646def dfxp2srt(dfxp_data):
3869028f
YCH
4647 '''
4648 @param dfxp_data A bytes-like object containing DFXP data
4649 @returns A unicode object containing converted SRT data
4650 '''
5b995f71 4651 LEGACY_NAMESPACES = (
3869028f
YCH
4652 (b'http://www.w3.org/ns/ttml', [
4653 b'http://www.w3.org/2004/11/ttaf1',
4654 b'http://www.w3.org/2006/04/ttaf1',
4655 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4656 ]),
3869028f
YCH
4657 (b'http://www.w3.org/ns/ttml#styling', [
4658 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4659 ]),
4660 )
4661
4662 SUPPORTED_STYLING = [
4663 'color',
4664 'fontFamily',
4665 'fontSize',
4666 'fontStyle',
4667 'fontWeight',
4668 'textDecoration'
4669 ]
4670
4e335771 4671 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4672 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4673 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4674 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4675 })
bf6427d2 4676
5b995f71
RA
4677 styles = {}
4678 default_style = {}
4679
87de7069 4680 class TTMLPElementParser(object):
5b995f71
RA
4681 _out = ''
4682 _unclosed_elements = []
4683 _applied_styles = []
bf6427d2 4684
2b14cb56 4685 def start(self, tag, attrib):
5b995f71
RA
4686 if tag in (_x('ttml:br'), 'br'):
4687 self._out += '\n'
4688 else:
4689 unclosed_elements = []
4690 style = {}
4691 element_style_id = attrib.get('style')
4692 if default_style:
4693 style.update(default_style)
4694 if element_style_id:
4695 style.update(styles.get(element_style_id, {}))
4696 for prop in SUPPORTED_STYLING:
4697 prop_val = attrib.get(_x('tts:' + prop))
4698 if prop_val:
4699 style[prop] = prop_val
4700 if style:
4701 font = ''
4702 for k, v in sorted(style.items()):
4703 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4704 continue
4705 if k == 'color':
4706 font += ' color="%s"' % v
4707 elif k == 'fontSize':
4708 font += ' size="%s"' % v
4709 elif k == 'fontFamily':
4710 font += ' face="%s"' % v
4711 elif k == 'fontWeight' and v == 'bold':
4712 self._out += '<b>'
4713 unclosed_elements.append('b')
4714 elif k == 'fontStyle' and v == 'italic':
4715 self._out += '<i>'
4716 unclosed_elements.append('i')
4717 elif k == 'textDecoration' and v == 'underline':
4718 self._out += '<u>'
4719 unclosed_elements.append('u')
4720 if font:
4721 self._out += '<font' + font + '>'
4722 unclosed_elements.append('font')
4723 applied_style = {}
4724 if self._applied_styles:
4725 applied_style.update(self._applied_styles[-1])
4726 applied_style.update(style)
4727 self._applied_styles.append(applied_style)
4728 self._unclosed_elements.append(unclosed_elements)
bf6427d2 4729
2b14cb56 4730 def end(self, tag):
5b995f71
RA
4731 if tag not in (_x('ttml:br'), 'br'):
4732 unclosed_elements = self._unclosed_elements.pop()
4733 for element in reversed(unclosed_elements):
4734 self._out += '</%s>' % element
4735 if unclosed_elements and self._applied_styles:
4736 self._applied_styles.pop()
bf6427d2 4737
2b14cb56 4738 def data(self, data):
5b995f71 4739 self._out += data
2b14cb56 4740
4741 def close(self):
5b995f71 4742 return self._out.strip()
2b14cb56 4743
4744 def parse_node(node):
4745 target = TTMLPElementParser()
4746 parser = xml.etree.ElementTree.XMLParser(target=target)
4747 parser.feed(xml.etree.ElementTree.tostring(node))
4748 return parser.close()
bf6427d2 4749
5b995f71
RA
4750 for k, v in LEGACY_NAMESPACES:
4751 for ns in v:
4752 dfxp_data = dfxp_data.replace(ns, k)
4753
3869028f 4754 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 4755 out = []
5b995f71 4756 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
4757
4758 if not paras:
4759 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 4760
5b995f71
RA
4761 repeat = False
4762 while True:
4763 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
4764 style_id = style.get('id') or style.get(_x('xml:id'))
4765 if not style_id:
4766 continue
5b995f71
RA
4767 parent_style_id = style.get('style')
4768 if parent_style_id:
4769 if parent_style_id not in styles:
4770 repeat = True
4771 continue
4772 styles[style_id] = styles[parent_style_id].copy()
4773 for prop in SUPPORTED_STYLING:
4774 prop_val = style.get(_x('tts:' + prop))
4775 if prop_val:
4776 styles.setdefault(style_id, {})[prop] = prop_val
4777 if repeat:
4778 repeat = False
4779 else:
4780 break
4781
4782 for p in ('body', 'div'):
4783 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
4784 if ele is None:
4785 continue
4786 style = styles.get(ele.get('style'))
4787 if not style:
4788 continue
4789 default_style.update(style)
4790
bf6427d2 4791 for para, index in zip(paras, itertools.count(1)):
d631d5f9 4792 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 4793 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
4794 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
4795 if begin_time is None:
4796 continue
7dff0363 4797 if not end_time:
d631d5f9
YCH
4798 if not dur:
4799 continue
4800 end_time = begin_time + dur
bf6427d2
YCH
4801 out.append('%d\n%s --> %s\n%s\n\n' % (
4802 index,
c1c924ab
YCH
4803 srt_subtitles_timecode(begin_time),
4804 srt_subtitles_timecode(end_time),
bf6427d2
YCH
4805 parse_node(para)))
4806
4807 return ''.join(out)
4808
4809
66e289ba
S
4810def cli_option(params, command_option, param):
4811 param = params.get(param)
98e698f1
RA
4812 if param:
4813 param = compat_str(param)
66e289ba
S
4814 return [command_option, param] if param is not None else []
4815
4816
4817def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
4818 param = params.get(param)
5b232f46
S
4819 if param is None:
4820 return []
66e289ba
S
4821 assert isinstance(param, bool)
4822 if separator:
4823 return [command_option + separator + (true_value if param else false_value)]
4824 return [command_option, true_value if param else false_value]
4825
4826
4827def cli_valueless_option(params, command_option, param, expected_value=True):
4828 param = params.get(param)
4829 return [command_option] if param == expected_value else []
4830
4831
e92caff5 4832def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 4833 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 4834 if use_compat:
5b1ecbb3 4835 return argdict
4836 else:
4837 argdict = None
eab9b2bc 4838 if argdict is None:
5b1ecbb3 4839 return default
eab9b2bc 4840 assert isinstance(argdict, dict)
4841
e92caff5 4842 assert isinstance(keys, (list, tuple))
4843 for key_list in keys:
4844 if isinstance(key_list, compat_str):
4845 key_list = (key_list,)
4846 arg_list = list(filter(
4847 lambda x: x is not None,
4848 [argdict.get(key.lower()) for key in key_list]))
4849 if arg_list:
4850 return [arg for args in arg_list for arg in args]
4851 return default
66e289ba
S
4852
4853
39672624
YCH
4854class ISO639Utils(object):
4855 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4856 _lang_map = {
4857 'aa': 'aar',
4858 'ab': 'abk',
4859 'ae': 'ave',
4860 'af': 'afr',
4861 'ak': 'aka',
4862 'am': 'amh',
4863 'an': 'arg',
4864 'ar': 'ara',
4865 'as': 'asm',
4866 'av': 'ava',
4867 'ay': 'aym',
4868 'az': 'aze',
4869 'ba': 'bak',
4870 'be': 'bel',
4871 'bg': 'bul',
4872 'bh': 'bih',
4873 'bi': 'bis',
4874 'bm': 'bam',
4875 'bn': 'ben',
4876 'bo': 'bod',
4877 'br': 'bre',
4878 'bs': 'bos',
4879 'ca': 'cat',
4880 'ce': 'che',
4881 'ch': 'cha',
4882 'co': 'cos',
4883 'cr': 'cre',
4884 'cs': 'ces',
4885 'cu': 'chu',
4886 'cv': 'chv',
4887 'cy': 'cym',
4888 'da': 'dan',
4889 'de': 'deu',
4890 'dv': 'div',
4891 'dz': 'dzo',
4892 'ee': 'ewe',
4893 'el': 'ell',
4894 'en': 'eng',
4895 'eo': 'epo',
4896 'es': 'spa',
4897 'et': 'est',
4898 'eu': 'eus',
4899 'fa': 'fas',
4900 'ff': 'ful',
4901 'fi': 'fin',
4902 'fj': 'fij',
4903 'fo': 'fao',
4904 'fr': 'fra',
4905 'fy': 'fry',
4906 'ga': 'gle',
4907 'gd': 'gla',
4908 'gl': 'glg',
4909 'gn': 'grn',
4910 'gu': 'guj',
4911 'gv': 'glv',
4912 'ha': 'hau',
4913 'he': 'heb',
b7acc835 4914 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4915 'hi': 'hin',
4916 'ho': 'hmo',
4917 'hr': 'hrv',
4918 'ht': 'hat',
4919 'hu': 'hun',
4920 'hy': 'hye',
4921 'hz': 'her',
4922 'ia': 'ina',
4923 'id': 'ind',
b7acc835 4924 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4925 'ie': 'ile',
4926 'ig': 'ibo',
4927 'ii': 'iii',
4928 'ik': 'ipk',
4929 'io': 'ido',
4930 'is': 'isl',
4931 'it': 'ita',
4932 'iu': 'iku',
4933 'ja': 'jpn',
4934 'jv': 'jav',
4935 'ka': 'kat',
4936 'kg': 'kon',
4937 'ki': 'kik',
4938 'kj': 'kua',
4939 'kk': 'kaz',
4940 'kl': 'kal',
4941 'km': 'khm',
4942 'kn': 'kan',
4943 'ko': 'kor',
4944 'kr': 'kau',
4945 'ks': 'kas',
4946 'ku': 'kur',
4947 'kv': 'kom',
4948 'kw': 'cor',
4949 'ky': 'kir',
4950 'la': 'lat',
4951 'lb': 'ltz',
4952 'lg': 'lug',
4953 'li': 'lim',
4954 'ln': 'lin',
4955 'lo': 'lao',
4956 'lt': 'lit',
4957 'lu': 'lub',
4958 'lv': 'lav',
4959 'mg': 'mlg',
4960 'mh': 'mah',
4961 'mi': 'mri',
4962 'mk': 'mkd',
4963 'ml': 'mal',
4964 'mn': 'mon',
4965 'mr': 'mar',
4966 'ms': 'msa',
4967 'mt': 'mlt',
4968 'my': 'mya',
4969 'na': 'nau',
4970 'nb': 'nob',
4971 'nd': 'nde',
4972 'ne': 'nep',
4973 'ng': 'ndo',
4974 'nl': 'nld',
4975 'nn': 'nno',
4976 'no': 'nor',
4977 'nr': 'nbl',
4978 'nv': 'nav',
4979 'ny': 'nya',
4980 'oc': 'oci',
4981 'oj': 'oji',
4982 'om': 'orm',
4983 'or': 'ori',
4984 'os': 'oss',
4985 'pa': 'pan',
4986 'pi': 'pli',
4987 'pl': 'pol',
4988 'ps': 'pus',
4989 'pt': 'por',
4990 'qu': 'que',
4991 'rm': 'roh',
4992 'rn': 'run',
4993 'ro': 'ron',
4994 'ru': 'rus',
4995 'rw': 'kin',
4996 'sa': 'san',
4997 'sc': 'srd',
4998 'sd': 'snd',
4999 'se': 'sme',
5000 'sg': 'sag',
5001 'si': 'sin',
5002 'sk': 'slk',
5003 'sl': 'slv',
5004 'sm': 'smo',
5005 'sn': 'sna',
5006 'so': 'som',
5007 'sq': 'sqi',
5008 'sr': 'srp',
5009 'ss': 'ssw',
5010 'st': 'sot',
5011 'su': 'sun',
5012 'sv': 'swe',
5013 'sw': 'swa',
5014 'ta': 'tam',
5015 'te': 'tel',
5016 'tg': 'tgk',
5017 'th': 'tha',
5018 'ti': 'tir',
5019 'tk': 'tuk',
5020 'tl': 'tgl',
5021 'tn': 'tsn',
5022 'to': 'ton',
5023 'tr': 'tur',
5024 'ts': 'tso',
5025 'tt': 'tat',
5026 'tw': 'twi',
5027 'ty': 'tah',
5028 'ug': 'uig',
5029 'uk': 'ukr',
5030 'ur': 'urd',
5031 'uz': 'uzb',
5032 've': 'ven',
5033 'vi': 'vie',
5034 'vo': 'vol',
5035 'wa': 'wln',
5036 'wo': 'wol',
5037 'xh': 'xho',
5038 'yi': 'yid',
e9a50fba 5039 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5040 'yo': 'yor',
5041 'za': 'zha',
5042 'zh': 'zho',
5043 'zu': 'zul',
5044 }
5045
5046 @classmethod
5047 def short2long(cls, code):
5048 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5049 return cls._lang_map.get(code[:2])
5050
5051 @classmethod
5052 def long2short(cls, code):
5053 """Convert language code from ISO 639-2/T to ISO 639-1"""
5054 for short_name, long_name in cls._lang_map.items():
5055 if long_name == code:
5056 return short_name
5057
5058
4eb10f66
YCH
5059class ISO3166Utils(object):
5060 # From http://data.okfn.org/data/core/country-list
5061 _country_map = {
5062 'AF': 'Afghanistan',
5063 'AX': 'Åland Islands',
5064 'AL': 'Albania',
5065 'DZ': 'Algeria',
5066 'AS': 'American Samoa',
5067 'AD': 'Andorra',
5068 'AO': 'Angola',
5069 'AI': 'Anguilla',
5070 'AQ': 'Antarctica',
5071 'AG': 'Antigua and Barbuda',
5072 'AR': 'Argentina',
5073 'AM': 'Armenia',
5074 'AW': 'Aruba',
5075 'AU': 'Australia',
5076 'AT': 'Austria',
5077 'AZ': 'Azerbaijan',
5078 'BS': 'Bahamas',
5079 'BH': 'Bahrain',
5080 'BD': 'Bangladesh',
5081 'BB': 'Barbados',
5082 'BY': 'Belarus',
5083 'BE': 'Belgium',
5084 'BZ': 'Belize',
5085 'BJ': 'Benin',
5086 'BM': 'Bermuda',
5087 'BT': 'Bhutan',
5088 'BO': 'Bolivia, Plurinational State of',
5089 'BQ': 'Bonaire, Sint Eustatius and Saba',
5090 'BA': 'Bosnia and Herzegovina',
5091 'BW': 'Botswana',
5092 'BV': 'Bouvet Island',
5093 'BR': 'Brazil',
5094 'IO': 'British Indian Ocean Territory',
5095 'BN': 'Brunei Darussalam',
5096 'BG': 'Bulgaria',
5097 'BF': 'Burkina Faso',
5098 'BI': 'Burundi',
5099 'KH': 'Cambodia',
5100 'CM': 'Cameroon',
5101 'CA': 'Canada',
5102 'CV': 'Cape Verde',
5103 'KY': 'Cayman Islands',
5104 'CF': 'Central African Republic',
5105 'TD': 'Chad',
5106 'CL': 'Chile',
5107 'CN': 'China',
5108 'CX': 'Christmas Island',
5109 'CC': 'Cocos (Keeling) Islands',
5110 'CO': 'Colombia',
5111 'KM': 'Comoros',
5112 'CG': 'Congo',
5113 'CD': 'Congo, the Democratic Republic of the',
5114 'CK': 'Cook Islands',
5115 'CR': 'Costa Rica',
5116 'CI': 'Côte d\'Ivoire',
5117 'HR': 'Croatia',
5118 'CU': 'Cuba',
5119 'CW': 'Curaçao',
5120 'CY': 'Cyprus',
5121 'CZ': 'Czech Republic',
5122 'DK': 'Denmark',
5123 'DJ': 'Djibouti',
5124 'DM': 'Dominica',
5125 'DO': 'Dominican Republic',
5126 'EC': 'Ecuador',
5127 'EG': 'Egypt',
5128 'SV': 'El Salvador',
5129 'GQ': 'Equatorial Guinea',
5130 'ER': 'Eritrea',
5131 'EE': 'Estonia',
5132 'ET': 'Ethiopia',
5133 'FK': 'Falkland Islands (Malvinas)',
5134 'FO': 'Faroe Islands',
5135 'FJ': 'Fiji',
5136 'FI': 'Finland',
5137 'FR': 'France',
5138 'GF': 'French Guiana',
5139 'PF': 'French Polynesia',
5140 'TF': 'French Southern Territories',
5141 'GA': 'Gabon',
5142 'GM': 'Gambia',
5143 'GE': 'Georgia',
5144 'DE': 'Germany',
5145 'GH': 'Ghana',
5146 'GI': 'Gibraltar',
5147 'GR': 'Greece',
5148 'GL': 'Greenland',
5149 'GD': 'Grenada',
5150 'GP': 'Guadeloupe',
5151 'GU': 'Guam',
5152 'GT': 'Guatemala',
5153 'GG': 'Guernsey',
5154 'GN': 'Guinea',
5155 'GW': 'Guinea-Bissau',
5156 'GY': 'Guyana',
5157 'HT': 'Haiti',
5158 'HM': 'Heard Island and McDonald Islands',
5159 'VA': 'Holy See (Vatican City State)',
5160 'HN': 'Honduras',
5161 'HK': 'Hong Kong',
5162 'HU': 'Hungary',
5163 'IS': 'Iceland',
5164 'IN': 'India',
5165 'ID': 'Indonesia',
5166 'IR': 'Iran, Islamic Republic of',
5167 'IQ': 'Iraq',
5168 'IE': 'Ireland',
5169 'IM': 'Isle of Man',
5170 'IL': 'Israel',
5171 'IT': 'Italy',
5172 'JM': 'Jamaica',
5173 'JP': 'Japan',
5174 'JE': 'Jersey',
5175 'JO': 'Jordan',
5176 'KZ': 'Kazakhstan',
5177 'KE': 'Kenya',
5178 'KI': 'Kiribati',
5179 'KP': 'Korea, Democratic People\'s Republic of',
5180 'KR': 'Korea, Republic of',
5181 'KW': 'Kuwait',
5182 'KG': 'Kyrgyzstan',
5183 'LA': 'Lao People\'s Democratic Republic',
5184 'LV': 'Latvia',
5185 'LB': 'Lebanon',
5186 'LS': 'Lesotho',
5187 'LR': 'Liberia',
5188 'LY': 'Libya',
5189 'LI': 'Liechtenstein',
5190 'LT': 'Lithuania',
5191 'LU': 'Luxembourg',
5192 'MO': 'Macao',
5193 'MK': 'Macedonia, the Former Yugoslav Republic of',
5194 'MG': 'Madagascar',
5195 'MW': 'Malawi',
5196 'MY': 'Malaysia',
5197 'MV': 'Maldives',
5198 'ML': 'Mali',
5199 'MT': 'Malta',
5200 'MH': 'Marshall Islands',
5201 'MQ': 'Martinique',
5202 'MR': 'Mauritania',
5203 'MU': 'Mauritius',
5204 'YT': 'Mayotte',
5205 'MX': 'Mexico',
5206 'FM': 'Micronesia, Federated States of',
5207 'MD': 'Moldova, Republic of',
5208 'MC': 'Monaco',
5209 'MN': 'Mongolia',
5210 'ME': 'Montenegro',
5211 'MS': 'Montserrat',
5212 'MA': 'Morocco',
5213 'MZ': 'Mozambique',
5214 'MM': 'Myanmar',
5215 'NA': 'Namibia',
5216 'NR': 'Nauru',
5217 'NP': 'Nepal',
5218 'NL': 'Netherlands',
5219 'NC': 'New Caledonia',
5220 'NZ': 'New Zealand',
5221 'NI': 'Nicaragua',
5222 'NE': 'Niger',
5223 'NG': 'Nigeria',
5224 'NU': 'Niue',
5225 'NF': 'Norfolk Island',
5226 'MP': 'Northern Mariana Islands',
5227 'NO': 'Norway',
5228 'OM': 'Oman',
5229 'PK': 'Pakistan',
5230 'PW': 'Palau',
5231 'PS': 'Palestine, State of',
5232 'PA': 'Panama',
5233 'PG': 'Papua New Guinea',
5234 'PY': 'Paraguay',
5235 'PE': 'Peru',
5236 'PH': 'Philippines',
5237 'PN': 'Pitcairn',
5238 'PL': 'Poland',
5239 'PT': 'Portugal',
5240 'PR': 'Puerto Rico',
5241 'QA': 'Qatar',
5242 'RE': 'Réunion',
5243 'RO': 'Romania',
5244 'RU': 'Russian Federation',
5245 'RW': 'Rwanda',
5246 'BL': 'Saint Barthélemy',
5247 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5248 'KN': 'Saint Kitts and Nevis',
5249 'LC': 'Saint Lucia',
5250 'MF': 'Saint Martin (French part)',
5251 'PM': 'Saint Pierre and Miquelon',
5252 'VC': 'Saint Vincent and the Grenadines',
5253 'WS': 'Samoa',
5254 'SM': 'San Marino',
5255 'ST': 'Sao Tome and Principe',
5256 'SA': 'Saudi Arabia',
5257 'SN': 'Senegal',
5258 'RS': 'Serbia',
5259 'SC': 'Seychelles',
5260 'SL': 'Sierra Leone',
5261 'SG': 'Singapore',
5262 'SX': 'Sint Maarten (Dutch part)',
5263 'SK': 'Slovakia',
5264 'SI': 'Slovenia',
5265 'SB': 'Solomon Islands',
5266 'SO': 'Somalia',
5267 'ZA': 'South Africa',
5268 'GS': 'South Georgia and the South Sandwich Islands',
5269 'SS': 'South Sudan',
5270 'ES': 'Spain',
5271 'LK': 'Sri Lanka',
5272 'SD': 'Sudan',
5273 'SR': 'Suriname',
5274 'SJ': 'Svalbard and Jan Mayen',
5275 'SZ': 'Swaziland',
5276 'SE': 'Sweden',
5277 'CH': 'Switzerland',
5278 'SY': 'Syrian Arab Republic',
5279 'TW': 'Taiwan, Province of China',
5280 'TJ': 'Tajikistan',
5281 'TZ': 'Tanzania, United Republic of',
5282 'TH': 'Thailand',
5283 'TL': 'Timor-Leste',
5284 'TG': 'Togo',
5285 'TK': 'Tokelau',
5286 'TO': 'Tonga',
5287 'TT': 'Trinidad and Tobago',
5288 'TN': 'Tunisia',
5289 'TR': 'Turkey',
5290 'TM': 'Turkmenistan',
5291 'TC': 'Turks and Caicos Islands',
5292 'TV': 'Tuvalu',
5293 'UG': 'Uganda',
5294 'UA': 'Ukraine',
5295 'AE': 'United Arab Emirates',
5296 'GB': 'United Kingdom',
5297 'US': 'United States',
5298 'UM': 'United States Minor Outlying Islands',
5299 'UY': 'Uruguay',
5300 'UZ': 'Uzbekistan',
5301 'VU': 'Vanuatu',
5302 'VE': 'Venezuela, Bolivarian Republic of',
5303 'VN': 'Viet Nam',
5304 'VG': 'Virgin Islands, British',
5305 'VI': 'Virgin Islands, U.S.',
5306 'WF': 'Wallis and Futuna',
5307 'EH': 'Western Sahara',
5308 'YE': 'Yemen',
5309 'ZM': 'Zambia',
5310 'ZW': 'Zimbabwe',
5311 }
5312
5313 @classmethod
5314 def short2full(cls, code):
5315 """Convert an ISO 3166-2 country code to the corresponding full name"""
5316 return cls._country_map.get(code.upper())
5317
5318
773f291d
S
5319class GeoUtils(object):
5320 # Major IPv4 address blocks per country
5321 _country_ip_map = {
53896ca5 5322 'AD': '46.172.224.0/19',
773f291d
S
5323 'AE': '94.200.0.0/13',
5324 'AF': '149.54.0.0/17',
5325 'AG': '209.59.64.0/18',
5326 'AI': '204.14.248.0/21',
5327 'AL': '46.99.0.0/16',
5328 'AM': '46.70.0.0/15',
5329 'AO': '105.168.0.0/13',
53896ca5
S
5330 'AP': '182.50.184.0/21',
5331 'AQ': '23.154.160.0/24',
773f291d
S
5332 'AR': '181.0.0.0/12',
5333 'AS': '202.70.112.0/20',
53896ca5 5334 'AT': '77.116.0.0/14',
773f291d
S
5335 'AU': '1.128.0.0/11',
5336 'AW': '181.41.0.0/18',
53896ca5
S
5337 'AX': '185.217.4.0/22',
5338 'AZ': '5.197.0.0/16',
773f291d
S
5339 'BA': '31.176.128.0/17',
5340 'BB': '65.48.128.0/17',
5341 'BD': '114.130.0.0/16',
5342 'BE': '57.0.0.0/8',
53896ca5 5343 'BF': '102.178.0.0/15',
773f291d
S
5344 'BG': '95.42.0.0/15',
5345 'BH': '37.131.0.0/17',
5346 'BI': '154.117.192.0/18',
5347 'BJ': '137.255.0.0/16',
53896ca5 5348 'BL': '185.212.72.0/23',
773f291d
S
5349 'BM': '196.12.64.0/18',
5350 'BN': '156.31.0.0/16',
5351 'BO': '161.56.0.0/16',
5352 'BQ': '161.0.80.0/20',
53896ca5 5353 'BR': '191.128.0.0/12',
773f291d
S
5354 'BS': '24.51.64.0/18',
5355 'BT': '119.2.96.0/19',
5356 'BW': '168.167.0.0/16',
5357 'BY': '178.120.0.0/13',
5358 'BZ': '179.42.192.0/18',
5359 'CA': '99.224.0.0/11',
5360 'CD': '41.243.0.0/16',
53896ca5
S
5361 'CF': '197.242.176.0/21',
5362 'CG': '160.113.0.0/16',
773f291d 5363 'CH': '85.0.0.0/13',
53896ca5 5364 'CI': '102.136.0.0/14',
773f291d
S
5365 'CK': '202.65.32.0/19',
5366 'CL': '152.172.0.0/14',
53896ca5 5367 'CM': '102.244.0.0/14',
773f291d
S
5368 'CN': '36.128.0.0/10',
5369 'CO': '181.240.0.0/12',
5370 'CR': '201.192.0.0/12',
5371 'CU': '152.206.0.0/15',
5372 'CV': '165.90.96.0/19',
5373 'CW': '190.88.128.0/17',
53896ca5 5374 'CY': '31.153.0.0/16',
773f291d
S
5375 'CZ': '88.100.0.0/14',
5376 'DE': '53.0.0.0/8',
5377 'DJ': '197.241.0.0/17',
5378 'DK': '87.48.0.0/12',
5379 'DM': '192.243.48.0/20',
5380 'DO': '152.166.0.0/15',
5381 'DZ': '41.96.0.0/12',
5382 'EC': '186.68.0.0/15',
5383 'EE': '90.190.0.0/15',
5384 'EG': '156.160.0.0/11',
5385 'ER': '196.200.96.0/20',
5386 'ES': '88.0.0.0/11',
5387 'ET': '196.188.0.0/14',
5388 'EU': '2.16.0.0/13',
5389 'FI': '91.152.0.0/13',
5390 'FJ': '144.120.0.0/16',
53896ca5 5391 'FK': '80.73.208.0/21',
773f291d
S
5392 'FM': '119.252.112.0/20',
5393 'FO': '88.85.32.0/19',
5394 'FR': '90.0.0.0/9',
5395 'GA': '41.158.0.0/15',
5396 'GB': '25.0.0.0/8',
5397 'GD': '74.122.88.0/21',
5398 'GE': '31.146.0.0/16',
5399 'GF': '161.22.64.0/18',
5400 'GG': '62.68.160.0/19',
53896ca5
S
5401 'GH': '154.160.0.0/12',
5402 'GI': '95.164.0.0/16',
773f291d
S
5403 'GL': '88.83.0.0/19',
5404 'GM': '160.182.0.0/15',
5405 'GN': '197.149.192.0/18',
5406 'GP': '104.250.0.0/19',
5407 'GQ': '105.235.224.0/20',
5408 'GR': '94.64.0.0/13',
5409 'GT': '168.234.0.0/16',
5410 'GU': '168.123.0.0/16',
5411 'GW': '197.214.80.0/20',
5412 'GY': '181.41.64.0/18',
5413 'HK': '113.252.0.0/14',
5414 'HN': '181.210.0.0/16',
5415 'HR': '93.136.0.0/13',
5416 'HT': '148.102.128.0/17',
5417 'HU': '84.0.0.0/14',
5418 'ID': '39.192.0.0/10',
5419 'IE': '87.32.0.0/12',
5420 'IL': '79.176.0.0/13',
5421 'IM': '5.62.80.0/20',
5422 'IN': '117.192.0.0/10',
5423 'IO': '203.83.48.0/21',
5424 'IQ': '37.236.0.0/14',
5425 'IR': '2.176.0.0/12',
5426 'IS': '82.221.0.0/16',
5427 'IT': '79.0.0.0/10',
5428 'JE': '87.244.64.0/18',
5429 'JM': '72.27.0.0/17',
5430 'JO': '176.29.0.0/16',
53896ca5 5431 'JP': '133.0.0.0/8',
773f291d
S
5432 'KE': '105.48.0.0/12',
5433 'KG': '158.181.128.0/17',
5434 'KH': '36.37.128.0/17',
5435 'KI': '103.25.140.0/22',
5436 'KM': '197.255.224.0/20',
53896ca5 5437 'KN': '198.167.192.0/19',
773f291d
S
5438 'KP': '175.45.176.0/22',
5439 'KR': '175.192.0.0/10',
5440 'KW': '37.36.0.0/14',
5441 'KY': '64.96.0.0/15',
5442 'KZ': '2.72.0.0/13',
5443 'LA': '115.84.64.0/18',
5444 'LB': '178.135.0.0/16',
53896ca5 5445 'LC': '24.92.144.0/20',
773f291d
S
5446 'LI': '82.117.0.0/19',
5447 'LK': '112.134.0.0/15',
53896ca5 5448 'LR': '102.183.0.0/16',
773f291d
S
5449 'LS': '129.232.0.0/17',
5450 'LT': '78.56.0.0/13',
5451 'LU': '188.42.0.0/16',
5452 'LV': '46.109.0.0/16',
5453 'LY': '41.252.0.0/14',
5454 'MA': '105.128.0.0/11',
5455 'MC': '88.209.64.0/18',
5456 'MD': '37.246.0.0/16',
5457 'ME': '178.175.0.0/17',
5458 'MF': '74.112.232.0/21',
5459 'MG': '154.126.0.0/17',
5460 'MH': '117.103.88.0/21',
5461 'MK': '77.28.0.0/15',
5462 'ML': '154.118.128.0/18',
5463 'MM': '37.111.0.0/17',
5464 'MN': '49.0.128.0/17',
5465 'MO': '60.246.0.0/16',
5466 'MP': '202.88.64.0/20',
5467 'MQ': '109.203.224.0/19',
5468 'MR': '41.188.64.0/18',
5469 'MS': '208.90.112.0/22',
5470 'MT': '46.11.0.0/16',
5471 'MU': '105.16.0.0/12',
5472 'MV': '27.114.128.0/18',
53896ca5 5473 'MW': '102.70.0.0/15',
773f291d
S
5474 'MX': '187.192.0.0/11',
5475 'MY': '175.136.0.0/13',
5476 'MZ': '197.218.0.0/15',
5477 'NA': '41.182.0.0/16',
5478 'NC': '101.101.0.0/18',
5479 'NE': '197.214.0.0/18',
5480 'NF': '203.17.240.0/22',
5481 'NG': '105.112.0.0/12',
5482 'NI': '186.76.0.0/15',
5483 'NL': '145.96.0.0/11',
5484 'NO': '84.208.0.0/13',
5485 'NP': '36.252.0.0/15',
5486 'NR': '203.98.224.0/19',
5487 'NU': '49.156.48.0/22',
5488 'NZ': '49.224.0.0/14',
5489 'OM': '5.36.0.0/15',
5490 'PA': '186.72.0.0/15',
5491 'PE': '186.160.0.0/14',
5492 'PF': '123.50.64.0/18',
5493 'PG': '124.240.192.0/19',
5494 'PH': '49.144.0.0/13',
5495 'PK': '39.32.0.0/11',
5496 'PL': '83.0.0.0/11',
5497 'PM': '70.36.0.0/20',
5498 'PR': '66.50.0.0/16',
5499 'PS': '188.161.0.0/16',
5500 'PT': '85.240.0.0/13',
5501 'PW': '202.124.224.0/20',
5502 'PY': '181.120.0.0/14',
5503 'QA': '37.210.0.0/15',
53896ca5 5504 'RE': '102.35.0.0/16',
773f291d 5505 'RO': '79.112.0.0/13',
53896ca5 5506 'RS': '93.86.0.0/15',
773f291d 5507 'RU': '5.136.0.0/13',
53896ca5 5508 'RW': '41.186.0.0/16',
773f291d
S
5509 'SA': '188.48.0.0/13',
5510 'SB': '202.1.160.0/19',
5511 'SC': '154.192.0.0/11',
53896ca5 5512 'SD': '102.120.0.0/13',
773f291d 5513 'SE': '78.64.0.0/12',
53896ca5 5514 'SG': '8.128.0.0/10',
773f291d
S
5515 'SI': '188.196.0.0/14',
5516 'SK': '78.98.0.0/15',
53896ca5 5517 'SL': '102.143.0.0/17',
773f291d
S
5518 'SM': '89.186.32.0/19',
5519 'SN': '41.82.0.0/15',
53896ca5 5520 'SO': '154.115.192.0/18',
773f291d
S
5521 'SR': '186.179.128.0/17',
5522 'SS': '105.235.208.0/21',
5523 'ST': '197.159.160.0/19',
5524 'SV': '168.243.0.0/16',
5525 'SX': '190.102.0.0/20',
5526 'SY': '5.0.0.0/16',
5527 'SZ': '41.84.224.0/19',
5528 'TC': '65.255.48.0/20',
5529 'TD': '154.68.128.0/19',
5530 'TG': '196.168.0.0/14',
5531 'TH': '171.96.0.0/13',
5532 'TJ': '85.9.128.0/18',
5533 'TK': '27.96.24.0/21',
5534 'TL': '180.189.160.0/20',
5535 'TM': '95.85.96.0/19',
5536 'TN': '197.0.0.0/11',
5537 'TO': '175.176.144.0/21',
5538 'TR': '78.160.0.0/11',
5539 'TT': '186.44.0.0/15',
5540 'TV': '202.2.96.0/19',
5541 'TW': '120.96.0.0/11',
5542 'TZ': '156.156.0.0/14',
53896ca5
S
5543 'UA': '37.52.0.0/14',
5544 'UG': '102.80.0.0/13',
5545 'US': '6.0.0.0/8',
773f291d 5546 'UY': '167.56.0.0/13',
53896ca5 5547 'UZ': '84.54.64.0/18',
773f291d 5548 'VA': '212.77.0.0/19',
53896ca5 5549 'VC': '207.191.240.0/21',
773f291d 5550 'VE': '186.88.0.0/13',
53896ca5 5551 'VG': '66.81.192.0/20',
773f291d
S
5552 'VI': '146.226.0.0/16',
5553 'VN': '14.160.0.0/11',
5554 'VU': '202.80.32.0/20',
5555 'WF': '117.20.32.0/21',
5556 'WS': '202.4.32.0/19',
5557 'YE': '134.35.0.0/16',
5558 'YT': '41.242.116.0/22',
5559 'ZA': '41.0.0.0/11',
53896ca5
S
5560 'ZM': '102.144.0.0/13',
5561 'ZW': '102.177.192.0/18',
773f291d
S
5562 }
5563
5564 @classmethod
5f95927a
S
5565 def random_ipv4(cls, code_or_block):
5566 if len(code_or_block) == 2:
5567 block = cls._country_ip_map.get(code_or_block.upper())
5568 if not block:
5569 return None
5570 else:
5571 block = code_or_block
773f291d
S
5572 addr, preflen = block.split('/')
5573 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5574 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5575 return compat_str(socket.inet_ntoa(
4248dad9 5576 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5577
5578
91410c9b 5579class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5580 def __init__(self, proxies=None):
5581 # Set default handlers
5582 for type in ('http', 'https'):
5583 setattr(self, '%s_open' % type,
5584 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5585 meth(r, proxy, type))
38e87f6c 5586 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5587
91410c9b 5588 def proxy_open(self, req, proxy, type):
2461f79d 5589 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5590 if req_proxy is not None:
5591 proxy = req_proxy
2461f79d
PH
5592 del req.headers['Ytdl-request-proxy']
5593
5594 if proxy == '__noproxy__':
5595 return None # No Proxy
51fb4995 5596 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5597 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5598 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
71aff188 5599 return None
91410c9b
PH
5600 return compat_urllib_request.ProxyHandler.proxy_open(
5601 self, req, proxy, type)
5bc880b9
YCH
5602
5603
0a5445dd
YCH
5604# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5605# released into Public Domain
5606# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5607
5608def long_to_bytes(n, blocksize=0):
5609 """long_to_bytes(n:long, blocksize:int) : string
5610 Convert a long integer to a byte string.
5611
5612 If optional blocksize is given and greater than zero, pad the front of the
5613 byte string with binary zeros so that the length is a multiple of
5614 blocksize.
5615 """
5616 # after much testing, this algorithm was deemed to be the fastest
5617 s = b''
5618 n = int(n)
5619 while n > 0:
5620 s = compat_struct_pack('>I', n & 0xffffffff) + s
5621 n = n >> 32
5622 # strip off leading zeros
5623 for i in range(len(s)):
5624 if s[i] != b'\000'[0]:
5625 break
5626 else:
5627 # only happens when n == 0
5628 s = b'\000'
5629 i = 0
5630 s = s[i:]
5631 # add back some pad bytes. this could be done more efficiently w.r.t. the
5632 # de-padding being done above, but sigh...
5633 if blocksize > 0 and len(s) % blocksize:
5634 s = (blocksize - len(s) % blocksize) * b'\000' + s
5635 return s
5636
5637
5638def bytes_to_long(s):
5639 """bytes_to_long(string) : long
5640 Convert a byte string to a long integer.
5641
5642 This is (essentially) the inverse of long_to_bytes().
5643 """
5644 acc = 0
5645 length = len(s)
5646 if length % 4:
5647 extra = (4 - length % 4)
5648 s = b'\000' * extra + s
5649 length = length + extra
5650 for i in range(0, length, 4):
5651 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5652 return acc
5653
5654
5bc880b9
YCH
5655def ohdave_rsa_encrypt(data, exponent, modulus):
5656 '''
5657 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5658
5659 Input:
5660 data: data to encrypt, bytes-like object
5661 exponent, modulus: parameter e and N of RSA algorithm, both integer
5662 Output: hex string of encrypted data
5663
5664 Limitation: supports one block encryption only
5665 '''
5666
5667 payload = int(binascii.hexlify(data[::-1]), 16)
5668 encrypted = pow(payload, exponent, modulus)
5669 return '%x' % encrypted
81bdc8fd
YCH
5670
5671
f48409c7
YCH
5672def pkcs1pad(data, length):
5673 """
5674 Padding input data with PKCS#1 scheme
5675
5676 @param {int[]} data input data
5677 @param {int} length target length
5678 @returns {int[]} padded data
5679 """
5680 if len(data) > length - 11:
5681 raise ValueError('Input data too long for PKCS#1 padding')
5682
5683 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
5684 return [0, 2] + pseudo_random + [0] + data
5685
5686
5eb6bdce 5687def encode_base_n(num, n, table=None):
59f898b7 5688 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
5689 if not table:
5690 table = FULL_TABLE[:n]
5691
5eb6bdce
YCH
5692 if n > len(table):
5693 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5694
5695 if num == 0:
5696 return table[0]
5697
81bdc8fd
YCH
5698 ret = ''
5699 while num:
5700 ret = table[num % n] + ret
5701 num = num // n
5702 return ret
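# Illustrative usage of encode_base_n above (example values only):
#   encode_base_n(255, 16)  # -> 'ff'
#   encode_base_n(255, 2)   # -> '11111111'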
f52354a8
YCH
5703
5704
5705def decode_packed_codes(code):
06b3fe29 5706 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 5707 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
5708 base = int(base)
5709 count = int(count)
5710 symbols = symbols.split('|')
5711 symbol_table = {}
5712
5713 while count:
5714 count -= 1
5eb6bdce 5715 base_n_count = encode_base_n(count, base)
f52354a8
YCH
5716 symbol_table[base_n_count] = symbols[count] or base_n_count
5717
5718 return re.sub(
5719 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 5720 obfuscated_code)
e154c651 5721
5722
1ced2221
S
5723def caesar(s, alphabet, shift):
5724 if shift == 0:
5725 return s
5726 l = len(alphabet)
5727 return ''.join(
5728 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
5729 for c in s)
5730
5731
5732def rot47(s):
5733 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
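# Illustrative usage of caesar and rot47 above (example values only):
#   caesar('abc', 'abcdefghijklmnopqrstuvwxyz', 3)  # -> 'def'
#   rot47(rot47('yt-dlp'))                          # -> 'yt-dlp' (a 47-shift over 94 characters is its own inverse)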
5734
5735
e154c651 5736def parse_m3u8_attributes(attrib):
5737 info = {}
5738 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
5739 if val.startswith('"'):
5740 val = val[1:-1]
5741 info[key] = val
5742 return info
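# Illustrative usage of parse_m3u8_attributes above (example attribute list only):
#   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.64001f"')
#   # -> {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.64001f'}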
1143535d
YCH
5743
5744
5745def urshift(val, n):
5746 return val >> n if val >= 0 else (val + 0x100000000) >> n
d3f8e038
YCH
5747
5748
5749# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5750# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
5751def decode_png(png_data):
5752 # Reference: https://www.w3.org/TR/PNG/
5753 header = png_data[8:]
5754
5755 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
5756 raise IOError('Not a valid PNG file.')
5757
5758 int_map = {1: '>B', 2: '>H', 4: '>I'}
5759 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
5760
5761 chunks = []
5762
5763 while header:
5764 length = unpack_integer(header[:4])
5765 header = header[4:]
5766
5767 chunk_type = header[:4]
5768 header = header[4:]
5769
5770 chunk_data = header[:length]
5771 header = header[length:]
5772
5773 header = header[4:] # Skip CRC
5774
5775 chunks.append({
5776 'type': chunk_type,
5777 'length': length,
5778 'data': chunk_data
5779 })
5780
5781 ihdr = chunks[0]['data']
5782
5783 width = unpack_integer(ihdr[:4])
5784 height = unpack_integer(ihdr[4:8])
5785
5786 idat = b''
5787
5788 for chunk in chunks:
5789 if chunk['type'] == b'IDAT':
5790 idat += chunk['data']
5791
5792 if not idat:
5793 raise IOError('Unable to read PNG data.')
5794
5795 decompressed_data = bytearray(zlib.decompress(idat))
5796
5797 stride = width * 3
5798 pixels = []
5799
5800 def _get_pixel(idx):
5801 x = idx % stride
5802 y = idx // stride
5803 return pixels[y][x]
5804
5805 for y in range(height):
5806 basePos = y * (1 + stride)
5807 filter_type = decompressed_data[basePos]
5808
5809 current_row = []
5810
5811 pixels.append(current_row)
5812
5813 for x in range(stride):
5814 color = decompressed_data[1 + basePos + x]
5815 basex = y * stride + x
5816 left = 0
5817 up = 0
5818
5819 if x > 2:
5820 left = _get_pixel(basex - 3)
5821 if y > 0:
5822 up = _get_pixel(basex - stride)
5823
5824 if filter_type == 1: # Sub
5825 color = (color + left) & 0xff
5826 elif filter_type == 2: # Up
5827 color = (color + up) & 0xff
5828 elif filter_type == 3: # Average
5829 color = (color + ((left + up) >> 1)) & 0xff
5830 elif filter_type == 4: # Paeth
5831 a = left
5832 b = up
5833 c = 0
5834
5835 if x > 2 and y > 0:
5836 c = _get_pixel(basex - stride - 3)
5837
5838 p = a + b - c
5839
5840 pa = abs(p - a)
5841 pb = abs(p - b)
5842 pc = abs(p - c)
5843
5844 if pa <= pb and pa <= pc:
5845 color = (color + a) & 0xff
5846 elif pb <= pc:
5847 color = (color + b) & 0xff
5848 else:
5849 color = (color + c) & 0xff
5850
5851 current_row.append(color)
5852
5853 return width, height, pixels
5854
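# Illustrative example (added sketch, not from the original source):
# decode_png() skips chunk CRCs and assumes 8-bit RGB (3 bytes per pixel), so
# a minimal 2x1 image can be built in memory with a hypothetical helper:
#
#   def _png_chunk(chunk_type, data):
#       # length + type + data + 4 dummy CRC bytes (the CRC is never checked)
#       return compat_struct_pack('>I', len(data)) + chunk_type + data + b'\x00' * 4
#
#   ihdr = _png_chunk(b'IHDR', compat_struct_pack('>IIBBBBB', 2, 1, 8, 2, 0, 0, 0))
#   idat = _png_chunk(b'IDAT', zlib.compress(b'\x00' + b'\xff\x00\x00' + b'\x00\xff\x00'))
#   width, height, pixels = decode_png(b'\x89PNG\x0d\x0a\x1a\x0a' + ihdr + idat)
#   # width == 2, height == 1, pixels == [[255, 0, 0, 0, 255, 0]]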
5855
5856def write_xattr(path, key, value):
5857 # This mess below finds the best xattr tool for the job
5858 try:
5859 # try the pyxattr module...
5860 import xattr
5861
5862 if hasattr(xattr, 'set'): # pyxattr
5863 # Unicode arguments are not supported in python-pyxattr until
5864 # version 0.5.0
067aa17e 5865 # See https://github.com/ytdl-org/youtube-dl/issues/5498
5866 pyxattr_required_version = '0.5.0'
5867 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
5868 # TODO: fallback to CLI tools
5869 raise XAttrUnavailableError(
5870 'python-pyxattr is detected but is too old. '
7a5c1cfe 5871 'yt-dlp requires %s or above while your version is %s. '
5872 'Falling back to other xattr implementations' % (
5873 pyxattr_required_version, xattr.__version__))
5874
5875 setxattr = xattr.set
5876 else: # xattr
5877 setxattr = xattr.setxattr
5878
5879 try:
53a7e3d2 5880 setxattr(path, key, value)
5881 except EnvironmentError as e:
5882 raise XAttrMetadataError(e.errno, e.strerror)
5883
5884 except ImportError:
5885 if compat_os_name == 'nt':
5886 # Write xattrs to NTFS Alternate Data Streams:
5887 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
5888 assert ':' not in key
5889 assert os.path.exists(path)
5890
5891 ads_fn = path + ':' + key
5892 try:
5893 with open(ads_fn, 'wb') as f:
5894 f.write(value)
5895 except EnvironmentError as e:
5896 raise XAttrMetadataError(e.errno, e.strerror)
5897 else:
5898 user_has_setfattr = check_executable('setfattr', ['--version'])
5899 user_has_xattr = check_executable('xattr', ['-h'])
5900
5901 if user_has_setfattr or user_has_xattr:
5902
5903 value = value.decode('utf-8')
5904 if user_has_setfattr:
5905 executable = 'setfattr'
5906 opts = ['-n', key, '-v', value]
5907 elif user_has_xattr:
5908 executable = 'xattr'
5909 opts = ['-w', key, value]
5910
5911 cmd = ([encodeFilename(executable, True)]
5912 + [encodeArgument(o) for o in opts]
5913 + [encodeFilename(path, True)])
5914
5915 try:
5916 p = subprocess.Popen(
5917 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
5918 except EnvironmentError as e:
5919 raise XAttrMetadataError(e.errno, e.strerror)
f5b1bca9 5920 stdout, stderr = process_communicate_or_kill(p)
5921 stderr = stderr.decode('utf-8', 'replace')
5922 if p.returncode != 0:
5923 raise XAttrMetadataError(p.returncode, stderr)
5924
5925 else:
5926 # On Unix, but neither pyxattr nor the setfattr/xattr executables could be found.
5927 if sys.platform.startswith('linux'):
5928 raise XAttrUnavailableError(
5929 "Couldn't find a tool to set the xattrs. "
5930 "Install either the python 'pyxattr' or 'xattr' "
5931 "modules, or the GNU 'attr' package "
5932 "(which contains the 'setfattr' tool).")
5933 else:
5934 raise XAttrUnavailableError(
5935 "Couldn't find a tool to set the xattrs. "
5936 "Install either the python 'xattr' module, "
5937 "or the 'xattr' binary.")
5938
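# Usage sketch (added note; the path, key and value below are placeholders):
# the key should follow the platform's conventions (e.g. a 'user.' prefix on
# Linux, and no ':' because of the NTFS ADS fallback) and the value must be
# bytes.
#
#   write_xattr('video.mp4', 'user.xdg.referrer.url', b'https://example.com/page')
#
# Failures surface as XAttrMetadataError (a backend reported an error) or
# XAttrUnavailableError (no usable xattr backend was found).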
5939
5940def random_birthday(year_field, month_field, day_field):
5941 start_date = datetime.date(1950, 1, 1)
5942 end_date = datetime.date(1995, 12, 31)
5943 offset = random.randint(0, (end_date - start_date).days)
5944 random_date = start_date + datetime.timedelta(offset)
0c265486 5945 return {
5946 year_field: str(random_date.year),
5947 month_field: str(random_date.month),
5948 day_field: str(random_date.day),
0c265486 5949 }
732044af 5950
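# Illustrative example (added note; the field names are arbitrary): the values
# are strings describing a random date between 1950-01-01 and 1995-12-31.
#
#   >>> random_birthday('birth_year', 'birth_month', 'birth_day')   # output varies
#   {'birth_year': '1984', 'birth_month': '7', 'birth_day': '21'}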
c76eb41b 5951
732044af 5952# Templates for internet shortcut files, which are plain text files.
5953DOT_URL_LINK_TEMPLATE = '''
5954[InternetShortcut]
5955URL=%(url)s
5956'''.lstrip()
5957
5958DOT_WEBLOC_LINK_TEMPLATE = '''
5959<?xml version="1.0" encoding="UTF-8"?>
5960<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
5961<plist version="1.0">
5962<dict>
5963\t<key>URL</key>
5964\t<string>%(url)s</string>
5965</dict>
5966</plist>
5967'''.lstrip()
5968
5969DOT_DESKTOP_LINK_TEMPLATE = '''
5970[Desktop Entry]
5971Encoding=UTF-8
5972Name=%(filename)s
5973Type=Link
5974URL=%(url)s
5975Icon=text-html
5976'''.lstrip()
5977
5978
5979def iri_to_uri(iri):
5980 """
5981 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
5982
5983 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
5984 """
5985
5986 iri_parts = compat_urllib_parse_urlparse(iri)
5987
5988 if '[' in iri_parts.netloc:
5989 raise ValueError('IPv6 URIs are not yet supported.')
5990 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
5991
5992 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
5993
5994 net_location = ''
5995 if iri_parts.username:
5996 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
5997 if iri_parts.password is not None:
5998 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
5999 net_location += '@'
6000
6001 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6002 # The 'idna' encoding produces ASCII text.
6003 if iri_parts.port is not None and iri_parts.port != 80:
6004 net_location += ':' + str(iri_parts.port)
6005
6006 return compat_urllib_parse_urlunparse(
6007 (iri_parts.scheme,
6008 net_location,
6009
6010 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6011
6012 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6013 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6014
6015 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6016 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6017
6018 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6019
6020 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6021
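# Illustrative example (added note, not from the original source): non-ASCII
# path and query characters are UTF-8 percent-encoded, existing escapes are
# kept and Unicode hostnames go through the 'idna' codec.
#
#   >>> iri_to_uri('https://example.com/dossier/é?q=ü')
#   'https://example.com/dossier/%C3%A9?q=%C3%BC'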
6022
6023def to_high_limit_path(path):
6024 if sys.platform in ['win32', 'cygwin']:
6025 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6026 return r'\\?\ '.rstrip() + os.path.abspath(path)
6027
6028 return path
76d321f6 6029
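# Illustrative example (added note, not from the original source): on Windows
# the extended-length prefix is prepended, elsewhere the path is returned
# unchanged.
#
#   to_high_limit_path('C:\\videos\\clip.mp4')   # -> '\\\\?\\C:\\videos\\clip.mp4' on win32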
c76eb41b 6030
76d321f6 6031def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
6032 val = obj.get(field, default)
6033 if func and val not in ignore:
6034 val = func(val)
6035 return template % val if val not in ignore else default
00dd0cd5 6036
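# Illustrative examples (added note, not from the original source): values
# listed in `ignore` (None and '' by default) yield `default` instead of being
# formatted.
#
#   >>> format_field({'width': 1920}, 'width', '%dpx')
#   '1920px'
#   >>> format_field({'width': None}, 'width', '%dpx', default='unknown')
#   'unknown'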
6037
6038def clean_podcast_url(url):
6039 return re.sub(r'''(?x)
6040 (?:
6041 (?:
6042 chtbl\.com/track|
6043 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6044 play\.podtrac\.com
6045 )/[^/]+|
6046 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6047 flex\.acast\.com|
6048 pd(?:
6049 cn\.co| # https://podcorn.com/analytics-prefix/
6050 st\.fm # https://podsights.com/docs/
6051 )/e
6052 )/''', '', url)
6053
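# Illustrative example (added note, not from the original source): known
# analytics prefixes are stripped, leaving the direct media URL.
#
#   >>> clean_podcast_url('https://chtbl.com/track/ABC123/traffic.megaphone.fm/EP1.mp3')
#   'https://traffic.megaphone.fm/EP1.mp3'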
6054
6055_HEX_TABLE = '0123456789abcdef'
6056
6057
6058def random_uuidv4():
6059 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6060
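# Added note (not from the original source): both 'x' and 'y' placeholders are
# replaced with uniformly random hex digits, so only the version nibble '4' is
# fixed; a possible result is '9b3f2c1a-7d4e-4f0a-21c8-5e6d7f8a9b0c'.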
6061
6062def make_dir(path, to_screen=None):
6063 try:
6064 dn = os.path.dirname(path)
6065 if dn and not os.path.exists(dn):
6066 os.makedirs(dn)
6067 return True
6068 except (OSError, IOError) as err:
6069 if callable(to_screen):
6070 to_screen('unable to create directory ' + error_to_compat_str(err))
6071 return False
f74980cb 6072
6073
6074def get_executable_path():
c552ae88 6075 from zipimport import zipimporter
6076 if hasattr(sys, 'frozen'): # Running from PyInstaller
6077 path = os.path.dirname(sys.executable)
6078 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6079 path = os.path.join(os.path.dirname(__file__), '../..')
6080 else:
6081 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6082 return os.path.abspath(path)
6083
6084
2f567473 6085def load_plugins(name, suffix, namespace):
f74980cb 6086 plugin_info = [None]
6087 classes = []
6088 try:
6089 plugin_info = imp.find_module(
6090 name, [os.path.join(get_executable_path(), 'ytdlp_plugins')])
6091 plugins = imp.load_module(name, *plugin_info)
6092 for name in dir(plugins):
2f567473 6093 if name in namespace:
6094 continue
6095 if not name.endswith(suffix):
f74980cb 6096 continue
6097 klass = getattr(plugins, name)
6098 classes.append(klass)
6099 namespace[name] = klass
6100 except ImportError:
6101 pass
6102 finally:
6103 if plugin_info[0] is not None:
6104 plugin_info[0].close()
6105 return classes
06167fbb 6106
6107
6108def traverse_dict(dictn, keys, casesense=True):
a439a3a4 6109 keys = list(keys)[::-1]
6110 while keys:
6111 key = keys.pop()
6112 if isinstance(dictn, dict):
6113 if not casesense:
6114 dictn = {k.lower(): v for k, v in dictn.items()}
6115 key = key.lower()
6116 dictn = dictn.get(key)
6117 elif isinstance(dictn, (list, tuple, compat_str)):
e625be0d 6118 if ':' in key:
6119 key = slice(*map(int_or_none, key.split(':')))
a439a3a4 6120 else:
e625be0d 6121 key = int_or_none(key)
6122 dictn = try_get(dictn, lambda x: x[key])
a439a3a4 6123 else:
6124 return None
6125 return dictn
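

# Illustrative examples (added note, not from the original source): keys are
# applied left to right; dict levels are looked up by key (case-insensitively
# when casesense=False) and list/tuple/str levels by integer index or an
# 'a:b' slice.
#
#   >>> traverse_dict({'a': {'B': [10, 20, 30]}}, ('a', 'b', '1'), casesense=False)
#   20
#   >>> traverse_dict({'a': {'b': [10, 20, 30]}}, ('a', 'b', '0:2'))
#   [10, 20]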