]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
Update to ytdl-commit-9f6c03
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
201c1459 43 compat_HTTPError,
8f9312c3 44 compat_basestring,
8c25f81b 45 compat_chr,
1bab3437 46 compat_cookiejar,
d7cd9a9e 47 compat_ctypes_WINFUNCTYPE,
36e6f62c 48 compat_etree_fromstring,
51098426 49 compat_expanduser,
8c25f81b 50 compat_html_entities,
55b2f099 51 compat_html_entities_html5,
be4a824d 52 compat_http_client,
42db58ec 53 compat_integer_types,
e29663c6 54 compat_numeric_types,
c86b6142 55 compat_kwargs,
efa97bdc 56 compat_os_name,
8c25f81b 57 compat_parse_qs,
702ccf2d 58 compat_shlex_quote,
8c25f81b 59 compat_str,
edaa23f8 60 compat_struct_pack,
d3f8e038 61 compat_struct_unpack,
8c25f81b
PH
62 compat_urllib_error,
63 compat_urllib_parse,
15707c7e 64 compat_urllib_parse_urlencode,
8c25f81b 65 compat_urllib_parse_urlparse,
732044af 66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
7581bfc9 69 compat_urllib_parse_unquote_plus,
8c25f81b
PH
70 compat_urllib_request,
71 compat_urlparse,
810c10ba 72 compat_xpath,
8c25f81b 73)
4644ac55 74
71aff188
YCH
75from .socks import (
76 ProxyType,
77 sockssocket,
78)
79
4644ac55 80
51fb4995
YCH
def register_socks_protocols():
    """Add the SOCKS URL schemes to ``compat_urlparse.uses_netloc``.

    Each of 'socks', 'socks4', 'socks4a' and 'socks5' is appended only when
    it is not already listed, so calling this more than once adds nothing new.
    """
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    netloc_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in netloc_schemes:
            continue
        netloc_schemes.append(socks_scheme)
88
89
468e2e92
FV
# Type of a compiled regular expression object, taken from a real instance
# because it is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))
92
f7a147e3
S
93
94def random_user_agent():
95 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
96 _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
1673 )
1674 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# HTTP header values. The User-Agent entry is chosen once, at import time,
# by calling random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# User-Agent strings keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as the default of `default=` parameters (see
# xpath_element) so that callers can still pass None as an explicit default.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, January first
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# File extensions (without the leading dot) that are recognised as media or
# manifest formats
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# NOTE(review): presumably the container formats accepted as remux targets;
# confirm against the code that consumes this tuple.
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')

# needed for sanitizing filenames in restricted mode
# Maps each accented character of the first string to its ASCII replacement;
# the list items ('AE', 'OE', 'TH', 'ss', ...) are the multi-character ones.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strftime/strptime-style date formats that do not depend on whether the day
# or the month is written first
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# DATE_FORMATS plus the numeric formats in which the day precedes the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# DATE_FORMATS plus the numeric formats in which the month precedes the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches `}('<payload>',<int>,<int>,'<words>'.split('|')` and captures the
# four arguments.
# NOTE(review): looks like the argument list of a "packed" JavaScript blob - confirm.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; the script body is
# captured in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1786
7105440c 1787
def preferredencoding():
    """Return the encoding to use for text on this system.

    The value of locale.getpreferredencoding() is used if a short ASCII
    string can be encoded with it; if looking it up or the trial encoding
    fails for any reason, 'UTF-8' is returned instead.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Make sure the reported codec actually exists and is usable
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 1801
f4bfd65f 1802
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn (same
    directory, name prefixed with fn's basename, suffix '.tmp'), which is
    then renamed onto fn. Objects json cannot serialize are written as their
    repr(). If anything fails, the temporary file is removed (best effort)
    and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf, default=repr)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates the file with restrictive
            # permissions; widen them to what the current umask allows.
            # os.umask can only be read by setting it, hence set-and-restore.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1861
1862
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            # Only require the attribute to be present
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            attributes = candidate.attrib
            if key in attributes and (val is None or attributes.get(key) == val):
                return candidate
        return None
1877
d7e66d39
JMF
1878# On python2.6 the xml.etree.ElementTree.Element methods don't support
1879# the namespace parameter
5f6a1245
JW
1880
1881
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand the `prefix:tag` steps of path to `{namespace}tag`.

    path is split on '/'; every step of the form `prefix:tag` is rewritten
    with the namespace ns_map[prefix], steps without a colon are kept as is.
    """
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1892
d77c3dfd 1893
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node that matches xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order until one matches.

    If nothing matches: default is returned when one was given; otherwise
    ExtractorError is raised when fatal is true (the message uses name, or
    xpath if name is None); otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so an empty iterable of expressions is
        # handled like a failed lookup instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1915
1916
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A failed lookup is handled by xpath_element(). If the element is found
    but has no text: default is returned when one was given; otherwise
    ExtractorError is raised when fatal is true; otherwise None is returned.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        # Nothing found: pass through whatever xpath_element() returned
        return element

    if element.text is not None:
        return element.text

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1930
1931
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    If there is no such element: default is returned when one was given;
    otherwise ExtractorError is raised when fatal is true (the message uses
    name, or 'xpath[@key]' if name is None); otherwise None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1943
1944
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    element_content = get_element_by_attribute('id', id, html)
    return element_content
43e8fafd 1948
12ea2f30 1949
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if matches:
        return matches[0]
    return None
1954
1955
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if matches:
        return matches[0]
    return None
1959
1960
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class may be one of several in the attribute value, so match it as
    # a whole word surrounded by anything that is not a quote. The value is
    # already a regex, hence escape_value=False.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1966
1967
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case it
    is inserted into the search pattern as a regular expression. Each content
    is passed through unescapeHTML().
    """

    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')

        # Drop the first and last character of content that starts with a quote
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]

        contents.append(unescapeHTML(content))

    return contents
a921f407 1991
c5229f39 1992
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Stays empty if no start tag is ever seen
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs is the parser's list of (name, value) pairs; every start tag
        # seen replaces the previously stored attributes
        self.attrs = dict(attrs)
2002
c5229f39 2003
8bb56eee
BF
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attribute_parser = HTMLAttributeParser()
    try:
        attribute_parser.feed(html_element)
        attribute_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # return whatever was collected before the error
        pass
    return attribute_parser.attrs
9e6dd238 2028
c5229f39 2029
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines are not significant in HTML ...
    text = html.replace('\n', ' ')
    # ... whereas <br> and a </p><p> boundary become real line breaks
    for line_break_re in (
            r'(?u)\s*<\s*br\s*/?\s*>\s*',
            r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>'):
        text = re.sub(line_break_re, '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2045
2046
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it retries once with
    the name returned by sanitize_path(); if that name is no different, or
    the error was EACCES, the original exception is re-raised, like the
    standard open() function would raise it.

    The filename '-' stands for standard output (its binary buffer where
    there is one).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            stdout_stream = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (stdout_stream, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A different file name cannot help with a permission problem
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise

        # An exception here should be caught in the caller
        alt_stream = open(encodeFilename(alt_filename), open_mode)
        return (alt_stream, alt_filename)
d77c3dfd
FV
2077
2078
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None if timestr cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2086
5f6a1245 2087
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # '?' and control characters are dropped altogether
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace():
                return '_'
            if code > 127:
                return '_'
        return char

    # Handle timestamps: 12:34:56 -> 12_34_56
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)

    if not is_id:
        # Collapse runs of underscores and trim them from both ends
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
d77c3dfd 2127
5f6a1245 2128
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On win32 the path is always sanitized (force is ignored there). On other
    platforms s is returned unchanged unless force is true.

    Sanitizing normalizes the path with os.path.normpath() and, in every
    component other than '.' and '..', replaces the characters /<>:"|\\?*
    and a trailing whitespace character or dot with '#'. A drive/UNC prefix
    is kept untouched; with force, a leading path separator is kept as well.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # The remainder starts with a separator, so the first component is empty
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # startswith() instead of s[0] so that an empty path does not raise
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2152
2153
def sanitize_url(url):
    """Return url with a missing scheme or a known scheme typo repaired."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, replacement in COMMON_TYPOS:
        if re.match(typo_re, url) is None:
            continue
        # Only the first typo that matches is corrected
        return re.sub(typo_re, replacement, url)
    return url
17bcc626
S
2170
2171
def sanitized_Request(url, *args, **kwargs):
    """Create a compat_urllib_request.Request for url after passing it through sanitize_url()

    All further arguments are handed to the Request constructor unchanged.
    """
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2174
2175
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables second
    with_home_expanded = compat_expanduser(s)
    return os.path.expandvars(with_home_expanded)
2179
2180
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element, in the
    original order. Elements are compared with ==, so they do not have to be
    hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2188
912b38b4 2189
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with the
    trailing ';' (e.g. 'amp;' or '#x26;'). Entities that cannot be decoded
    are returned in their literal form '&...;'.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() accepts the '0x' prefix for base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # ValueError: not a valid code point; OverflowError: the number
            # does not even fit the integer type chr() accepts
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2219
2220
def unescapeHTML(s):
    """Replace the HTML entities in s by the characters they stand for

    None is passed through; any other value must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        # Group 1 is the entity without '&' but including the ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', decode_entity, s)
d77c3dfd 2228
8bf48f23 2229
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate(*args, **kwargs) and return its result

    If communicate() is interrupted by any exception, the process is killed
    and waited for before the exception is re-raised.
    """
    try:
        output = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    else:
        return output
2237
2238
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for arguments and output of subprocesses"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()

    fs_encoding = sys.getfilesystemencoding()
    return 'utf-8' if fs_encoding is None else fs_encoding
2249
2250
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file

    Returns s unchanged where text file names can be used directly,
    otherwise s encoded with get_subprocess_encoding() (characters that
    cannot be encoded are dropped).
    """

    assert type(s) == compat_str

    use_text_name = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if use_text_name:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2273
2274
def decodeFilename(b, for_subprocess=False):
    """Decode a byte string file name with get_subprocess_encoding()

    On Python 3, and for values that are not bytes, b is returned unchanged.
    for_subprocess is accepted but not used.
    """
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2284
f07b74fc
PH
2285
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(s, True)

    Values that are not compat_str are decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)

    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2293
2294
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument via decodeFilename(b, True)"""
    decoded = decodeFilename(b, True)
    return decoded
2297
2298
8271226a
PH
def decodeOption(optval):
    """Return the option value optval as text

    None is passed through and bytes are decoded with preferredencoding();
    the result must be a compat_str.
    """
    if optval is None:
        return None

    value = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(value, compat_str)
    return value
1c256f70 2307
5f6a1245 2308
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'

    The shortest form that fits is used: hours only appear from 3600
    seconds on, minutes only from 60 seconds on. delim is the separator
    placed between the fields.
    """
    # The boundaries are inclusive so that exactly one minute/hour is
    # rendered as '1:00'/'1:00:00' rather than '60'/'60:00'
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2316
a0ddb8a2 2317
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version

    Certificate verification is turned off if params['nocheckcertificate']
    is true. kwargs are passed on to YoutubeDLHTTPSHandler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2341
732ea2f0 2342
08f2a92c
JMF
def bug_reports_message():
    """Return the text asking the user to report an issue

    The text starts with '; ' as it is meant to be appended to an error
    message. The update hint depends on ytdl_is_updateable().
    """
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'

    sentences = (
        '; please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    )
    return ''.join(sentences)
2352
2353
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2357
2358
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        cause, if given, is appended to the message; video_id, if given, is prepended to it.
        """

        # Network problems and unavailable videos are never bugs of ours
        expected_exceptions = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_exceptions:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none"""
        if self.traceback is None:
            return None
        formatted_lines = traceback.format_tb(self.traceback)
        return ''.join(formatted_lines)
01951dda 2386
1c256f70 2387
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected ExtractorError for a URL that is not supported

    The URL is available as the attribute `url`.
    """

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2393
2394
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2398
2399
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always an expected error; the original message and the countries
    passed in are kept in the attributes `msg` and `countries`.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2411
2412
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
2425
2426
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
2434
2435
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2443
2444
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task. The message is also
    available as the attribute `msg`.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        self.msg = msg
d77c3dfd 2455
5f6a1245 2456
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's;
    presumably this is raised to stop processing once a video that is already
    known is encountered - confirm where it is raised.
    """
2460
2461
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's;
    presumably this is raised to stop processing once a video is rejected
    (e.g. by a filter) - confirm where it is raised.
    """
2465
2466
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2470
2471
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2479
2480
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2496
5f6a1245 2497
class XAttrMetadataError(YoutubeDLError):
    """Error while handling extended attribute (xattr) metadata

    Besides `code` and `msg`, the attribute `reason` classifies the failure
    as 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg: the errno decides, with the well-known
        # message texts as a fallback
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2512
2513
class XAttrUnavailableError(YoutubeDLError):
    # Marker exception with no extra state.
    # NOTE(review): presumably signals that extended-attribute support is not
    # available - raise sites are not visible here, confirm.
    pass
2516
2517
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if requested, bind it to a source address.

    ydl_handler -- handler whose ``_params`` dict is consulted for 'source_address'
    http_class  -- connection class to instantiate with *args / **kwargs
    is_https    -- whether the Python 2.6 fallback should wrap the socket with SSL

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NB: the ``source_address`` parameter shadows the outer variable;
            # here it is the (host, port) tuple handed over by the connection.
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host is taken to mean IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try every remaining candidate; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the last failure and release the half-open socket
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is passed alongside the configured source host
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            # Replace connect() on this instance only
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2581
2582
def handle_youtubedl_headers(headers):
    """Apply the internal "Youtubedl-no-compression" marker header.

    If the marker is present, return a new dict without it and without any
    Accept-Encoding header (matched case-insensitively). Otherwise return
    ``headers`` itself. The input mapping is never modified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    kept = {}
    for name, value in headers.items():
        if name.lower() != 'accept-encoding':
            kept[name] = value
    del kept['Youtubedl-no-compression']
    return kept
87f0e62d
YCH
2591
2592
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params is kept as self._params; _create_http_connection reads
        # 'source_address' from it.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, going through a SOCKS connection class if requested."""
        conn_class = compat_http_client.HTTPConnection

        # 'Ytdl-socks-proxy' is an internal header: consume it here so that
        # it is removed from the request headers.
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying raw deflate first, then zlib-wrapped."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the URL and add the standard headers before sending req."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Add each standard header unless the request already carries it
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress gzip/deflate bodies and percent-encode redirect locations."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if none of the
                # attempts succeeds, re-raise the original error.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    # Python 2 gets the header back as a UTF-8 byte string
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 2716
5de90176 2717
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Create a connection class that connects through a SOCKS proxy.

    base_class  -- HTTPConnection or HTTPSConnection (sub)class to derive from
    socks_proxy -- proxy URL; the scheme must be one of socks, socks4,
                   socks4a or socks5 (case-insensitive). User name and
                   password, if present, are percent-decoded.

    Returns a subclass of base_class whose connect() goes through the proxy.
    Raises ValueError for an unsupported proxy scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously socks_type stayed unbound here and building proxy_args
        # below failed with an unhelpful UnboundLocalError.
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2759
2760
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that creates its connections via _create_http_connection.

    params is stored as ``_params``; https_conn_class optionally overrides the
    connection class (defaults to compat_http_client.HTTPSConnection).
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        # Forward whichever SSL-related attributes this Python version set
        # on the handler ('_context': python > 2.6, '_check_hostname': 3.x).
        forwarded = (('_context', 'context'), ('_check_hostname', 'check_hostname'))
        open_kwargs = dict(
            (key, getattr(self, attr))
            for attr, key in forwarded if hasattr(self, attr))

        connection_cls = self._https_conn_class
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_cls = make_socks_conn_class(connection_cls, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        factory = functools.partial(
            _create_http_connection, self, connection_cls, True)
        return self.do_open(factory, req, **open_kwargs)
be4a824d
PH
2784
2785
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar variant that reads and writes cookie files as UTF-8,
    accepts "#HttpOnly_" prefixed entries and stores session cookies with
    an `expires` field of 0.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        filename defaults to self.filename; ValueError is raised if neither
        is set. Cookies marked to be discarded are skipped unless
        ignore_discard is true, expired ones unless ignore_expires is true.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                # Store session cookies with `expires` set to 0 instead of an
                # empty string. Only the written value is changed: assigning 0
                # to the live cookie (as was done before) makes is_expired()
                # true for it, so the jar would stop using the cookie.
                expires = compat_str(0 if cookie.expires is None else cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Malformed entries are skipped with a warning on stderr instead of
        aborting the load. filename defaults to self.filename; ValueError
        is raised if neither is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so the entry parses as a regular one
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Validated lines are collected in memory and handed to the base
        # class parser in one go.
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2902
2903
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor whose handling is applied to HTTPS traffic as well.

    Currently both methods only delegate to the base class; the Set-Cookie
    escaping workaround below is disabled (commented out).
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Reuse the HTTP hooks for HTTPS
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2926
2927
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            # get_full_url() is available on both python 2 and 3, whereas the
            # `full_url` attribute only exists on python 3 Request objects.
            raise compat_HTTPError(req.get_full_url(), code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Content headers describe the original request and are not carried
        # over to the redirected request.
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
fca6dba8
S
2983
2984
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC designator off a date string.

    Recognizes a final "Z" or a numeric offset such as "+0100" / "-05:30"
    (optionally preceded by one space) that follows at least 8 characters.

    Returns a (timedelta, remaining_date_str) tuple; the timedelta is zero
    and the string unchanged when nothing is recognized.
    """
    match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(match.group('tz'))]
    if not match.group('sign'):
        # Plain "Z": UTC, zero offset
        return datetime.timedelta(), remainder

    direction = 1 if match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(match.group('hours')),
        minutes=direction * int(match.group('minutes')))
    return offset, remainder
3001
3002
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str  -- "YYYY-MM-DD<delimiter>HH:MM:SS", optionally with fractional
                 seconds (dropped) and a UTC designator
    delimiter -- separator between the date and the time part
    timezone  -- timedelta offset to subtract; extracted from date_str when None

    Returns None if date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Fractional seconds are not part of the format string below
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_time = datetime.datetime.strptime(without_fraction, layout) - timezone
        return calendar.timegm(utc_time.timetuple())
    except ValueError:
        return None
912b38b4
PH
3020
3021
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first or the month-first list of date formats."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3024
3025
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None if date_str is None or no known format matches.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; if several match, the last one wins
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to RFC 2822 style parsing
        fields = email.utils.parsedate_tz(cleaned)
        if fields:
            try:
                result = datetime.datetime(*fields[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if result is None else compat_str(result)
bf50b038 3052
5f6a1245 3053
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    date_str  -- the date to parse; None yields None
    day_first -- selects the format list returned by date_formats()

    Returns None when date_str cannot be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The UTC offset was already stripped from date_str by
        # extract_timezone(), so parsedate_tz() cannot see it: apply it here
        # as well, like in the strptime path above (integer seconds).
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
46f59e89
S
3085
3086
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    Takes the text after the last dot of the part before the first "?".
    Returns default_ext when url is None, contains no dot, or the guess is
    neither purely alphanumeric nor (ignoring trailing slashes) a known
    extension.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    if candidate.rstrip('/') in KNOWN_EXTENSIONS:
        return candidate.rstrip('/')
    return default_ext
73e79f2a 3098
5f6a1245 3099
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by "<sub_lang>.<sub_format>"."""
    new_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, new_ext, expected_real_ext)
d4051a8e 3102
5f6a1245 3103
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: strptime format used to parse the date part of date_str
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    round_to_unit = precision == 'auto'
    if round_to_unit:
        precision = 'microsecond'

    now = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return now
    if date_str == 'yesterday':
        return now - datetime.timedelta(days=1)

    relative = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if relative is None:
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    # The part before the sign is itself a date expression
    base = datetime_from_str(relative.group('start'), precision, format)
    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')

    if unit in ('month', 'year'):
        # Calendar-aware arithmetic; the result is rounded to days below
        months = amount * 12 if unit == 'year' else amount
        shifted = datetime_add_months(base, months)
        unit = 'day'
    else:
        if unit == 'week':
            unit, amount = 'day', amount * 7
        shifted = base + datetime.timedelta(**{unit + 's': amount})

    return datetime_round(shifted, unit) if round_to_unit else shifted
3144
3145
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: strptime format used to parse the date part of date_str
    """
    moment = datetime_from_str(date_str, precision='microsecond', format=format)
    return moment.date()
3154
3155
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month index so divmod yields year carry and month
    year_carry, month_index = divmod(dt.month + months - 1, 12)
    target_year = dt.year + year_carry
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt.replace(target_year, target_month, min(dt.day, last_day))
3163
3164
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    precision: microsecond|second|minute|hour|day; 'microsecond' returns
    dt unchanged. Any other value raises KeyError.
    """
    if precision == 'microsecond':
        return dt

    seconds_per_unit = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }

    def round_to_multiple(value, step):
        # Round half up to the nearest multiple of step
        return ((value + step / 2) // step) * step

    epoch_seconds = calendar.timegm(dt.timetuple())
    rounded = round_to_multiple(epoch_seconds, seconds_per_unit[precision])
    return datetime.datetime.utcfromtimestamp(rounded)
5f6a1245
JW
3181
3182
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3191
5f6a1245 3192
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing start/end leaves the range open on that side (the minimum
        resp. maximum representable date is used).
        Raises ValueError if start lies after end.
        """
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range

        date may be a datetime.date, a datetime.datetime (only its date part
        is considered) or a string accepted by date_from_str.
        """
        if isinstance(date, datetime.datetime):
            # datetime is a subclass of date, but ordering a date against a
            # datetime raises TypeError, so reduce it to its date first.
            date = date.date()
        elif not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3222
3223
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Byte strings are decoded using the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3232
3233
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors (1, 2) to the values passed to GetStdHandle
    # (-11 and -12 are the Win32 STD_OUTPUT_HANDLE / STD_ERROR_HANDLE constants)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # A handle is treated as a console only if it is valid, is a character
        # device (ignoring the remote flag) and GetConsoleMode succeeds on it.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, stopping before each non-BMP
    # character; a non-BMP character is written on its own with a length of 2.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3307
3308
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    s        -- text to write; must be exactly of type compat_str
    out      -- target stream
    encoding -- encoding used when bytes have to be written; when None, the
                stream's own or the preferred encoding is used
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    # On Windows try the console API first, it handles unicode output itself
    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(stream, 'fileno'))
    if use_console_api and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        # Text stream with a binary buffer underneath: encode ourselves so
        # unencodable characters are dropped
        codec = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(codec, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
3329
3330
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the elements of bs as a list of ints ([] for empty input)."""
    if not bs:
        return []
    # Indexing yields ints on Python 3 bytes; anything else goes through ord()
    return list(bs) if isinstance(bs[0], int) else [ord(ch) for ch in bs]
3338
c257baff 3339
def intlist_to_bytes(xs):
    """Pack a sequence of ints into a byte string using one 'B' field each."""
    if xs:
        fmt = '%dB' % len(xs)
        return compat_struct_pack(fmt, *xs)
    return b''
c38b1e77
PH
3344
3345
c1c9a79c
PH
# Cross-platform file locking
#
# Defines _lock_file(f, exclusive) and _unlock_file(f) using LockFileEx /
# UnlockFileEx on Windows, fcntl.flock where fcntl is importable, and stubs
# that raise IOError everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL

    # Low/high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock f via LockFileEx; raises OSError when the call fails."""
        region = OVERLAPPED()
        region.Offset = 0
        region.OffsetHigh = 0
        region.hEvent = 0
        # Kept on the file object so that _unlock_file can reuse it
        f._lock_file_overlapped_p = ctypes.pointer(region)
        os_handle = msvcrt.get_osfhandle(f.fileno())
        lock_flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            os_handle, lock_flags, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        os_handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            os_handle, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
    else:
        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock on f."""
            operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            fcntl.flock(f, operation)

        def _unlock_file(f):
            """Drop the flock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
3419
3420
class locked_file(object):
    """Open a file and hold a file lock on it while used as a context manager.

    The file is opened immediately by the constructor.  Entering the context
    takes a shared lock for mode 'r' and an exclusive lock otherwise; leaving
    it unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        wants_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, wants_exclusive)
        except IOError:
            # Do not leave the file open when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Close the file even when unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3450
3451
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3455
3456
def shell_quote(args):
    """Return args as one space-separated string with each item shell-quoted.

    Byte-string items are decoded with the filesystem encoding first.
    """
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
9d4660ca
PH
3466
3467
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    The data dict is JSON-encoded into the '__youtubedl_smuggle' parameter
    of the URL fragment.  Data already smuggled into url is kept and takes
    precedence over the same keys in data.  The caller's dict is left
    untouched.
    """
    url, smuggled = unsmuggle_url(url, {})
    # Work on a copy: updating `data` in place would leak previously smuggled
    # values into a dict the caller may reuse for other URLs
    payload = dict(data)
    payload.update(smuggled)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
9d4660ca
PH
3476
3477
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data).

    Returns (smug_url, default) when the URL carries no
    '#__youtubedl_smuggle' marker.
    """
    if '#__youtubedl_smuggle' in smug_url:
        plain_url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return plain_url, json.loads(encoded)
    return smug_url, default
02dbf93f
PH
3485
3486
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a string with a binary unit suffix.

    Returns 'N/A' for None.  A str argument is converted with float().
    The unit is clamped to the range B..YiB, so values below one byte are
    reported in B and values beyond the YiB range in YiB; negative values
    are scaled by their magnitude.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    magnitude = abs(bytes)
    if magnitude == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(magnitude, 1024.0))
        # An unclamped exponent indexes from the end of the list for tiny
        # values and past the end of it for huge ones
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3499
1c088fa8 3500
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse '<number> <unit>' at the start of s using unit_table.

    unit_table maps unit names to multipliers.  The number may use ',' or
    '.' as decimal separator.  Returns the scaled value truncated to int,
    or None when s does not start with a number followed by a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    mobj = re.match(pattern, s)
    if mobj is None:
        return None
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3510
3511
be64b5b0
PH
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into bytes.

    Returns None for None input or when lookup_unit_table finds no match.
    """
    if s is None:
        return None

    # (symbol letter, decimal unit name stem, binary unit name stem),
    # ordered by power
    PREFIXES = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    for power, (letter, decimal_stem, binary_stem) in enumerate(PREFIXES, 1):
        lower = letter.lower()
        binary = 1024 ** power
        decimal = 1000 ** power
        _UNIT_TABLE[letter + 'iB'] = binary
        _UNIT_TABLE[letter + 'B'] = decimal
        _UNIT_TABLE[lower + 'B'] = binary
        _UNIT_TABLE[letter + 'b'] = decimal
        _UNIT_TABLE[lower + 'b'] = decimal
        _UNIT_TABLE[decimal_stem + 'bytes'] = decimal
        _UNIT_TABLE[binary_stem + 'bytes'] = binary

    return lookup_unit_table(_UNIT_TABLE, s)
3581
3582
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Strings made only of digits, ',' and '.' go through str_to_int; anything
    else is looked up with the k/m/kk suffix table.  Returns None for None.
    """
    if s is None:
        return None

    s = s.strip()
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    thousand = 1000
    million = 1000 ** 2
    _UNIT_TABLE = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3602
2f7ae819 3603
b871d7e9
S
def parse_resolution(s):
    """Extract resolution information from a string.

    Recognizes 'WxH' (giving width and height), '<N>p' / '<N>i' (height
    only) and '4k' / '8k' (height = N * 540).  Returns {} when nothing is
    recognized or s is None.
    """
    if s is None:
        return {}

    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    # (pattern, factor applied to the captured number to obtain the height)
    height_rules = (
        (r'\b(\d+)[pPiI]\b', 1),
        (r'\b([48])[kK]\b', 540),
    )
    for pattern, factor in height_rules:
        found = re.search(pattern, s)
        if found:
            return {'height': int(found.group(1)) * factor}

    return {}
3624
3625
0dc41787
S
def parse_bitrate(s):
    """Return the integer in front of 'kbps' in s, or None if there is none.

    Anything that is not a text string yields None.
    """
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3632
3633
def month_by_name(name, lang='en'):
    """ Return the 1-based number of a month given its full name

    Names are looked up in MONTH_NAMES[lang], falling back to the English
    table for unknown languages.  Returns None for an unknown name. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in names:
        return names.index(name) + 1
    return None
3643
3644
def month_by_abbreviation(abbrev):
    """ Return the 1-based number of a month given the first three letters
    of its English name, or None if nothing matches """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3653
3654
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;'.

    Ampersands that start one of the five predefined entities or a numeric
    character reference are left alone.  Numeric references may have up to
    6 hex or 7 decimal digits so that code points above U+FFFF (for example
    '&#x1F600;') are not mangled.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,6};|#[0-9]{,7};)',
        '&amp;',
        xml_str)
e3946f98
PH
3661
3662
def setproctitle(title):
    """Best-effort attempt to set the process name via libc's prctl().

    Silently does nothing on Jython, when libc.so.6 cannot be loaded, or
    when the loaded library has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initializing the buffer from the bytes allocates len + 1 bytes, so the
    # string handed to prctl is always NUL-terminated.  A buffer sized to
    # exactly len(title_bytes) has no room for the terminator.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3687
3688
def remove_start(s, start):
    """Return s without the leading start, if present; None stays None."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
29eb5174
PH
3691
3692
def remove_end(s, end):
    """Return s without the trailing end, if present; None stays None.

    An empty end leaves s unchanged.
    """
    # `end` must be non-empty: s[:-0] is s[:0], which would wipe the string
    if s is not None and s.endswith(end) and end:
        return s[:-len(end)]
    return s
2b9faf55
PH
3695
3696
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching surrounding quotes (single or double).

    None and strings shorter than two characters are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3704
3705
b6e0c7d2
U
def get_domain(url):
    """Return the host part of url without scheme and leading 'www.'.

    Returns None when url does not start with something containing a dot.
    """
    mobj = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?',
        url)
    if mobj is None:
        return None
    return mobj.group('domain')
3709
3710
def url_basename(url):
    """Return the last component of the URL's path ('' for an empty path)."""
    url_path = compat_urlparse.urlparse(url).path
    return url_path.strip('/').rpartition('/')[2]
aa94a6d3
PH
3714
3715
02dc0a36
S
def base_url(url):
    """Return url up to and including the last '/' before any '?', '#' or '&'.

    Raises AttributeError when url is not an http(s) URL containing such a
    slash.
    """
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3718
3719
def urljoin(base, path):
    """Join path onto base, returning None for unusable input.

    Byte strings are decoded as UTF-8.  Returns None when path is empty or
    not a string.  A path that already is an absolute or protocol-relative
    URL is returned unchanged.  Otherwise base must be an http(s) or
    protocol-relative URL, else None is returned.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # '-' sits last in the class so that it is a literal; written as '+-.'
    # it forms a range that also admits ',' in the scheme
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3733
3734
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3738
3739
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3743
3744
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, returning default when that is not possible.

    If get_attr is given and v is not None, that attribute of v is converted
    instead.  The result is int(v) * invscale // scale.  None, '' and values
    int() rejects, including infinite floats, yield default.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float('inf'))
        return default
9732d77e 3757
9572013d 3758
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted to a text string, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3761
9732d77e
PH
3762
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and plus signs before converting
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    # NOTE(review): any other type yields None - confirm against the
    # original indentation of the final return
    return None
608d11f5
PH
3770
3771
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default if v is None or invalid."""
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3779
3780
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v if it is a bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3783
3784
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v.strip() for text strings, otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3787
3788
af03000a
S
def url_or_none(url):
    """Return the stripped url if it looks like a supported URL, else None.

    Accepted are protocol-relative URLs and the http(s), rtmp*, rtmfp,
    rtsp*, mms and ftp(s) schemes matched by the pattern below.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
af03000a
S
3794
3795
def strftime_or_none(timestamp, date_format, default=None):
    """Format a timestamp with date_format, returning default on failure.

    timestamp may be a number (unix timestamp, interpreted as UTC) or a
    'YYYYMMDD' string.  Any other type, an unparsable string or a timestamp
    outside the range the platform can convert yields default.
    """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # For unsupported types datetime_object is still None here and the
        # attribute lookup raises AttributeError
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # utcfromtimestamp raises OverflowError/OSError for out-of-range values
        return default
3806
3807
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in order: clock style ('[[[DD:]HH:]MM:]SS[.ms]'),
    unit style ('PT1H2M3S', '1 hour 2 mins 3 s', ...) and a bare fractional
    count of hours or minutes ('1.5 hours').  Returns None for non-string
    input or when no notation matches.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    clock = re.match(
        r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if clock:
        parts = clock.groupdict()
    else:
        # Years, months and weeks are accepted but do not add to the result
        units = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if units:
            parts = units.groupdict()
        else:
            loose = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if not loose:
                return None
            parts = loose.groupdict()

    # Each component is scaled to seconds by applying its multipliers in turn
    scaling = (
        ('secs', ()),
        ('mins', (60,)),
        ('hours', (60, 60)),
        ('days', (24, 60, 60)),
        ('ms', ()),
    )
    duration = 0
    for key, multipliers in scaling:
        raw = parts.get(key)
        if not raw:
            continue
        amount = float(raw)
        for multiplier in multipliers:
            amount = amount * multiplier
        duration += amount
    return duration
91d7d0b3
JMF
3864
3865
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of filename's real extension.

    If expected_real_ext is given and differs from the real extension, ext
    is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return '{0}.{1}{2}'.format(stem, ext, real_ext)
    return '{0}.{1}'.format(filename, ext)
d70ad093
PH
3872
3873
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's extension with ext.

    If expected_real_ext is given and differs from the real extension, ext
    is appended to the whole filename instead.
    """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3879
3880
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary can be run, and returns its name.
    Returns False when starting it raises OSError.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    else:
        return exe
b7ab0590
PH
3890
3891
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):
        # Decode byte output leniently; undecodable bytes are dropped
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3909
3910
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from an executable's output.

    version_re defaults to a pattern matching 'version <string>'.  Returns
    the first group of the first match, or unrecognized if nothing matches.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3920
3921
class PagedList(object):
    """Base for paged lists; __len__ relies on a getslice() method that this
    class does not define itself."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3926
9c44d242
PH
3927
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages lazily through pagefunc(pagenum).

    With use_cache, every fetched page is remembered by page number.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def _load_page(self, pagenum):
        """Return the entries of page pagenum as a list, using the cache."""
        if not self._use_cache:
            return list(self._pagefunc(pagenum))
        entries = self._cache.get(pagenum)
        if entries is None:
            entries = list(self._pagefunc(pagenum))
            self._cache[pagenum] = entries
        return entries

    def getslice(self, start=0, end=None):
        """Return the entries with index in [start, end) as a list."""
        pagesize = self._pagesize
        res = []
        for pagenum in itertools.count(start // pagesize):
            firstid = pagenum * pagesize
            nextfirstid = pagenum * pagesize + pagesize
            if start >= nextfirstid:
                continue

            page_results = self._load_page(pagenum)

            # Offset of the first wanted entry within this page
            startv = start % pagesize if firstid <= start < nextfirstid else 0

            # Offset just past the last wanted entry, when the slice ends here
            endv = None
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % pagesize) + 1

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
3978
3979
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list whose total number of pages is known up front.

    pagefunc(pagenum) must return the entries of one page; pagecount is the
    number of pages and pagesize the number of entries per page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with index in [start, end) as a list.

        Pages with a number of pagecount or above are never requested.
        """
        res = []
        start_page = start // self._pagesize
        # Clamp to the known page count: an `end` beyond the last page must
        # not make us call pagefunc for pages that do not exist
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        skip_elems = start - start_page * self._pagesize
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                # Only the first fetched page has leading entries to drop
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
4007
4008
def uppercase_escape(s):
    """Replace every \\UXXXXXXXX escape sequence in s by its character."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
0fe2ff78
YCH
4015
4016
def lowercase_escape(s):
    """Replace every \\uXXXX escape sequence in s by its character."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
b53466e1 4023
d05cfe06
S
4024
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 text strings are encoded to UTF-8 before quoting
    must_encode = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if must_encode:
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
d05cfe06
S
4030
4031
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded, every other component percent-escaped
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    )
    return escaped.geturl()
4042
62e609ab
PH
4043
def read_batch_urls(batch_fd):
    """Read URLs line by line from batch_fd and close it afterwards.

    Byte-order marks and leading whitespace are stripped, empty lines and
    lines starting with '#', ';' or ']' are skipped, and a trailing
    ' #comment' is removed.  Returns the list of remaining URLs.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        urls = []
        for line in fd:
            cleaned = fixup(line)
            if cleaned:
                urls.append(cleaned)
        return urls
b74fa8cd
JMF
4061
4062
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
4065
4066
def update_url_query(url, query):
    """Return url with its query string updated from the query dict.

    Existing parameters are kept unless overridden.  An empty query returns
    url unchanged.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 4075
8e60dc75 4076
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Return a copy of req with url, data, headers and query updated.

    HEAD and PUT requests keep their method through the matching request
    class; a timeout attribute on req is carried over.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)

    method_classes = {'HEAD': HEADRequest, 'PUT': PUTRequest}
    req_type = method_classes.get(
        req.get_method(), compat_urllib_request.Request)

    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4095
4096
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns (body bytes, content type).  Raises ValueError when the boundary
    occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += part

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
4117
4118
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body, content_type) tuple. With a caller-supplied boundary
    that overlaps with the data, ValueError is raised.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is never replaced; ValueError propagates
        out, content_type = _multipart_encode_impl(data, boundary)
        return out, content_type

    # Draw random boundaries until one does not collide with the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
        return out, content_type
4147
4148
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable of several keys, in d.

    For a list/tuple of keys, missing keys and None values are skipped, as
    are falsy values when skip_false_values is set; default is returned if
    nothing remains.  A single key is a plain d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
4157
4158
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to src and return the first acceptable result.

    getter is a callable or a list/tuple of callables.  A getter raising
    AttributeError, KeyError, TypeError or IndexError is skipped, as is a
    result that is not an instance of expected_type (when given).  Returns
    None if no getter yields an acceptable value.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4170
4171
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to earlier ones.

    None values are ignored.  A key already present is only overwritten when
    its current value is an empty string and the new value is a non-empty
    string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                fills_blank = (
                    isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k])
                if not fills_blank:
                    continue
            merged[k] = v
    return merged
4184
4185
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as a text string, decoding it with encoding if needed."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4188
16392824 4189
a1a530b0
PH
# Rating label -> age limit, as used by parse_age_limit
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# TV rating label -> age limit, as used by parse_age_limit
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4207
4208
def parse_age_limit(s):
    """Turn an age rating into a numeric age limit.

    Accepts ints in the range 0..21, strings like '18' or '18+', the keys of
    US_RATINGS and TV ratings such as 'TV-14' / 'TV_14' / 'TV14' (letters in
    any case).  Returns None for anything else.
    """
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    label = s.upper()
    if label in US_RATINGS:
        return US_RATINGS[label]

    # Match against the part of each guideline key after its 'TV-' prefix
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, label)
    if tv:
        return TV_PARENTAL_GUIDELINES['TV-' + tv.group(1)]
    return None
146c80e2
S
4224
4225
def strip_jsonp(code):
    """Strip a JSONP wrapper such as 'callback(...);' and return its payload.

    Also handles a leading 'window.' and the 'cb && cb(...)' guard form, as
    well as trailing '//' comments.  Input that does not match is returned
    unchanged.
    """
    jsonp_re = r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
478c2c61
PH
4234
4235
def js_to_json(code, vars={}):
    """Convert a JavaScript object literal into a JSON string.

    Strings are re-quoted with double quotes, bare identifiers are quoted,
    comments and trailing commas are removed and hex/octal integers are
    converted to decimal.

    vars is a dict of var, val pairs to substitute: an identifier found in
    it is replaced by the associated value verbatim.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied inside string literals; other escapes are kept as-is
    STRING_ESCAPES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_escape(mobj):
        token = mobj.group(0)
        return STRING_ESCAPES.get(token, token)

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v == ',' or v.startswith(('/*', '//', '!')):
            # Comments, trailing commas and runs of '!' are dropped
            return ""

        if v[0] in ("'", '"'):
            return '"%s"' % re.sub(r'(?s)\\.|"', fix_escape, v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                return ('"%d":' if v.endswith(':') else '%d') % i

        if v in vars:
            return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4280
4281
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function mapping a quality id to its index in quality_ids,
    or to -1 when the id is not listed. """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4290
acd69589 4291
de6000d9 4292DEFAULT_OUTTMPL = {
4293 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4294 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4295}
4296OUTTMPL_TYPES = {
72755351 4297 'chapter': None,
de6000d9 4298 'subtitle': None,
4299 'thumbnail': None,
4300 'description': 'description',
4301 'annotation': 'annotations.xml',
4302 'infojson': 'info.json',
4303 'pl_description': 'description',
4304 'pl_infojson': 'info.json',
4305}
0a871f68 4306
143db31d 4307# As of [1] format syntax is:
4308# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4309# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
4310FORMAT_RE = r'''(?x)
4311 (?<!%)
4312 %
4313 \({0}\) # mapping key
4314 (?:[#0\-+ ]+)? # conversion flags (optional)
4315 (?:\d+)? # minimum field width (optional)
4316 (?:\.\d+)? # precision (optional)
4317 [hlL]? # length modifier (optional)
4318 (?P<type>[diouxXeEfFgGcrs%]) # conversion type
4319'''
4320
a020a0dc
PH
4321
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that text plus '...' is length
    characters long; None and short strings are returned unchanged. """
    if s is None or len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4330
4331
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints.

    Raises ValueError when a component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
4334
4335
def is_outdated_version(version, limit, assume_new=True):
    """ Tell whether `version` is older than `limit`

    If `version` is empty or either version string cannot be parsed, the
    answer is derived from `assume_new`: an unknown version counts as
    outdated only when `assume_new` is false.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
732ea2f0
PH
4343
4344
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Always reports "not updateable". The former detection (zipimport
    # loader or a frozen executable) sat behind this unconditional return
    # and could never run, so it has been dropped.
    return False
7d4111ed
PH
4352
4353
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    # Quote every argument individually, then join them with single spaces
    quoted_args = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted_args)
2ccd1b10
PH
4357
4358
def error_to_compat_str(err):
    """ Return the message of `err` as a text string """
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4366
4367
def mimetype2ext(mt):
    """ Map a MIME type to a file extension

    Parameters (anything after ';'), surrounding whitespace and letter case
    are ignored. A few full types are mapped first; otherwise the subtype is
    looked up and, if unknown, returned as the extension itself.
    Returns None if `mt` is None.
    """
    if mt is None:
        return None

    # Strip parameters before any lookup: they may themselves contain a '/'
    # (e.g. a codecs string), which would corrupt the subtype split below
    mt = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')
    res = res.strip()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4405
4406
def parse_codecs(codecs_str):
    """ Split an RFC 6381 codecs string into video and audio codec

    Returns {} for empty input. If at least one codec is recognised, returns
    a dict with 'vcodec' and 'acodec' ('none' for the missing kind). If none
    is recognised but exactly two codecs are given, they are taken as video
    and audio in that order; otherwise returns {}. A warning is written to
    stderr for every unrecognised codec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_FAMILIES = (
        'avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
        'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_FAMILIES = (
        'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
        'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        # Only the part before the first dot identifies the codec family
        family = full_codec.split('.')[0]
        if family in VIDEO_FAMILIES:
            # The first codec of each kind wins
            vcodec = vcodec or full_codec
        elif family in AUDIO_FAMILIES:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4436
4437
def urlhandle_detect_ext(url_handle):
    """ Guess a file extension from the response headers of `url_handle`

    The extension of an attachment filename in Content-Disposition is used
    when there is one; otherwise the Content-Type header is mapped.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = determine_ext(mobj.group('filename'), default_ext=None) if mobj else None
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4450
4451
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Build a base64 data: URI carrying `data` (bytes) as `mime_type` """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4454
4455
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    # Nothing is blocked when no limit is set or when the content is
    # available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4464
4465
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Returns a match object (truthy) if the decoded text starts with '<',
    optionally preceded by whitespace, else None.
    """

    # Order matters: the UTF-32 marks must be tried before the UTF-16 ones,
    # since the UTF-32-LE mark starts with the UTF-16-LE mark
    KNOWN_BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )

    # Without a byte order mark the data is treated as UTF-8
    payload, encoding = first_bytes, 'utf-8'
    for marker, marked_encoding in KNOWN_BOMS:
        if first_bytes.startswith(marker):
            payload, encoding = first_bytes[len(marker):], marked_encoding
            break

    text = payload.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
a055469f
PH
4484
4485
def determine_protocol(info_dict):
    """ Work out the download protocol for `info_dict`

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the 'url' entry: first from a known URL prefix, then from the
    extension (m3u8/f4m), and finally from the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4506
4507
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Every column except the last is left-aligned and padded to its widest
    cell plus `extraGap`. With `delim`, a row of dashes follows the header.
    With `hideEmpty`, columns whose data cells are all empty are dropped.
    """

    def column_widths(rows):
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_flagged(row, flags):
        return [cell for flag, cell in zip(flags, row) if flag]

    if hideEmpty:
        # A data column of width 0 holds nothing but empty cells
        data_widths = column_widths(data)
        header_row = keep_flagged(header_row, data_widths)
        data = [keep_flagged(row, data_widths) for row in data]

    widths = column_widths([header_row] + data)

    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)

    # The last column is emitted unpadded
    padded_cells = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_cells) + ' %s'
    return '\n'.join(row_format % tuple(row) for row in rows)
347de493
PH
4528
4529
def _match_one(filter_part, dct):
    """ Evaluate a single filter expression against the dictionary `dct`

    Two forms are recognised, tried in this order:
      * comparison: `key OP value`, with OP one of < <= > >= = != and an
        optional '?' directly after OP
      * unary: `key` or `!key`

    For a comparison, if `dct` has no value for the key, the text matched by
    the optional '?' is returned (None if it is absent). Otherwise the result
    of applying the operator is returned.
    Raises ValueError if the expression matches neither form, if an ordering
    operator is used with a string value, or if a numeric value cannot be
    parsed.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # The value is one of: a number with an optional size suffix (intval),
    # a quoted string (quotedstrval) or a bare alphanumeric word (strval)
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison: only equality and inequality are allowed
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: try a plain integer first, then a file size
            # as written, then a file size with 'B' appended
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: the result is the matched '?' text (truthy) or None
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Booleans are tested for being True/False; any other value is tested
    # for being (not) None
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4598
4599
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # The filter is a conjunction of '&'-separated parts; stop at the first
    # part that does not hold
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4605
4606
def match_filter_func(filter_str):
    """ Wrap `filter_str` into a callable that takes an info dict

    The callable returns None if the dict passes the filter, else a message
    explaining that the video is skipped.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Name the video by its title, falling back to id, then to 'video'
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)

    return _match_func
91410c9b
PH
4615
4616
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """ Convert a DFXP time expression into seconds

    Accepts a plain offset in seconds ('12.5' or '12.5s') and a clock value
    'H:MM:SS' whose fraction may be separated by '.' or ':'.
    Returns None for empty or unrecognised input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if not clock:
        return None
    hours, minutes, seconds = clock.groups()
    # A ':' before the fraction is read as a decimal point
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4628
4629
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """ Format a time in seconds as an SRT timecode 'HH:MM:SS,mmm'

    The value is rounded to the nearest millisecond before it is split into
    fields. Truncating each field separately loses a millisecond to float
    error (2.3 s would come out as ',299').
    """
    total_msec = int(round(seconds * 1000))
    secs, msec = divmod(total_msec, 1000)
    mins, secs = divmod(secs, 60)
    hours, mins = divmod(mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
bf6427d2
YCH
4632
4633
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT. Styling (colour, font family/size,
    bold, italic, underline) is carried over as <font>, <b>, <i> and <u> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph elements
    '''
    # Legacy namespace URIs are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # Style id -> resolved properties, and the style inherited from body/div
    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        def __init__(self):
            # Per-instance state. As class attributes, the two lists would be
            # shared by every parser and leak between paragraphs.
            self._out = ''
            # One (tags to close, style pushed?) entry per open element
            self._unclosed_elements = []
            # Stack of the effective style of the open styled elements
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip what an enclosing element already applies
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                # Record whether a style was pushed so that end() pops exactly
                # what start() pushed, even when no tag had to be emitted
                self._unclosed_elements.append((unclosed_elements, bool(style)))

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements, style_pushed = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if style_pushed:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style can only be resolved once its parent
    # is known, so the elements are scanned repeatedly.
    style_elements = dfxp.findall(_x('.//ttml:style'))
    while True:
        unresolved = False
        known_before = len(styles)
        for style in style_elements:
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    unresolved = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Scan again only if a parent was missing AND this pass made
        # progress; a reference to a style that is never registered would
        # otherwise loop forever
        if not unresolved or len(styles) == known_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Fall back to begin + duration; skip cues with neither
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4796
4797
66e289ba
S
def cli_option(params, command_option, param):
    """ Build the command line arguments for an option that takes a value

    Returns [] if `params` has no value (None) for `param`, otherwise
    [command_option, value]. Truthy values are converted to text, falsy
    ones are passed through unchanged.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
4803
4804
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the command line arguments for a boolean option

    Returns [] if `params` has no value (None) for `param`. Otherwise the
    option is followed by `true_value` or `false_value`, either as a
    separate argument or, with `separator`, joined into a single one.
    The value must be a bool.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4813
4814
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Return [command_option] if params[param] equals `expected_value`, else [] """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4818
4819
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """ Select extra command line arguments from a per-key argument dict

    @param argdict     dict mapping lower-case keys to argument lists. A
                       list/tuple is accepted for backward compatibility:
                       it is returned as-is if `use_compat` is set and
                       otherwise treated like None
    @param keys        list/tuple of keys or key groups, in order of
                       preference. For the first group with at least one
                       key present in `argdict`, the argument lists of all
                       its present keys are concatenated and returned
    @param default     returned if `argdict` is None or no key matches
    @param use_compat  see `argdict`
    """
    def _default():
        # Hand out a copy of a list default: the signature's default list is
        # shared between calls and must not be exposed to mutation by callers
        return list(default) if isinstance(default, list) else default

    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return _default()
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        if isinstance(key_list, compat_str):
            key_list = (key_list,)
        arg_list = [
            args for args in (argdict.get(key.lower()) for key in key_list)
            if args is not None]
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return _default()
66e289ba
S
4840
4841
39672624
YCH
4842class ISO639Utils(object):
4843 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4844 _lang_map = {
4845 'aa': 'aar',
4846 'ab': 'abk',
4847 'ae': 'ave',
4848 'af': 'afr',
4849 'ak': 'aka',
4850 'am': 'amh',
4851 'an': 'arg',
4852 'ar': 'ara',
4853 'as': 'asm',
4854 'av': 'ava',
4855 'ay': 'aym',
4856 'az': 'aze',
4857 'ba': 'bak',
4858 'be': 'bel',
4859 'bg': 'bul',
4860 'bh': 'bih',
4861 'bi': 'bis',
4862 'bm': 'bam',
4863 'bn': 'ben',
4864 'bo': 'bod',
4865 'br': 'bre',
4866 'bs': 'bos',
4867 'ca': 'cat',
4868 'ce': 'che',
4869 'ch': 'cha',
4870 'co': 'cos',
4871 'cr': 'cre',
4872 'cs': 'ces',
4873 'cu': 'chu',
4874 'cv': 'chv',
4875 'cy': 'cym',
4876 'da': 'dan',
4877 'de': 'deu',
4878 'dv': 'div',
4879 'dz': 'dzo',
4880 'ee': 'ewe',
4881 'el': 'ell',
4882 'en': 'eng',
4883 'eo': 'epo',
4884 'es': 'spa',
4885 'et': 'est',
4886 'eu': 'eus',
4887 'fa': 'fas',
4888 'ff': 'ful',
4889 'fi': 'fin',
4890 'fj': 'fij',
4891 'fo': 'fao',
4892 'fr': 'fra',
4893 'fy': 'fry',
4894 'ga': 'gle',
4895 'gd': 'gla',
4896 'gl': 'glg',
4897 'gn': 'grn',
4898 'gu': 'guj',
4899 'gv': 'glv',
4900 'ha': 'hau',
4901 'he': 'heb',
b7acc835 4902 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4903 'hi': 'hin',
4904 'ho': 'hmo',
4905 'hr': 'hrv',
4906 'ht': 'hat',
4907 'hu': 'hun',
4908 'hy': 'hye',
4909 'hz': 'her',
4910 'ia': 'ina',
4911 'id': 'ind',
b7acc835 4912 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4913 'ie': 'ile',
4914 'ig': 'ibo',
4915 'ii': 'iii',
4916 'ik': 'ipk',
4917 'io': 'ido',
4918 'is': 'isl',
4919 'it': 'ita',
4920 'iu': 'iku',
4921 'ja': 'jpn',
4922 'jv': 'jav',
4923 'ka': 'kat',
4924 'kg': 'kon',
4925 'ki': 'kik',
4926 'kj': 'kua',
4927 'kk': 'kaz',
4928 'kl': 'kal',
4929 'km': 'khm',
4930 'kn': 'kan',
4931 'ko': 'kor',
4932 'kr': 'kau',
4933 'ks': 'kas',
4934 'ku': 'kur',
4935 'kv': 'kom',
4936 'kw': 'cor',
4937 'ky': 'kir',
4938 'la': 'lat',
4939 'lb': 'ltz',
4940 'lg': 'lug',
4941 'li': 'lim',
4942 'ln': 'lin',
4943 'lo': 'lao',
4944 'lt': 'lit',
4945 'lu': 'lub',
4946 'lv': 'lav',
4947 'mg': 'mlg',
4948 'mh': 'mah',
4949 'mi': 'mri',
4950 'mk': 'mkd',
4951 'ml': 'mal',
4952 'mn': 'mon',
4953 'mr': 'mar',
4954 'ms': 'msa',
4955 'mt': 'mlt',
4956 'my': 'mya',
4957 'na': 'nau',
4958 'nb': 'nob',
4959 'nd': 'nde',
4960 'ne': 'nep',
4961 'ng': 'ndo',
4962 'nl': 'nld',
4963 'nn': 'nno',
4964 'no': 'nor',
4965 'nr': 'nbl',
4966 'nv': 'nav',
4967 'ny': 'nya',
4968 'oc': 'oci',
4969 'oj': 'oji',
4970 'om': 'orm',
4971 'or': 'ori',
4972 'os': 'oss',
4973 'pa': 'pan',
4974 'pi': 'pli',
4975 'pl': 'pol',
4976 'ps': 'pus',
4977 'pt': 'por',
4978 'qu': 'que',
4979 'rm': 'roh',
4980 'rn': 'run',
4981 'ro': 'ron',
4982 'ru': 'rus',
4983 'rw': 'kin',
4984 'sa': 'san',
4985 'sc': 'srd',
4986 'sd': 'snd',
4987 'se': 'sme',
4988 'sg': 'sag',
4989 'si': 'sin',
4990 'sk': 'slk',
4991 'sl': 'slv',
4992 'sm': 'smo',
4993 'sn': 'sna',
4994 'so': 'som',
4995 'sq': 'sqi',
4996 'sr': 'srp',
4997 'ss': 'ssw',
4998 'st': 'sot',
4999 'su': 'sun',
5000 'sv': 'swe',
5001 'sw': 'swa',
5002 'ta': 'tam',
5003 'te': 'tel',
5004 'tg': 'tgk',
5005 'th': 'tha',
5006 'ti': 'tir',
5007 'tk': 'tuk',
5008 'tl': 'tgl',
5009 'tn': 'tsn',
5010 'to': 'ton',
5011 'tr': 'tur',
5012 'ts': 'tso',
5013 'tt': 'tat',
5014 'tw': 'twi',
5015 'ty': 'tah',
5016 'ug': 'uig',
5017 'uk': 'ukr',
5018 'ur': 'urd',
5019 'uz': 'uzb',
5020 've': 'ven',
5021 'vi': 'vie',
5022 'vo': 'vol',
5023 'wa': 'wln',
5024 'wo': 'wol',
5025 'xh': 'xho',
5026 'yi': 'yid',
e9a50fba 5027 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5028 'yo': 'yor',
5029 'za': 'zha',
5030 'zh': 'zho',
5031 'zu': 'zul',
5032 }
5033
5034 @classmethod
5035 def short2long(cls, code):
5036 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5037 return cls._lang_map.get(code[:2])
5038
5039 @classmethod
5040 def long2short(cls, code):
5041 """Convert language code from ISO 639-2/T to ISO 639-1"""
5042 for short_name, long_name in cls._lang_map.items():
5043 if long_name == code:
5044 return short_name
5045
5046
4eb10f66
YCH
5047class ISO3166Utils(object):
5048 # From http://data.okfn.org/data/core/country-list
5049 _country_map = {
5050 'AF': 'Afghanistan',
5051 'AX': 'Åland Islands',
5052 'AL': 'Albania',
5053 'DZ': 'Algeria',
5054 'AS': 'American Samoa',
5055 'AD': 'Andorra',
5056 'AO': 'Angola',
5057 'AI': 'Anguilla',
5058 'AQ': 'Antarctica',
5059 'AG': 'Antigua and Barbuda',
5060 'AR': 'Argentina',
5061 'AM': 'Armenia',
5062 'AW': 'Aruba',
5063 'AU': 'Australia',
5064 'AT': 'Austria',
5065 'AZ': 'Azerbaijan',
5066 'BS': 'Bahamas',
5067 'BH': 'Bahrain',
5068 'BD': 'Bangladesh',
5069 'BB': 'Barbados',
5070 'BY': 'Belarus',
5071 'BE': 'Belgium',
5072 'BZ': 'Belize',
5073 'BJ': 'Benin',
5074 'BM': 'Bermuda',
5075 'BT': 'Bhutan',
5076 'BO': 'Bolivia, Plurinational State of',
5077 'BQ': 'Bonaire, Sint Eustatius and Saba',
5078 'BA': 'Bosnia and Herzegovina',
5079 'BW': 'Botswana',
5080 'BV': 'Bouvet Island',
5081 'BR': 'Brazil',
5082 'IO': 'British Indian Ocean Territory',
5083 'BN': 'Brunei Darussalam',
5084 'BG': 'Bulgaria',
5085 'BF': 'Burkina Faso',
5086 'BI': 'Burundi',
5087 'KH': 'Cambodia',
5088 'CM': 'Cameroon',
5089 'CA': 'Canada',
5090 'CV': 'Cape Verde',
5091 'KY': 'Cayman Islands',
5092 'CF': 'Central African Republic',
5093 'TD': 'Chad',
5094 'CL': 'Chile',
5095 'CN': 'China',
5096 'CX': 'Christmas Island',
5097 'CC': 'Cocos (Keeling) Islands',
5098 'CO': 'Colombia',
5099 'KM': 'Comoros',
5100 'CG': 'Congo',
5101 'CD': 'Congo, the Democratic Republic of the',
5102 'CK': 'Cook Islands',
5103 'CR': 'Costa Rica',
5104 'CI': 'Côte d\'Ivoire',
5105 'HR': 'Croatia',
5106 'CU': 'Cuba',
5107 'CW': 'Curaçao',
5108 'CY': 'Cyprus',
5109 'CZ': 'Czech Republic',
5110 'DK': 'Denmark',
5111 'DJ': 'Djibouti',
5112 'DM': 'Dominica',
5113 'DO': 'Dominican Republic',
5114 'EC': 'Ecuador',
5115 'EG': 'Egypt',
5116 'SV': 'El Salvador',
5117 'GQ': 'Equatorial Guinea',
5118 'ER': 'Eritrea',
5119 'EE': 'Estonia',
5120 'ET': 'Ethiopia',
5121 'FK': 'Falkland Islands (Malvinas)',
5122 'FO': 'Faroe Islands',
5123 'FJ': 'Fiji',
5124 'FI': 'Finland',
5125 'FR': 'France',
5126 'GF': 'French Guiana',
5127 'PF': 'French Polynesia',
5128 'TF': 'French Southern Territories',
5129 'GA': 'Gabon',
5130 'GM': 'Gambia',
5131 'GE': 'Georgia',
5132 'DE': 'Germany',
5133 'GH': 'Ghana',
5134 'GI': 'Gibraltar',
5135 'GR': 'Greece',
5136 'GL': 'Greenland',
5137 'GD': 'Grenada',
5138 'GP': 'Guadeloupe',
5139 'GU': 'Guam',
5140 'GT': 'Guatemala',
5141 'GG': 'Guernsey',
5142 'GN': 'Guinea',
5143 'GW': 'Guinea-Bissau',
5144 'GY': 'Guyana',
5145 'HT': 'Haiti',
5146 'HM': 'Heard Island and McDonald Islands',
5147 'VA': 'Holy See (Vatican City State)',
5148 'HN': 'Honduras',
5149 'HK': 'Hong Kong',
5150 'HU': 'Hungary',
5151 'IS': 'Iceland',
5152 'IN': 'India',
5153 'ID': 'Indonesia',
5154 'IR': 'Iran, Islamic Republic of',
5155 'IQ': 'Iraq',
5156 'IE': 'Ireland',
5157 'IM': 'Isle of Man',
5158 'IL': 'Israel',
5159 'IT': 'Italy',
5160 'JM': 'Jamaica',
5161 'JP': 'Japan',
5162 'JE': 'Jersey',
5163 'JO': 'Jordan',
5164 'KZ': 'Kazakhstan',
5165 'KE': 'Kenya',
5166 'KI': 'Kiribati',
5167 'KP': 'Korea, Democratic People\'s Republic of',
5168 'KR': 'Korea, Republic of',
5169 'KW': 'Kuwait',
5170 'KG': 'Kyrgyzstan',
5171 'LA': 'Lao People\'s Democratic Republic',
5172 'LV': 'Latvia',
5173 'LB': 'Lebanon',
5174 'LS': 'Lesotho',
5175 'LR': 'Liberia',
5176 'LY': 'Libya',
5177 'LI': 'Liechtenstein',
5178 'LT': 'Lithuania',
5179 'LU': 'Luxembourg',
5180 'MO': 'Macao',
5181 'MK': 'Macedonia, the Former Yugoslav Republic of',
5182 'MG': 'Madagascar',
5183 'MW': 'Malawi',
5184 'MY': 'Malaysia',
5185 'MV': 'Maldives',
5186 'ML': 'Mali',
5187 'MT': 'Malta',
5188 'MH': 'Marshall Islands',
5189 'MQ': 'Martinique',
5190 'MR': 'Mauritania',
5191 'MU': 'Mauritius',
5192 'YT': 'Mayotte',
5193 'MX': 'Mexico',
5194 'FM': 'Micronesia, Federated States of',
5195 'MD': 'Moldova, Republic of',
5196 'MC': 'Monaco',
5197 'MN': 'Mongolia',
5198 'ME': 'Montenegro',
5199 'MS': 'Montserrat',
5200 'MA': 'Morocco',
5201 'MZ': 'Mozambique',
5202 'MM': 'Myanmar',
5203 'NA': 'Namibia',
5204 'NR': 'Nauru',
5205 'NP': 'Nepal',
5206 'NL': 'Netherlands',
5207 'NC': 'New Caledonia',
5208 'NZ': 'New Zealand',
5209 'NI': 'Nicaragua',
5210 'NE': 'Niger',
5211 'NG': 'Nigeria',
5212 'NU': 'Niue',
5213 'NF': 'Norfolk Island',
5214 'MP': 'Northern Mariana Islands',
5215 'NO': 'Norway',
5216 'OM': 'Oman',
5217 'PK': 'Pakistan',
5218 'PW': 'Palau',
5219 'PS': 'Palestine, State of',
5220 'PA': 'Panama',
5221 'PG': 'Papua New Guinea',
5222 'PY': 'Paraguay',
5223 'PE': 'Peru',
5224 'PH': 'Philippines',
5225 'PN': 'Pitcairn',
5226 'PL': 'Poland',
5227 'PT': 'Portugal',
5228 'PR': 'Puerto Rico',
5229 'QA': 'Qatar',
5230 'RE': 'Réunion',
5231 'RO': 'Romania',
5232 'RU': 'Russian Federation',
5233 'RW': 'Rwanda',
5234 'BL': 'Saint Barthélemy',
5235 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5236 'KN': 'Saint Kitts and Nevis',
5237 'LC': 'Saint Lucia',
5238 'MF': 'Saint Martin (French part)',
5239 'PM': 'Saint Pierre and Miquelon',
5240 'VC': 'Saint Vincent and the Grenadines',
5241 'WS': 'Samoa',
5242 'SM': 'San Marino',
5243 'ST': 'Sao Tome and Principe',
5244 'SA': 'Saudi Arabia',
5245 'SN': 'Senegal',
5246 'RS': 'Serbia',
5247 'SC': 'Seychelles',
5248 'SL': 'Sierra Leone',
5249 'SG': 'Singapore',
5250 'SX': 'Sint Maarten (Dutch part)',
5251 'SK': 'Slovakia',
5252 'SI': 'Slovenia',
5253 'SB': 'Solomon Islands',
5254 'SO': 'Somalia',
5255 'ZA': 'South Africa',
5256 'GS': 'South Georgia and the South Sandwich Islands',
5257 'SS': 'South Sudan',
5258 'ES': 'Spain',
5259 'LK': 'Sri Lanka',
5260 'SD': 'Sudan',
5261 'SR': 'Suriname',
5262 'SJ': 'Svalbard and Jan Mayen',
5263 'SZ': 'Swaziland',
5264 'SE': 'Sweden',
5265 'CH': 'Switzerland',
5266 'SY': 'Syrian Arab Republic',
5267 'TW': 'Taiwan, Province of China',
5268 'TJ': 'Tajikistan',
5269 'TZ': 'Tanzania, United Republic of',
5270 'TH': 'Thailand',
5271 'TL': 'Timor-Leste',
5272 'TG': 'Togo',
5273 'TK': 'Tokelau',
5274 'TO': 'Tonga',
5275 'TT': 'Trinidad and Tobago',
5276 'TN': 'Tunisia',
5277 'TR': 'Turkey',
5278 'TM': 'Turkmenistan',
5279 'TC': 'Turks and Caicos Islands',
5280 'TV': 'Tuvalu',
5281 'UG': 'Uganda',
5282 'UA': 'Ukraine',
5283 'AE': 'United Arab Emirates',
5284 'GB': 'United Kingdom',
5285 'US': 'United States',
5286 'UM': 'United States Minor Outlying Islands',
5287 'UY': 'Uruguay',
5288 'UZ': 'Uzbekistan',
5289 'VU': 'Vanuatu',
5290 'VE': 'Venezuela, Bolivarian Republic of',
5291 'VN': 'Viet Nam',
5292 'VG': 'Virgin Islands, British',
5293 'VI': 'Virgin Islands, U.S.',
5294 'WF': 'Wallis and Futuna',
5295 'EH': 'Western Sahara',
5296 'YE': 'Yemen',
5297 'ZM': 'Zambia',
5298 'ZW': 'Zimbabwe',
5299 }
5300
5301 @classmethod
5302 def short2full(cls, code):
5303 """Convert an ISO 3166-2 country code to the corresponding full name"""
5304 return cls._country_map.get(code.upper())
5305
5306
773f291d
S
5307class GeoUtils(object):
5308 # Major IPv4 address blocks per country
5309 _country_ip_map = {
53896ca5 5310 'AD': '46.172.224.0/19',
773f291d
S
5311 'AE': '94.200.0.0/13',
5312 'AF': '149.54.0.0/17',
5313 'AG': '209.59.64.0/18',
5314 'AI': '204.14.248.0/21',
5315 'AL': '46.99.0.0/16',
5316 'AM': '46.70.0.0/15',
5317 'AO': '105.168.0.0/13',
53896ca5
S
5318 'AP': '182.50.184.0/21',
5319 'AQ': '23.154.160.0/24',
773f291d
S
5320 'AR': '181.0.0.0/12',
5321 'AS': '202.70.112.0/20',
53896ca5 5322 'AT': '77.116.0.0/14',
773f291d
S
5323 'AU': '1.128.0.0/11',
5324 'AW': '181.41.0.0/18',
53896ca5
S
5325 'AX': '185.217.4.0/22',
5326 'AZ': '5.197.0.0/16',
773f291d
S
5327 'BA': '31.176.128.0/17',
5328 'BB': '65.48.128.0/17',
5329 'BD': '114.130.0.0/16',
5330 'BE': '57.0.0.0/8',
53896ca5 5331 'BF': '102.178.0.0/15',
773f291d
S
5332 'BG': '95.42.0.0/15',
5333 'BH': '37.131.0.0/17',
5334 'BI': '154.117.192.0/18',
5335 'BJ': '137.255.0.0/16',
53896ca5 5336 'BL': '185.212.72.0/23',
773f291d
S
5337 'BM': '196.12.64.0/18',
5338 'BN': '156.31.0.0/16',
5339 'BO': '161.56.0.0/16',
5340 'BQ': '161.0.80.0/20',
53896ca5 5341 'BR': '191.128.0.0/12',
773f291d
S
5342 'BS': '24.51.64.0/18',
5343 'BT': '119.2.96.0/19',
5344 'BW': '168.167.0.0/16',
5345 'BY': '178.120.0.0/13',
5346 'BZ': '179.42.192.0/18',
5347 'CA': '99.224.0.0/11',
5348 'CD': '41.243.0.0/16',
53896ca5
S
5349 'CF': '197.242.176.0/21',
5350 'CG': '160.113.0.0/16',
773f291d 5351 'CH': '85.0.0.0/13',
53896ca5 5352 'CI': '102.136.0.0/14',
773f291d
S
5353 'CK': '202.65.32.0/19',
5354 'CL': '152.172.0.0/14',
53896ca5 5355 'CM': '102.244.0.0/14',
773f291d
S
5356 'CN': '36.128.0.0/10',
5357 'CO': '181.240.0.0/12',
5358 'CR': '201.192.0.0/12',
5359 'CU': '152.206.0.0/15',
5360 'CV': '165.90.96.0/19',
5361 'CW': '190.88.128.0/17',
53896ca5 5362 'CY': '31.153.0.0/16',
773f291d
S
5363 'CZ': '88.100.0.0/14',
5364 'DE': '53.0.0.0/8',
5365 'DJ': '197.241.0.0/17',
5366 'DK': '87.48.0.0/12',
5367 'DM': '192.243.48.0/20',
5368 'DO': '152.166.0.0/15',
5369 'DZ': '41.96.0.0/12',
5370 'EC': '186.68.0.0/15',
5371 'EE': '90.190.0.0/15',
5372 'EG': '156.160.0.0/11',
5373 'ER': '196.200.96.0/20',
5374 'ES': '88.0.0.0/11',
5375 'ET': '196.188.0.0/14',
5376 'EU': '2.16.0.0/13',
5377 'FI': '91.152.0.0/13',
5378 'FJ': '144.120.0.0/16',
53896ca5 5379 'FK': '80.73.208.0/21',
773f291d
S
5380 'FM': '119.252.112.0/20',
5381 'FO': '88.85.32.0/19',
5382 'FR': '90.0.0.0/9',
5383 'GA': '41.158.0.0/15',
5384 'GB': '25.0.0.0/8',
5385 'GD': '74.122.88.0/21',
5386 'GE': '31.146.0.0/16',
5387 'GF': '161.22.64.0/18',
5388 'GG': '62.68.160.0/19',
53896ca5
S
5389 'GH': '154.160.0.0/12',
5390 'GI': '95.164.0.0/16',
773f291d
S
5391 'GL': '88.83.0.0/19',
5392 'GM': '160.182.0.0/15',
5393 'GN': '197.149.192.0/18',
5394 'GP': '104.250.0.0/19',
5395 'GQ': '105.235.224.0/20',
5396 'GR': '94.64.0.0/13',
5397 'GT': '168.234.0.0/16',
5398 'GU': '168.123.0.0/16',
5399 'GW': '197.214.80.0/20',
5400 'GY': '181.41.64.0/18',
5401 'HK': '113.252.0.0/14',
5402 'HN': '181.210.0.0/16',
5403 'HR': '93.136.0.0/13',
5404 'HT': '148.102.128.0/17',
5405 'HU': '84.0.0.0/14',
5406 'ID': '39.192.0.0/10',
5407 'IE': '87.32.0.0/12',
5408 'IL': '79.176.0.0/13',
5409 'IM': '5.62.80.0/20',
5410 'IN': '117.192.0.0/10',
5411 'IO': '203.83.48.0/21',
5412 'IQ': '37.236.0.0/14',
5413 'IR': '2.176.0.0/12',
5414 'IS': '82.221.0.0/16',
5415 'IT': '79.0.0.0/10',
5416 'JE': '87.244.64.0/18',
5417 'JM': '72.27.0.0/17',
5418 'JO': '176.29.0.0/16',
53896ca5 5419 'JP': '133.0.0.0/8',
773f291d
S
5420 'KE': '105.48.0.0/12',
5421 'KG': '158.181.128.0/17',
5422 'KH': '36.37.128.0/17',
5423 'KI': '103.25.140.0/22',
5424 'KM': '197.255.224.0/20',
53896ca5 5425 'KN': '198.167.192.0/19',
773f291d
S
5426 'KP': '175.45.176.0/22',
5427 'KR': '175.192.0.0/10',
5428 'KW': '37.36.0.0/14',
5429 'KY': '64.96.0.0/15',
5430 'KZ': '2.72.0.0/13',
5431 'LA': '115.84.64.0/18',
5432 'LB': '178.135.0.0/16',
53896ca5 5433 'LC': '24.92.144.0/20',
773f291d
S
5434 'LI': '82.117.0.0/19',
5435 'LK': '112.134.0.0/15',
53896ca5 5436 'LR': '102.183.0.0/16',
773f291d
S
5437 'LS': '129.232.0.0/17',
5438 'LT': '78.56.0.0/13',
5439 'LU': '188.42.0.0/16',
5440 'LV': '46.109.0.0/16',
5441 'LY': '41.252.0.0/14',
5442 'MA': '105.128.0.0/11',
5443 'MC': '88.209.64.0/18',
5444 'MD': '37.246.0.0/16',
5445 'ME': '178.175.0.0/17',
5446 'MF': '74.112.232.0/21',
5447 'MG': '154.126.0.0/17',
5448 'MH': '117.103.88.0/21',
5449 'MK': '77.28.0.0/15',
5450 'ML': '154.118.128.0/18',
5451 'MM': '37.111.0.0/17',
5452 'MN': '49.0.128.0/17',
5453 'MO': '60.246.0.0/16',
5454 'MP': '202.88.64.0/20',
5455 'MQ': '109.203.224.0/19',
5456 'MR': '41.188.64.0/18',
5457 'MS': '208.90.112.0/22',
5458 'MT': '46.11.0.0/16',
5459 'MU': '105.16.0.0/12',
5460 'MV': '27.114.128.0/18',
53896ca5 5461 'MW': '102.70.0.0/15',
773f291d
S
5462 'MX': '187.192.0.0/11',
5463 'MY': '175.136.0.0/13',
5464 'MZ': '197.218.0.0/15',
5465 'NA': '41.182.0.0/16',
5466 'NC': '101.101.0.0/18',
5467 'NE': '197.214.0.0/18',
5468 'NF': '203.17.240.0/22',
5469 'NG': '105.112.0.0/12',
5470 'NI': '186.76.0.0/15',
5471 'NL': '145.96.0.0/11',
5472 'NO': '84.208.0.0/13',
5473 'NP': '36.252.0.0/15',
5474 'NR': '203.98.224.0/19',
5475 'NU': '49.156.48.0/22',
5476 'NZ': '49.224.0.0/14',
5477 'OM': '5.36.0.0/15',
5478 'PA': '186.72.0.0/15',
5479 'PE': '186.160.0.0/14',
5480 'PF': '123.50.64.0/18',
5481 'PG': '124.240.192.0/19',
5482 'PH': '49.144.0.0/13',
5483 'PK': '39.32.0.0/11',
5484 'PL': '83.0.0.0/11',
5485 'PM': '70.36.0.0/20',
5486 'PR': '66.50.0.0/16',
5487 'PS': '188.161.0.0/16',
5488 'PT': '85.240.0.0/13',
5489 'PW': '202.124.224.0/20',
5490 'PY': '181.120.0.0/14',
5491 'QA': '37.210.0.0/15',
53896ca5 5492 'RE': '102.35.0.0/16',
773f291d 5493 'RO': '79.112.0.0/13',
53896ca5 5494 'RS': '93.86.0.0/15',
773f291d 5495 'RU': '5.136.0.0/13',
53896ca5 5496 'RW': '41.186.0.0/16',
773f291d
S
5497 'SA': '188.48.0.0/13',
5498 'SB': '202.1.160.0/19',
5499 'SC': '154.192.0.0/11',
53896ca5 5500 'SD': '102.120.0.0/13',
773f291d 5501 'SE': '78.64.0.0/12',
53896ca5 5502 'SG': '8.128.0.0/10',
773f291d
S
5503 'SI': '188.196.0.0/14',
5504 'SK': '78.98.0.0/15',
53896ca5 5505 'SL': '102.143.0.0/17',
773f291d
S
5506 'SM': '89.186.32.0/19',
5507 'SN': '41.82.0.0/15',
53896ca5 5508 'SO': '154.115.192.0/18',
773f291d
S
5509 'SR': '186.179.128.0/17',
5510 'SS': '105.235.208.0/21',
5511 'ST': '197.159.160.0/19',
5512 'SV': '168.243.0.0/16',
5513 'SX': '190.102.0.0/20',
5514 'SY': '5.0.0.0/16',
5515 'SZ': '41.84.224.0/19',
5516 'TC': '65.255.48.0/20',
5517 'TD': '154.68.128.0/19',
5518 'TG': '196.168.0.0/14',
5519 'TH': '171.96.0.0/13',
5520 'TJ': '85.9.128.0/18',
5521 'TK': '27.96.24.0/21',
5522 'TL': '180.189.160.0/20',
5523 'TM': '95.85.96.0/19',
5524 'TN': '197.0.0.0/11',
5525 'TO': '175.176.144.0/21',
5526 'TR': '78.160.0.0/11',
5527 'TT': '186.44.0.0/15',
5528 'TV': '202.2.96.0/19',
5529 'TW': '120.96.0.0/11',
5530 'TZ': '156.156.0.0/14',
53896ca5
S
5531 'UA': '37.52.0.0/14',
5532 'UG': '102.80.0.0/13',
5533 'US': '6.0.0.0/8',
773f291d 5534 'UY': '167.56.0.0/13',
53896ca5 5535 'UZ': '84.54.64.0/18',
773f291d 5536 'VA': '212.77.0.0/19',
53896ca5 5537 'VC': '207.191.240.0/21',
773f291d 5538 'VE': '186.88.0.0/13',
53896ca5 5539 'VG': '66.81.192.0/20',
773f291d
S
5540 'VI': '146.226.0.0/16',
5541 'VN': '14.160.0.0/11',
5542 'VU': '202.80.32.0/20',
5543 'WF': '117.20.32.0/21',
5544 'WS': '202.4.32.0/19',
5545 'YE': '134.35.0.0/16',
5546 'YT': '41.242.116.0/22',
5547 'ZA': '41.0.0.0/11',
53896ca5
S
5548 'ZM': '102.144.0.0/13',
5549 'ZW': '102.177.192.0/18',
773f291d
S
5550 }
5551
5552 @classmethod
5f95927a
S
5553 def random_ipv4(cls, code_or_block):
5554 if len(code_or_block) == 2:
5555 block = cls._country_ip_map.get(code_or_block.upper())
5556 if not block:
5557 return None
5558 else:
5559 block = code_or_block
773f291d
S
5560 addr, preflen = block.split('/')
5561 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5562 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5563 return compat_str(socket.inet_ntoa(
4248dad9 5564 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5565
5566
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler whose proxy can be overridden for a single request.

    A request carrying a 'Ytdl-request-proxy' header uses that value instead
    of the handler-wide proxy; the header is removed before the request is
    passed on.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            opener_name = scheme + '_open'
            setattr(
                self, opener_name,
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        """Route req through proxy, honouring a per-request override.

        Returns None when no proxy applies or when a socks proxy was
        recorded in the 'Ytdl-socks-proxy' header; otherwise defers to
        ProxyHandler.proxy_open.
        """
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers take care of wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5590
5591
0a5445dd
YCH
5592# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5593# released into Public Domain
5594# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5595
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Values that are zero or negative yield a single zero byte. If optional
    blocksize is given and greater than zero, the front of the byte string
    is padded with binary zeros so that the length is a multiple of
    blocksize.
    """
    value = int(n)
    words = []
    # Collect 32-bit words starting from the least significant one
    while value > 0:
        words.append(compat_struct_pack('>I', value & 0xffffffff))
        value >>= 32
    words.reverse()
    # Drop leading zero bytes, but always keep at least one byte
    packed = b''.join(words).lstrip(b'\000') or b'\000'
    remainder = len(packed) % blocksize if blocksize > 0 else 0
    if remainder:
        packed = b'\000' * (blocksize - remainder) + packed
    return packed
5624
5625
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the input splits into whole 32-bit words
    padded = b'\000' * (-len(s) % 4) + s
    total = 0
    for offset in range(0, len(padded), 4):
        word = compat_struct_unpack('>I', padded[offset:offset + 4])[0]
        total = (total << 32) + word
    return total
5641
5642
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt data with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    # The bytes are reversed before being read as one hexadecimal number
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    return '%x' % pow(plaintext, exponent, modulus)
81bdc8fd
YCH
5658
5659
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is laid out as [0, 2] + padding + [0] + data, where the
    padding consists of random nonzero octets.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves no room for the 11 octets of
                               overhead that the scheme requires
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be nonzero: the first zero octet after the [0, 2]
    # header marks the start of the data, so a zero inside the padding would
    # corrupt the message when it is unpadded.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
5673
5674
def encode_base_n(num, n, table=None):
    """
    Convert a non-negative integer to its representation in base n

    @param num      integer to convert, must not be negative
    @param n        the base, at least 2 and at most the table length
    @param table    digit characters to use; defaults to the first n
                    characters of 0-9, a-z, A-Z
    @returns        the encoded string
    @raises ValueError  if n is smaller than 2 or larger than the table,
                        or if num is negative
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    # With base 1 or a negative num the division loop below never reaches
    # zero, so reject these inputs instead of looping forever
    if n < 2:
        raise ValueError('base %d is too small, it must be at least 2' % n)
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)

    if num == 0:
        return table[0]

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
f52354a8
YCH
5691
5692
def decode_packed_codes(code):
    """Expand packed code matched by PACKED_CODES_RE.

    The match supplies the obfuscated code, a base, a symbol count and a
    '|'-separated symbol list. Every word of the obfuscated code is replaced
    by the symbol whose index, written in that base, equals the word; an
    empty symbol stands for the word itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in reversed(range(count)):
        token = encode_base_n(index, base)
        symbol_table[token] = symbols[index] or token

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
e154c651 5709
5710
1ced2221
S
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions.

    The shift wraps around the end of the alphabet; characters not found in
    the alphabet are copied unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
5718
5719
def rot47(s):
    """Rotate the characters of s by 47 positions within the alphabet below."""
    rotation_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rotation_alphabet, 47)
5722
5723
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict.

    Values may be bare or double-quoted; the surrounding quotes of a quoted
    value are removed. When a key occurs more than once the last value wins.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in pairs
    }
1143535d
YCH
5731
5732
def urshift(val, n):
    """Shift val right by n bits, reading a negative val as unsigned 32-bit."""
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5735
5736
5737# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5738# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG bytes into (width, height, pixels).

    pixels is a list with one entry per image row; each row is a flat list
    of width * 3 integers with the scanline filter already undone.

    The decoder always reads three one-byte samples per pixel (the stride is
    width * 3), so other colour types or bit depths are not handled.

    Raises IOError if the signature or the leading IHDR chunk is missing, or
    if the file contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, remaining = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats_by_size = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        return compat_struct_unpack(formats_by_size[len(raw)], raw)[0]

    chunks = []

    while remaining:
        # Chunk layout: 4-byte length, 4-byte type, payload, 4-byte CRC
        length = read_uint(remaining[:4])
        chunk_type = remaining[4:8]
        chunk_data = remaining[8:8 + length]
        remaining = remaining[12 + length:]  # the CRC is skipped unchecked

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    ihdr = chunks[0]['data']

    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []
    previous_row = None

    for row_index in range(height):
        # Every scanline starts with one filter-type byte
        row_start = row_index * (1 + stride)
        filter_type = decompressed_data[row_start]

        current_row = []
        pixels.append(current_row)

        for col in range(stride):
            color = decompressed_data[1 + row_start + col]
            # Neighbours are the same sample of the pixel to the left and of
            # the pixel above; missing neighbours count as zero
            left = current_row[col - 3] if col > 2 else 0
            up = previous_row[col] if row_index > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = 0
                if col > 2 and row_index > 0:
                    upper_left = previous_row[col - 3]

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
                color = (color + predictor) & 0xff

            current_row.append(color)

        previous_row = current_row

    return width, height, pixels
efa97bdc
YCH
5842
5843
def write_xattr(path, key, value):
    """Set the extended attribute key to value on the file at path.

    Backends are tried in this order: the Python 'xattr' module (either the
    pyxattr or the xattr flavour), NTFS Alternate Data Streams on Windows,
    and finally the 'setfattr' or 'xattr' command line tools.

    value is written as-is in binary mode for the Alternate Data Stream
    backend and decoded as UTF-8 before being passed to a command line tool.

    Raises XAttrMetadataError if the chosen backend fails to write the
    attribute, and XAttrUnavailableError if no usable backend is found or
    the installed pyxattr is older than 0.5.0.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE: despite the wording of the message this raises; the
                # except ImportError clause below does not catch it
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module is installed
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The command line tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported together with the tool's stderr
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
0c265486
YCH
5926
5927
def random_birthday(year_field, month_field, day_field):
    """Pick a random date between 1950-01-01 and 1995-12-31, both inclusive.

    Returns a dict mapping the three given field names to the year, month
    and day of that date as strings.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return {name: str(part) for name, part in zip(field_names, date_parts)}
732044af 5938
c76eb41b 5939
# Templates for internet shortcut files, which are plain text files.
# Each template is filled in with the `%` operator and a mapping; the
# leading newline that follows the opening quotes is removed by lstrip().

# '[InternetShortcut]' format; expects the key 'url'.
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# Apple property list format; expects the key 'url'.
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# '[Desktop Entry]' format; expects the keys 'filename' and 'url'.
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5965
5966
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept unless it is port 80 on an `http` IRI. IRIs without a hostname (e.g. relative references) are converted with an empty host.

    Raises ValueError for IRIs whose network location contains an IPv6 literal.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no network location at all
    if iri_parts.hostname:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is the default only for http; dropping it for any other scheme
    # would silently redirect the URI to that scheme's own default port.
    is_default_http_port = iri_parts.scheme == 'http' and iri_parts.port == 80
    if iri_parts.port is not None and not is_default_http_port:
        net_location += ':' + str(iri_parts.port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6009
6010
def to_high_limit_path(path):
    """Return path in a form that is not subject to MAX_PATH on Windows.

    On 'win32' and 'cygwin' the absolute path is returned with a backslash,
    backslash, question mark, backslash prefix; elsewhere path is returned
    unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    long_path_prefix = r'\\?\ '.rstrip()
    return long_path_prefix + os.path.abspath(path)
76d321f6 6017
c76eb41b 6018
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj[field] with template, or return default for ignorable values.

    The value is looked up with obj.get(field, default). If it is one of the
    values in ignore, default is returned. Otherwise func, when given, is
    applied first and its result is checked against ignore as well.
    """
    value = obj.get(field, default)
    if value in ignore:
        return default
    if func:
        value = func(value)
        if value in ignore:
            return default
    return template % value
00dd0cd5 6024
6025
def clean_podcast_url(url):
    """Remove the redirect prefixes matched by the pattern below from url."""
    tracking_prefix = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix, '', url)
ffcb8191
THD
6041
6042
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version 4 UUID as a lowercase, hyphenated string.

    In the template below every 'x' becomes a random hexadecimal digit and
    the 'y' becomes one of 8, 9, a or b.
    """
    def _random_digit(mobj):
        # The 'y' digit carries the variant bits, whose two most significant
        # bits must be 1 and 0 for an RFC 4122 UUID
        if mobj.group(0) == 'y':
            return '89ab'[random.randint(0, 3)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6048
6049
def make_dir(path, to_screen=None):
    """Create the parent directory of path if it does not exist yet.

    @param path         path of a file whose directory should exist
    @param to_screen    optional callable that receives an error message
                        when the directory cannot be created
    @returns            True if the directory exists afterwards (or path has
                        no directory part), False if creating it failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # callable() returns a bool, so comparing its result against None
        # was always true and made a failure without a reporter raise
        # TypeError instead of returning False
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
f74980cb 6060
6061
def get_executable_path():
    """Return the absolute directory the program is considered to run from.

    That is the directory of sys.executable when sys.frozen is set, two
    levels above this module's directory when the module was loaded by a
    zipimporter, and one level above it otherwise.
    """
    from zipimport import zipimporter
    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        location = os.path.dirname(sys.executable)
    else:
        loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
        parent = '../..' if loaded_from_zip else '..'
        location = os.path.join(os.path.dirname(__file__), parent)
    return os.path.abspath(location)
6071
6072
def load_plugins(name, type, namespace):
    """Load plugin classes from the 'ytdlp_plugins' directory.

    The module called name is searched for in the 'ytdlp_plugins' directory
    under get_executable_path(). Every attribute of that module whose name
    ends with type is added to namespace under its own name.

    Returns the list of attributes found; the list is empty when the module
    cannot be imported.
    """
    module_info = [None]
    found = []
    try:
        search_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        module_info = imp.find_module(name, [search_dir])
        module = imp.load_module(name, *module_info)
        for attr_name in dir(module):
            if attr_name.endswith(type):
                plugin_class = getattr(module, attr_name)
                found.append(plugin_class)
                namespace[attr_name] = plugin_class
    except ImportError:
        pass
    finally:
        # The first item returned by find_module is an open file, if any
        if module_info[0] is not None:
            module_info[0].close()
    return found
06167fbb 6092
6093
def traverse_dict(dictn, keys, casesense=True):
    """Follow keys through nested dicts and return the value reached.

    Returns None as soon as a level is not a dict or lacks the key. With
    casesense=False both the dict keys and the given keys are lowercased
    before each lookup.
    """
    node = dictn
    remaining = keys
    while True:
        if not isinstance(node, dict):
            return None
        key = remaining[0]
        if not casesense:
            node = {k.lower(): v for k, v in node.items()}
            key = key.lower()
        node = node.get(key, None)
        if len(remaining) < 2:
            return node
        remaining = remaining[1:]