]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[soundcloud] Update client id (closes #23214)
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
62e609ab 10import contextlib
e3946f98 11import ctypes
c496ca96
PH
12import datetime
13import email.utils
0c265486 14import email.header
f45c185f 15import errno
be4a824d 16import functools
d77c3dfd 17import gzip
03f9daab 18import io
79a2e94e 19import itertools
f4bfd65f 20import json
d77c3dfd 21import locale
02dbf93f 22import math
347de493 23import operator
d77c3dfd 24import os
c496ca96 25import platform
773f291d 26import random
d77c3dfd 27import re
c496ca96 28import socket
79a2e94e 29import ssl
1c088fa8 30import subprocess
d77c3dfd 31import sys
181c8655 32import tempfile
01951dda 33import traceback
bcf89ce6 34import xml.etree.ElementTree
d77c3dfd 35import zlib
d77c3dfd 36
8c25f81b 37from .compat import (
b4a3d461 38 compat_HTMLParseError,
8bb56eee 39 compat_HTMLParser,
8f9312c3 40 compat_basestring,
8c25f81b 41 compat_chr,
1bab3437 42 compat_cookiejar,
d7cd9a9e 43 compat_ctypes_WINFUNCTYPE,
36e6f62c 44 compat_etree_fromstring,
51098426 45 compat_expanduser,
8c25f81b 46 compat_html_entities,
55b2f099 47 compat_html_entities_html5,
be4a824d 48 compat_http_client,
c86b6142 49 compat_kwargs,
efa97bdc 50 compat_os_name,
8c25f81b 51 compat_parse_qs,
702ccf2d 52 compat_shlex_quote,
8c25f81b 53 compat_str,
edaa23f8 54 compat_struct_pack,
d3f8e038 55 compat_struct_unpack,
8c25f81b
PH
56 compat_urllib_error,
57 compat_urllib_parse,
15707c7e 58 compat_urllib_parse_urlencode,
8c25f81b 59 compat_urllib_parse_urlparse,
7581bfc9 60 compat_urllib_parse_unquote_plus,
8c25f81b
PH
61 compat_urllib_request,
62 compat_urlparse,
810c10ba 63 compat_xpath,
8c25f81b 64)
4644ac55 65
71aff188
YCH
66from .socks import (
67 ProxyType,
68 sockssocket,
69)
70
4644ac55 71
51fb4995
YCH
def register_socks_protocols():
    """Make the URL parser treat SOCKS proxy schemes as having a netloc.

    Appends each of 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc`` unless it is already listed, so calling
    this more than once does not create duplicate entries.
    """
    # In Python < 2.6.5, urlsplit() suffers from bug
    # https://bugs.python.org/issue7904: URLs whose protocol is not listed in
    # urlparse.uses_netloc are not handled correctly.
    netloc_schemes = compat_urlparse.uses_netloc
    socks_schemes = ('socks', 'socks4', 'socks4a', 'socks5')
    # Work out what is missing first, then add it in the same order.
    missing = [name for name in socks_schemes if name not in netloc_schemes]
    netloc_schemes.extend(missing)
79
80
468e2e92
FV
# The type of a compiled regular expression is not clearly defined anywhere
# else, so take it from an actual compiled (empty) pattern object.
compiled_regex_type = re.compile('').__class__
83
f7a147e3
S
84
85def random_user_agent():
86 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
87 _CHROME_VERSIONS = (
88 '74.0.3729.129',
89 '76.0.3780.3',
90 '76.0.3780.2',
91 '74.0.3729.128',
92 '76.0.3780.1',
93 '76.0.3780.0',
94 '75.0.3770.15',
95 '74.0.3729.127',
96 '74.0.3729.126',
97 '76.0.3779.1',
98 '76.0.3779.0',
99 '75.0.3770.14',
100 '74.0.3729.125',
101 '76.0.3778.1',
102 '76.0.3778.0',
103 '75.0.3770.13',
104 '74.0.3729.124',
105 '74.0.3729.123',
106 '73.0.3683.121',
107 '76.0.3777.1',
108 '76.0.3777.0',
109 '75.0.3770.12',
110 '74.0.3729.122',
111 '76.0.3776.4',
112 '75.0.3770.11',
113 '74.0.3729.121',
114 '76.0.3776.3',
115 '76.0.3776.2',
116 '73.0.3683.120',
117 '74.0.3729.120',
118 '74.0.3729.119',
119 '74.0.3729.118',
120 '76.0.3776.1',
121 '76.0.3776.0',
122 '76.0.3775.5',
123 '75.0.3770.10',
124 '74.0.3729.117',
125 '76.0.3775.4',
126 '76.0.3775.3',
127 '74.0.3729.116',
128 '75.0.3770.9',
129 '76.0.3775.2',
130 '76.0.3775.1',
131 '76.0.3775.0',
132 '75.0.3770.8',
133 '74.0.3729.115',
134 '74.0.3729.114',
135 '76.0.3774.1',
136 '76.0.3774.0',
137 '75.0.3770.7',
138 '74.0.3729.113',
139 '74.0.3729.112',
140 '74.0.3729.111',
141 '76.0.3773.1',
142 '76.0.3773.0',
143 '75.0.3770.6',
144 '74.0.3729.110',
145 '74.0.3729.109',
146 '76.0.3772.1',
147 '76.0.3772.0',
148 '75.0.3770.5',
149 '74.0.3729.108',
150 '74.0.3729.107',
151 '76.0.3771.1',
152 '76.0.3771.0',
153 '75.0.3770.4',
154 '74.0.3729.106',
155 '74.0.3729.105',
156 '75.0.3770.3',
157 '74.0.3729.104',
158 '74.0.3729.103',
159 '74.0.3729.102',
160 '75.0.3770.2',
161 '74.0.3729.101',
162 '75.0.3770.1',
163 '75.0.3770.0',
164 '74.0.3729.100',
165 '75.0.3769.5',
166 '75.0.3769.4',
167 '74.0.3729.99',
168 '75.0.3769.3',
169 '75.0.3769.2',
170 '75.0.3768.6',
171 '74.0.3729.98',
172 '75.0.3769.1',
173 '75.0.3769.0',
174 '74.0.3729.97',
175 '73.0.3683.119',
176 '73.0.3683.118',
177 '74.0.3729.96',
178 '75.0.3768.5',
179 '75.0.3768.4',
180 '75.0.3768.3',
181 '75.0.3768.2',
182 '74.0.3729.95',
183 '74.0.3729.94',
184 '75.0.3768.1',
185 '75.0.3768.0',
186 '74.0.3729.93',
187 '74.0.3729.92',
188 '73.0.3683.117',
189 '74.0.3729.91',
190 '75.0.3766.3',
191 '74.0.3729.90',
192 '75.0.3767.2',
193 '75.0.3767.1',
194 '75.0.3767.0',
195 '74.0.3729.89',
196 '73.0.3683.116',
197 '75.0.3766.2',
198 '74.0.3729.88',
199 '75.0.3766.1',
200 '75.0.3766.0',
201 '74.0.3729.87',
202 '73.0.3683.115',
203 '74.0.3729.86',
204 '75.0.3765.1',
205 '75.0.3765.0',
206 '74.0.3729.85',
207 '73.0.3683.114',
208 '74.0.3729.84',
209 '75.0.3764.1',
210 '75.0.3764.0',
211 '74.0.3729.83',
212 '73.0.3683.113',
213 '75.0.3763.2',
214 '75.0.3761.4',
215 '74.0.3729.82',
216 '75.0.3763.1',
217 '75.0.3763.0',
218 '74.0.3729.81',
219 '73.0.3683.112',
220 '75.0.3762.1',
221 '75.0.3762.0',
222 '74.0.3729.80',
223 '75.0.3761.3',
224 '74.0.3729.79',
225 '73.0.3683.111',
226 '75.0.3761.2',
227 '74.0.3729.78',
228 '74.0.3729.77',
229 '75.0.3761.1',
230 '75.0.3761.0',
231 '73.0.3683.110',
232 '74.0.3729.76',
233 '74.0.3729.75',
234 '75.0.3760.0',
235 '74.0.3729.74',
236 '75.0.3759.8',
237 '75.0.3759.7',
238 '75.0.3759.6',
239 '74.0.3729.73',
240 '75.0.3759.5',
241 '74.0.3729.72',
242 '73.0.3683.109',
243 '75.0.3759.4',
244 '75.0.3759.3',
245 '74.0.3729.71',
246 '75.0.3759.2',
247 '74.0.3729.70',
248 '73.0.3683.108',
249 '74.0.3729.69',
250 '75.0.3759.1',
251 '75.0.3759.0',
252 '74.0.3729.68',
253 '73.0.3683.107',
254 '74.0.3729.67',
255 '75.0.3758.1',
256 '75.0.3758.0',
257 '74.0.3729.66',
258 '73.0.3683.106',
259 '74.0.3729.65',
260 '75.0.3757.1',
261 '75.0.3757.0',
262 '74.0.3729.64',
263 '73.0.3683.105',
264 '74.0.3729.63',
265 '75.0.3756.1',
266 '75.0.3756.0',
267 '74.0.3729.62',
268 '73.0.3683.104',
269 '75.0.3755.3',
270 '75.0.3755.2',
271 '73.0.3683.103',
272 '75.0.3755.1',
273 '75.0.3755.0',
274 '74.0.3729.61',
275 '73.0.3683.102',
276 '74.0.3729.60',
277 '75.0.3754.2',
278 '74.0.3729.59',
279 '75.0.3753.4',
280 '74.0.3729.58',
281 '75.0.3754.1',
282 '75.0.3754.0',
283 '74.0.3729.57',
284 '73.0.3683.101',
285 '75.0.3753.3',
286 '75.0.3752.2',
287 '75.0.3753.2',
288 '74.0.3729.56',
289 '75.0.3753.1',
290 '75.0.3753.0',
291 '74.0.3729.55',
292 '73.0.3683.100',
293 '74.0.3729.54',
294 '75.0.3752.1',
295 '75.0.3752.0',
296 '74.0.3729.53',
297 '73.0.3683.99',
298 '74.0.3729.52',
299 '75.0.3751.1',
300 '75.0.3751.0',
301 '74.0.3729.51',
302 '73.0.3683.98',
303 '74.0.3729.50',
304 '75.0.3750.0',
305 '74.0.3729.49',
306 '74.0.3729.48',
307 '74.0.3729.47',
308 '75.0.3749.3',
309 '74.0.3729.46',
310 '73.0.3683.97',
311 '75.0.3749.2',
312 '74.0.3729.45',
313 '75.0.3749.1',
314 '75.0.3749.0',
315 '74.0.3729.44',
316 '73.0.3683.96',
317 '74.0.3729.43',
318 '74.0.3729.42',
319 '75.0.3748.1',
320 '75.0.3748.0',
321 '74.0.3729.41',
322 '75.0.3747.1',
323 '73.0.3683.95',
324 '75.0.3746.4',
325 '74.0.3729.40',
326 '74.0.3729.39',
327 '75.0.3747.0',
328 '75.0.3746.3',
329 '75.0.3746.2',
330 '74.0.3729.38',
331 '75.0.3746.1',
332 '75.0.3746.0',
333 '74.0.3729.37',
334 '73.0.3683.94',
335 '75.0.3745.5',
336 '75.0.3745.4',
337 '75.0.3745.3',
338 '75.0.3745.2',
339 '74.0.3729.36',
340 '75.0.3745.1',
341 '75.0.3745.0',
342 '75.0.3744.2',
343 '74.0.3729.35',
344 '73.0.3683.93',
345 '74.0.3729.34',
346 '75.0.3744.1',
347 '75.0.3744.0',
348 '74.0.3729.33',
349 '73.0.3683.92',
350 '74.0.3729.32',
351 '74.0.3729.31',
352 '73.0.3683.91',
353 '75.0.3741.2',
354 '75.0.3740.5',
355 '74.0.3729.30',
356 '75.0.3741.1',
357 '75.0.3741.0',
358 '74.0.3729.29',
359 '75.0.3740.4',
360 '73.0.3683.90',
361 '74.0.3729.28',
362 '75.0.3740.3',
363 '73.0.3683.89',
364 '75.0.3740.2',
365 '74.0.3729.27',
366 '75.0.3740.1',
367 '75.0.3740.0',
368 '74.0.3729.26',
369 '73.0.3683.88',
370 '73.0.3683.87',
371 '74.0.3729.25',
372 '75.0.3739.1',
373 '75.0.3739.0',
374 '73.0.3683.86',
375 '74.0.3729.24',
376 '73.0.3683.85',
377 '75.0.3738.4',
378 '75.0.3738.3',
379 '75.0.3738.2',
380 '75.0.3738.1',
381 '75.0.3738.0',
382 '74.0.3729.23',
383 '73.0.3683.84',
384 '74.0.3729.22',
385 '74.0.3729.21',
386 '75.0.3737.1',
387 '75.0.3737.0',
388 '74.0.3729.20',
389 '73.0.3683.83',
390 '74.0.3729.19',
391 '75.0.3736.1',
392 '75.0.3736.0',
393 '74.0.3729.18',
394 '73.0.3683.82',
395 '74.0.3729.17',
396 '75.0.3735.1',
397 '75.0.3735.0',
398 '74.0.3729.16',
399 '73.0.3683.81',
400 '75.0.3734.1',
401 '75.0.3734.0',
402 '74.0.3729.15',
403 '73.0.3683.80',
404 '74.0.3729.14',
405 '75.0.3733.1',
406 '75.0.3733.0',
407 '75.0.3732.1',
408 '74.0.3729.13',
409 '74.0.3729.12',
410 '73.0.3683.79',
411 '74.0.3729.11',
412 '75.0.3732.0',
413 '74.0.3729.10',
414 '73.0.3683.78',
415 '74.0.3729.9',
416 '74.0.3729.8',
417 '74.0.3729.7',
418 '75.0.3731.3',
419 '75.0.3731.2',
420 '75.0.3731.0',
421 '74.0.3729.6',
422 '73.0.3683.77',
423 '73.0.3683.76',
424 '75.0.3730.5',
425 '75.0.3730.4',
426 '73.0.3683.75',
427 '74.0.3729.5',
428 '73.0.3683.74',
429 '75.0.3730.3',
430 '75.0.3730.2',
431 '74.0.3729.4',
432 '73.0.3683.73',
433 '73.0.3683.72',
434 '75.0.3730.1',
435 '75.0.3730.0',
436 '74.0.3729.3',
437 '73.0.3683.71',
438 '74.0.3729.2',
439 '73.0.3683.70',
440 '74.0.3729.1',
441 '74.0.3729.0',
442 '74.0.3726.4',
443 '73.0.3683.69',
444 '74.0.3726.3',
445 '74.0.3728.0',
446 '74.0.3726.2',
447 '73.0.3683.68',
448 '74.0.3726.1',
449 '74.0.3726.0',
450 '74.0.3725.4',
451 '73.0.3683.67',
452 '73.0.3683.66',
453 '74.0.3725.3',
454 '74.0.3725.2',
455 '74.0.3725.1',
456 '74.0.3724.8',
457 '74.0.3725.0',
458 '73.0.3683.65',
459 '74.0.3724.7',
460 '74.0.3724.6',
461 '74.0.3724.5',
462 '74.0.3724.4',
463 '74.0.3724.3',
464 '74.0.3724.2',
465 '74.0.3724.1',
466 '74.0.3724.0',
467 '73.0.3683.64',
468 '74.0.3723.1',
469 '74.0.3723.0',
470 '73.0.3683.63',
471 '74.0.3722.1',
472 '74.0.3722.0',
473 '73.0.3683.62',
474 '74.0.3718.9',
475 '74.0.3702.3',
476 '74.0.3721.3',
477 '74.0.3721.2',
478 '74.0.3721.1',
479 '74.0.3721.0',
480 '74.0.3720.6',
481 '73.0.3683.61',
482 '72.0.3626.122',
483 '73.0.3683.60',
484 '74.0.3720.5',
485 '72.0.3626.121',
486 '74.0.3718.8',
487 '74.0.3720.4',
488 '74.0.3720.3',
489 '74.0.3718.7',
490 '74.0.3720.2',
491 '74.0.3720.1',
492 '74.0.3720.0',
493 '74.0.3718.6',
494 '74.0.3719.5',
495 '73.0.3683.59',
496 '74.0.3718.5',
497 '74.0.3718.4',
498 '74.0.3719.4',
499 '74.0.3719.3',
500 '74.0.3719.2',
501 '74.0.3719.1',
502 '73.0.3683.58',
503 '74.0.3719.0',
504 '73.0.3683.57',
505 '73.0.3683.56',
506 '74.0.3718.3',
507 '73.0.3683.55',
508 '74.0.3718.2',
509 '74.0.3718.1',
510 '74.0.3718.0',
511 '73.0.3683.54',
512 '74.0.3717.2',
513 '73.0.3683.53',
514 '74.0.3717.1',
515 '74.0.3717.0',
516 '73.0.3683.52',
517 '74.0.3716.1',
518 '74.0.3716.0',
519 '73.0.3683.51',
520 '74.0.3715.1',
521 '74.0.3715.0',
522 '73.0.3683.50',
523 '74.0.3711.2',
524 '74.0.3714.2',
525 '74.0.3713.3',
526 '74.0.3714.1',
527 '74.0.3714.0',
528 '73.0.3683.49',
529 '74.0.3713.1',
530 '74.0.3713.0',
531 '72.0.3626.120',
532 '73.0.3683.48',
533 '74.0.3712.2',
534 '74.0.3712.1',
535 '74.0.3712.0',
536 '73.0.3683.47',
537 '72.0.3626.119',
538 '73.0.3683.46',
539 '74.0.3710.2',
540 '72.0.3626.118',
541 '74.0.3711.1',
542 '74.0.3711.0',
543 '73.0.3683.45',
544 '72.0.3626.117',
545 '74.0.3710.1',
546 '74.0.3710.0',
547 '73.0.3683.44',
548 '72.0.3626.116',
549 '74.0.3709.1',
550 '74.0.3709.0',
551 '74.0.3704.9',
552 '73.0.3683.43',
553 '72.0.3626.115',
554 '74.0.3704.8',
555 '74.0.3704.7',
556 '74.0.3708.0',
557 '74.0.3706.7',
558 '74.0.3704.6',
559 '73.0.3683.42',
560 '72.0.3626.114',
561 '74.0.3706.6',
562 '72.0.3626.113',
563 '74.0.3704.5',
564 '74.0.3706.5',
565 '74.0.3706.4',
566 '74.0.3706.3',
567 '74.0.3706.2',
568 '74.0.3706.1',
569 '74.0.3706.0',
570 '73.0.3683.41',
571 '72.0.3626.112',
572 '74.0.3705.1',
573 '74.0.3705.0',
574 '73.0.3683.40',
575 '72.0.3626.111',
576 '73.0.3683.39',
577 '74.0.3704.4',
578 '73.0.3683.38',
579 '74.0.3704.3',
580 '74.0.3704.2',
581 '74.0.3704.1',
582 '74.0.3704.0',
583 '73.0.3683.37',
584 '72.0.3626.110',
585 '72.0.3626.109',
586 '74.0.3703.3',
587 '74.0.3703.2',
588 '73.0.3683.36',
589 '74.0.3703.1',
590 '74.0.3703.0',
591 '73.0.3683.35',
592 '72.0.3626.108',
593 '74.0.3702.2',
594 '74.0.3699.3',
595 '74.0.3702.1',
596 '74.0.3702.0',
597 '73.0.3683.34',
598 '72.0.3626.107',
599 '73.0.3683.33',
600 '74.0.3701.1',
601 '74.0.3701.0',
602 '73.0.3683.32',
603 '73.0.3683.31',
604 '72.0.3626.105',
605 '74.0.3700.1',
606 '74.0.3700.0',
607 '73.0.3683.29',
608 '72.0.3626.103',
609 '74.0.3699.2',
610 '74.0.3699.1',
611 '74.0.3699.0',
612 '73.0.3683.28',
613 '72.0.3626.102',
614 '73.0.3683.27',
615 '73.0.3683.26',
616 '74.0.3698.0',
617 '74.0.3696.2',
618 '72.0.3626.101',
619 '73.0.3683.25',
620 '74.0.3696.1',
621 '74.0.3696.0',
622 '74.0.3694.8',
623 '72.0.3626.100',
624 '74.0.3694.7',
625 '74.0.3694.6',
626 '74.0.3694.5',
627 '74.0.3694.4',
628 '72.0.3626.99',
629 '72.0.3626.98',
630 '74.0.3694.3',
631 '73.0.3683.24',
632 '72.0.3626.97',
633 '72.0.3626.96',
634 '72.0.3626.95',
635 '73.0.3683.23',
636 '72.0.3626.94',
637 '73.0.3683.22',
638 '73.0.3683.21',
639 '72.0.3626.93',
640 '74.0.3694.2',
641 '72.0.3626.92',
642 '74.0.3694.1',
643 '74.0.3694.0',
644 '74.0.3693.6',
645 '73.0.3683.20',
646 '72.0.3626.91',
647 '74.0.3693.5',
648 '74.0.3693.4',
649 '74.0.3693.3',
650 '74.0.3693.2',
651 '73.0.3683.19',
652 '74.0.3693.1',
653 '74.0.3693.0',
654 '73.0.3683.18',
655 '72.0.3626.90',
656 '74.0.3692.1',
657 '74.0.3692.0',
658 '73.0.3683.17',
659 '72.0.3626.89',
660 '74.0.3687.3',
661 '74.0.3691.1',
662 '74.0.3691.0',
663 '73.0.3683.16',
664 '72.0.3626.88',
665 '72.0.3626.87',
666 '73.0.3683.15',
667 '74.0.3690.1',
668 '74.0.3690.0',
669 '73.0.3683.14',
670 '72.0.3626.86',
671 '73.0.3683.13',
672 '73.0.3683.12',
673 '74.0.3689.1',
674 '74.0.3689.0',
675 '73.0.3683.11',
676 '72.0.3626.85',
677 '73.0.3683.10',
678 '72.0.3626.84',
679 '73.0.3683.9',
680 '74.0.3688.1',
681 '74.0.3688.0',
682 '73.0.3683.8',
683 '72.0.3626.83',
684 '74.0.3687.2',
685 '74.0.3687.1',
686 '74.0.3687.0',
687 '73.0.3683.7',
688 '72.0.3626.82',
689 '74.0.3686.4',
690 '72.0.3626.81',
691 '74.0.3686.3',
692 '74.0.3686.2',
693 '74.0.3686.1',
694 '74.0.3686.0',
695 '73.0.3683.6',
696 '72.0.3626.80',
697 '74.0.3685.1',
698 '74.0.3685.0',
699 '73.0.3683.5',
700 '72.0.3626.79',
701 '74.0.3684.1',
702 '74.0.3684.0',
703 '73.0.3683.4',
704 '72.0.3626.78',
705 '72.0.3626.77',
706 '73.0.3683.3',
707 '73.0.3683.2',
708 '72.0.3626.76',
709 '73.0.3683.1',
710 '73.0.3683.0',
711 '72.0.3626.75',
712 '71.0.3578.141',
713 '73.0.3682.1',
714 '73.0.3682.0',
715 '72.0.3626.74',
716 '71.0.3578.140',
717 '73.0.3681.4',
718 '73.0.3681.3',
719 '73.0.3681.2',
720 '73.0.3681.1',
721 '73.0.3681.0',
722 '72.0.3626.73',
723 '71.0.3578.139',
724 '72.0.3626.72',
725 '72.0.3626.71',
726 '73.0.3680.1',
727 '73.0.3680.0',
728 '72.0.3626.70',
729 '71.0.3578.138',
730 '73.0.3678.2',
731 '73.0.3679.1',
732 '73.0.3679.0',
733 '72.0.3626.69',
734 '71.0.3578.137',
735 '73.0.3678.1',
736 '73.0.3678.0',
737 '71.0.3578.136',
738 '73.0.3677.1',
739 '73.0.3677.0',
740 '72.0.3626.68',
741 '72.0.3626.67',
742 '71.0.3578.135',
743 '73.0.3676.1',
744 '73.0.3676.0',
745 '73.0.3674.2',
746 '72.0.3626.66',
747 '71.0.3578.134',
748 '73.0.3674.1',
749 '73.0.3674.0',
750 '72.0.3626.65',
751 '71.0.3578.133',
752 '73.0.3673.2',
753 '73.0.3673.1',
754 '73.0.3673.0',
755 '72.0.3626.64',
756 '71.0.3578.132',
757 '72.0.3626.63',
758 '72.0.3626.62',
759 '72.0.3626.61',
760 '72.0.3626.60',
761 '73.0.3672.1',
762 '73.0.3672.0',
763 '72.0.3626.59',
764 '71.0.3578.131',
765 '73.0.3671.3',
766 '73.0.3671.2',
767 '73.0.3671.1',
768 '73.0.3671.0',
769 '72.0.3626.58',
770 '71.0.3578.130',
771 '73.0.3670.1',
772 '73.0.3670.0',
773 '72.0.3626.57',
774 '71.0.3578.129',
775 '73.0.3669.1',
776 '73.0.3669.0',
777 '72.0.3626.56',
778 '71.0.3578.128',
779 '73.0.3668.2',
780 '73.0.3668.1',
781 '73.0.3668.0',
782 '72.0.3626.55',
783 '71.0.3578.127',
784 '73.0.3667.2',
785 '73.0.3667.1',
786 '73.0.3667.0',
787 '72.0.3626.54',
788 '71.0.3578.126',
789 '73.0.3666.1',
790 '73.0.3666.0',
791 '72.0.3626.53',
792 '71.0.3578.125',
793 '73.0.3665.4',
794 '73.0.3665.3',
795 '72.0.3626.52',
796 '73.0.3665.2',
797 '73.0.3664.4',
798 '73.0.3665.1',
799 '73.0.3665.0',
800 '72.0.3626.51',
801 '71.0.3578.124',
802 '72.0.3626.50',
803 '73.0.3664.3',
804 '73.0.3664.2',
805 '73.0.3664.1',
806 '73.0.3664.0',
807 '73.0.3663.2',
808 '72.0.3626.49',
809 '71.0.3578.123',
810 '73.0.3663.1',
811 '73.0.3663.0',
812 '72.0.3626.48',
813 '71.0.3578.122',
814 '73.0.3662.1',
815 '73.0.3662.0',
816 '72.0.3626.47',
817 '71.0.3578.121',
818 '73.0.3661.1',
819 '72.0.3626.46',
820 '73.0.3661.0',
821 '72.0.3626.45',
822 '71.0.3578.120',
823 '73.0.3660.2',
824 '73.0.3660.1',
825 '73.0.3660.0',
826 '72.0.3626.44',
827 '71.0.3578.119',
828 '73.0.3659.1',
829 '73.0.3659.0',
830 '72.0.3626.43',
831 '71.0.3578.118',
832 '73.0.3658.1',
833 '73.0.3658.0',
834 '72.0.3626.42',
835 '71.0.3578.117',
836 '73.0.3657.1',
837 '73.0.3657.0',
838 '72.0.3626.41',
839 '71.0.3578.116',
840 '73.0.3656.1',
841 '73.0.3656.0',
842 '72.0.3626.40',
843 '71.0.3578.115',
844 '73.0.3655.1',
845 '73.0.3655.0',
846 '72.0.3626.39',
847 '71.0.3578.114',
848 '73.0.3654.1',
849 '73.0.3654.0',
850 '72.0.3626.38',
851 '71.0.3578.113',
852 '73.0.3653.1',
853 '73.0.3653.0',
854 '72.0.3626.37',
855 '71.0.3578.112',
856 '73.0.3652.1',
857 '73.0.3652.0',
858 '72.0.3626.36',
859 '71.0.3578.111',
860 '73.0.3651.1',
861 '73.0.3651.0',
862 '72.0.3626.35',
863 '71.0.3578.110',
864 '73.0.3650.1',
865 '73.0.3650.0',
866 '72.0.3626.34',
867 '71.0.3578.109',
868 '73.0.3649.1',
869 '73.0.3649.0',
870 '72.0.3626.33',
871 '71.0.3578.108',
872 '73.0.3648.2',
873 '73.0.3648.1',
874 '73.0.3648.0',
875 '72.0.3626.32',
876 '71.0.3578.107',
877 '73.0.3647.2',
878 '73.0.3647.1',
879 '73.0.3647.0',
880 '72.0.3626.31',
881 '71.0.3578.106',
882 '73.0.3635.3',
883 '73.0.3646.2',
884 '73.0.3646.1',
885 '73.0.3646.0',
886 '72.0.3626.30',
887 '71.0.3578.105',
888 '72.0.3626.29',
889 '73.0.3645.2',
890 '73.0.3645.1',
891 '73.0.3645.0',
892 '72.0.3626.28',
893 '71.0.3578.104',
894 '72.0.3626.27',
895 '72.0.3626.26',
896 '72.0.3626.25',
897 '72.0.3626.24',
898 '73.0.3644.0',
899 '73.0.3643.2',
900 '72.0.3626.23',
901 '71.0.3578.103',
902 '73.0.3643.1',
903 '73.0.3643.0',
904 '72.0.3626.22',
905 '71.0.3578.102',
906 '73.0.3642.1',
907 '73.0.3642.0',
908 '72.0.3626.21',
909 '71.0.3578.101',
910 '73.0.3641.1',
911 '73.0.3641.0',
912 '72.0.3626.20',
913 '71.0.3578.100',
914 '72.0.3626.19',
915 '73.0.3640.1',
916 '73.0.3640.0',
917 '72.0.3626.18',
918 '73.0.3639.1',
919 '71.0.3578.99',
920 '73.0.3639.0',
921 '72.0.3626.17',
922 '73.0.3638.2',
923 '72.0.3626.16',
924 '73.0.3638.1',
925 '73.0.3638.0',
926 '72.0.3626.15',
927 '71.0.3578.98',
928 '73.0.3635.2',
929 '71.0.3578.97',
930 '73.0.3637.1',
931 '73.0.3637.0',
932 '72.0.3626.14',
933 '71.0.3578.96',
934 '71.0.3578.95',
935 '72.0.3626.13',
936 '71.0.3578.94',
937 '73.0.3636.2',
938 '71.0.3578.93',
939 '73.0.3636.1',
940 '73.0.3636.0',
941 '72.0.3626.12',
942 '71.0.3578.92',
943 '73.0.3635.1',
944 '73.0.3635.0',
945 '72.0.3626.11',
946 '71.0.3578.91',
947 '73.0.3634.2',
948 '73.0.3634.1',
949 '73.0.3634.0',
950 '72.0.3626.10',
951 '71.0.3578.90',
952 '71.0.3578.89',
953 '73.0.3633.2',
954 '73.0.3633.1',
955 '73.0.3633.0',
956 '72.0.3610.4',
957 '72.0.3626.9',
958 '71.0.3578.88',
959 '73.0.3632.5',
960 '73.0.3632.4',
961 '73.0.3632.3',
962 '73.0.3632.2',
963 '73.0.3632.1',
964 '73.0.3632.0',
965 '72.0.3626.8',
966 '71.0.3578.87',
967 '73.0.3631.2',
968 '73.0.3631.1',
969 '73.0.3631.0',
970 '72.0.3626.7',
971 '71.0.3578.86',
972 '72.0.3626.6',
973 '73.0.3630.1',
974 '73.0.3630.0',
975 '72.0.3626.5',
976 '71.0.3578.85',
977 '72.0.3626.4',
978 '73.0.3628.3',
979 '73.0.3628.2',
980 '73.0.3629.1',
981 '73.0.3629.0',
982 '72.0.3626.3',
983 '71.0.3578.84',
984 '73.0.3628.1',
985 '73.0.3628.0',
986 '71.0.3578.83',
987 '73.0.3627.1',
988 '73.0.3627.0',
989 '72.0.3626.2',
990 '71.0.3578.82',
991 '71.0.3578.81',
992 '71.0.3578.80',
993 '72.0.3626.1',
994 '72.0.3626.0',
995 '71.0.3578.79',
996 '70.0.3538.124',
997 '71.0.3578.78',
998 '72.0.3623.4',
999 '72.0.3625.2',
1000 '72.0.3625.1',
1001 '72.0.3625.0',
1002 '71.0.3578.77',
1003 '70.0.3538.123',
1004 '72.0.3624.4',
1005 '72.0.3624.3',
1006 '72.0.3624.2',
1007 '71.0.3578.76',
1008 '72.0.3624.1',
1009 '72.0.3624.0',
1010 '72.0.3623.3',
1011 '71.0.3578.75',
1012 '70.0.3538.122',
1013 '71.0.3578.74',
1014 '72.0.3623.2',
1015 '72.0.3610.3',
1016 '72.0.3623.1',
1017 '72.0.3623.0',
1018 '72.0.3622.3',
1019 '72.0.3622.2',
1020 '71.0.3578.73',
1021 '70.0.3538.121',
1022 '72.0.3622.1',
1023 '72.0.3622.0',
1024 '71.0.3578.72',
1025 '70.0.3538.120',
1026 '72.0.3621.1',
1027 '72.0.3621.0',
1028 '71.0.3578.71',
1029 '70.0.3538.119',
1030 '72.0.3620.1',
1031 '72.0.3620.0',
1032 '71.0.3578.70',
1033 '70.0.3538.118',
1034 '71.0.3578.69',
1035 '72.0.3619.1',
1036 '72.0.3619.0',
1037 '71.0.3578.68',
1038 '70.0.3538.117',
1039 '71.0.3578.67',
1040 '72.0.3618.1',
1041 '72.0.3618.0',
1042 '71.0.3578.66',
1043 '70.0.3538.116',
1044 '72.0.3617.1',
1045 '72.0.3617.0',
1046 '71.0.3578.65',
1047 '70.0.3538.115',
1048 '72.0.3602.3',
1049 '71.0.3578.64',
1050 '72.0.3616.1',
1051 '72.0.3616.0',
1052 '71.0.3578.63',
1053 '70.0.3538.114',
1054 '71.0.3578.62',
1055 '72.0.3615.1',
1056 '72.0.3615.0',
1057 '71.0.3578.61',
1058 '70.0.3538.113',
1059 '72.0.3614.1',
1060 '72.0.3614.0',
1061 '71.0.3578.60',
1062 '70.0.3538.112',
1063 '72.0.3613.1',
1064 '72.0.3613.0',
1065 '71.0.3578.59',
1066 '70.0.3538.111',
1067 '72.0.3612.2',
1068 '72.0.3612.1',
1069 '72.0.3612.0',
1070 '70.0.3538.110',
1071 '71.0.3578.58',
1072 '70.0.3538.109',
1073 '72.0.3611.2',
1074 '72.0.3611.1',
1075 '72.0.3611.0',
1076 '71.0.3578.57',
1077 '70.0.3538.108',
1078 '72.0.3610.2',
1079 '71.0.3578.56',
1080 '71.0.3578.55',
1081 '72.0.3610.1',
1082 '72.0.3610.0',
1083 '71.0.3578.54',
1084 '70.0.3538.107',
1085 '71.0.3578.53',
1086 '72.0.3609.3',
1087 '71.0.3578.52',
1088 '72.0.3609.2',
1089 '71.0.3578.51',
1090 '72.0.3608.5',
1091 '72.0.3609.1',
1092 '72.0.3609.0',
1093 '71.0.3578.50',
1094 '70.0.3538.106',
1095 '72.0.3608.4',
1096 '72.0.3608.3',
1097 '72.0.3608.2',
1098 '71.0.3578.49',
1099 '72.0.3608.1',
1100 '72.0.3608.0',
1101 '70.0.3538.105',
1102 '71.0.3578.48',
1103 '72.0.3607.1',
1104 '72.0.3607.0',
1105 '71.0.3578.47',
1106 '70.0.3538.104',
1107 '72.0.3606.2',
1108 '72.0.3606.1',
1109 '72.0.3606.0',
1110 '71.0.3578.46',
1111 '70.0.3538.103',
1112 '70.0.3538.102',
1113 '72.0.3605.3',
1114 '72.0.3605.2',
1115 '72.0.3605.1',
1116 '72.0.3605.0',
1117 '71.0.3578.45',
1118 '70.0.3538.101',
1119 '71.0.3578.44',
1120 '71.0.3578.43',
1121 '70.0.3538.100',
1122 '70.0.3538.99',
1123 '71.0.3578.42',
1124 '72.0.3604.1',
1125 '72.0.3604.0',
1126 '71.0.3578.41',
1127 '70.0.3538.98',
1128 '71.0.3578.40',
1129 '72.0.3603.2',
1130 '72.0.3603.1',
1131 '72.0.3603.0',
1132 '71.0.3578.39',
1133 '70.0.3538.97',
1134 '72.0.3602.2',
1135 '71.0.3578.38',
1136 '71.0.3578.37',
1137 '72.0.3602.1',
1138 '72.0.3602.0',
1139 '71.0.3578.36',
1140 '70.0.3538.96',
1141 '72.0.3601.1',
1142 '72.0.3601.0',
1143 '71.0.3578.35',
1144 '70.0.3538.95',
1145 '72.0.3600.1',
1146 '72.0.3600.0',
1147 '71.0.3578.34',
1148 '70.0.3538.94',
1149 '72.0.3599.3',
1150 '72.0.3599.2',
1151 '72.0.3599.1',
1152 '72.0.3599.0',
1153 '71.0.3578.33',
1154 '70.0.3538.93',
1155 '72.0.3598.1',
1156 '72.0.3598.0',
1157 '71.0.3578.32',
1158 '70.0.3538.87',
1159 '72.0.3597.1',
1160 '72.0.3597.0',
1161 '72.0.3596.2',
1162 '71.0.3578.31',
1163 '70.0.3538.86',
1164 '71.0.3578.30',
1165 '71.0.3578.29',
1166 '72.0.3596.1',
1167 '72.0.3596.0',
1168 '71.0.3578.28',
1169 '70.0.3538.85',
1170 '72.0.3595.2',
1171 '72.0.3591.3',
1172 '72.0.3595.1',
1173 '72.0.3595.0',
1174 '71.0.3578.27',
1175 '70.0.3538.84',
1176 '72.0.3594.1',
1177 '72.0.3594.0',
1178 '71.0.3578.26',
1179 '70.0.3538.83',
1180 '72.0.3593.2',
1181 '72.0.3593.1',
1182 '72.0.3593.0',
1183 '71.0.3578.25',
1184 '70.0.3538.82',
1185 '72.0.3589.3',
1186 '72.0.3592.2',
1187 '72.0.3592.1',
1188 '72.0.3592.0',
1189 '71.0.3578.24',
1190 '72.0.3589.2',
1191 '70.0.3538.81',
1192 '70.0.3538.80',
1193 '72.0.3591.2',
1194 '72.0.3591.1',
1195 '72.0.3591.0',
1196 '71.0.3578.23',
1197 '70.0.3538.79',
1198 '71.0.3578.22',
1199 '72.0.3590.1',
1200 '72.0.3590.0',
1201 '71.0.3578.21',
1202 '70.0.3538.78',
1203 '70.0.3538.77',
1204 '72.0.3589.1',
1205 '72.0.3589.0',
1206 '71.0.3578.20',
1207 '70.0.3538.76',
1208 '71.0.3578.19',
1209 '70.0.3538.75',
1210 '72.0.3588.1',
1211 '72.0.3588.0',
1212 '71.0.3578.18',
1213 '70.0.3538.74',
1214 '72.0.3586.2',
1215 '72.0.3587.0',
1216 '71.0.3578.17',
1217 '70.0.3538.73',
1218 '72.0.3586.1',
1219 '72.0.3586.0',
1220 '71.0.3578.16',
1221 '70.0.3538.72',
1222 '72.0.3585.1',
1223 '72.0.3585.0',
1224 '71.0.3578.15',
1225 '70.0.3538.71',
1226 '71.0.3578.14',
1227 '72.0.3584.1',
1228 '72.0.3584.0',
1229 '71.0.3578.13',
1230 '70.0.3538.70',
1231 '72.0.3583.2',
1232 '71.0.3578.12',
1233 '72.0.3583.1',
1234 '72.0.3583.0',
1235 '71.0.3578.11',
1236 '70.0.3538.69',
1237 '71.0.3578.10',
1238 '72.0.3582.0',
1239 '72.0.3581.4',
1240 '71.0.3578.9',
1241 '70.0.3538.67',
1242 '72.0.3581.3',
1243 '72.0.3581.2',
1244 '72.0.3581.1',
1245 '72.0.3581.0',
1246 '71.0.3578.8',
1247 '70.0.3538.66',
1248 '72.0.3580.1',
1249 '72.0.3580.0',
1250 '71.0.3578.7',
1251 '70.0.3538.65',
1252 '71.0.3578.6',
1253 '72.0.3579.1',
1254 '72.0.3579.0',
1255 '71.0.3578.5',
1256 '70.0.3538.64',
1257 '71.0.3578.4',
1258 '71.0.3578.3',
1259 '71.0.3578.2',
1260 '71.0.3578.1',
1261 '71.0.3578.0',
1262 '70.0.3538.63',
1263 '69.0.3497.128',
1264 '70.0.3538.62',
1265 '70.0.3538.61',
1266 '70.0.3538.60',
1267 '70.0.3538.59',
1268 '71.0.3577.1',
1269 '71.0.3577.0',
1270 '70.0.3538.58',
1271 '69.0.3497.127',
1272 '71.0.3576.2',
1273 '71.0.3576.1',
1274 '71.0.3576.0',
1275 '70.0.3538.57',
1276 '70.0.3538.56',
1277 '71.0.3575.2',
1278 '70.0.3538.55',
1279 '69.0.3497.126',
1280 '70.0.3538.54',
1281 '71.0.3575.1',
1282 '71.0.3575.0',
1283 '71.0.3574.1',
1284 '71.0.3574.0',
1285 '70.0.3538.53',
1286 '69.0.3497.125',
1287 '70.0.3538.52',
1288 '71.0.3573.1',
1289 '71.0.3573.0',
1290 '70.0.3538.51',
1291 '69.0.3497.124',
1292 '71.0.3572.1',
1293 '71.0.3572.0',
1294 '70.0.3538.50',
1295 '69.0.3497.123',
1296 '71.0.3571.2',
1297 '70.0.3538.49',
1298 '69.0.3497.122',
1299 '71.0.3571.1',
1300 '71.0.3571.0',
1301 '70.0.3538.48',
1302 '69.0.3497.121',
1303 '71.0.3570.1',
1304 '71.0.3570.0',
1305 '70.0.3538.47',
1306 '69.0.3497.120',
1307 '71.0.3568.2',
1308 '71.0.3569.1',
1309 '71.0.3569.0',
1310 '70.0.3538.46',
1311 '69.0.3497.119',
1312 '70.0.3538.45',
1313 '71.0.3568.1',
1314 '71.0.3568.0',
1315 '70.0.3538.44',
1316 '69.0.3497.118',
1317 '70.0.3538.43',
1318 '70.0.3538.42',
1319 '71.0.3567.1',
1320 '71.0.3567.0',
1321 '70.0.3538.41',
1322 '69.0.3497.117',
1323 '71.0.3566.1',
1324 '71.0.3566.0',
1325 '70.0.3538.40',
1326 '69.0.3497.116',
1327 '71.0.3565.1',
1328 '71.0.3565.0',
1329 '70.0.3538.39',
1330 '69.0.3497.115',
1331 '71.0.3564.1',
1332 '71.0.3564.0',
1333 '70.0.3538.38',
1334 '69.0.3497.114',
1335 '71.0.3563.0',
1336 '71.0.3562.2',
1337 '70.0.3538.37',
1338 '69.0.3497.113',
1339 '70.0.3538.36',
1340 '70.0.3538.35',
1341 '71.0.3562.1',
1342 '71.0.3562.0',
1343 '70.0.3538.34',
1344 '69.0.3497.112',
1345 '70.0.3538.33',
1346 '71.0.3561.1',
1347 '71.0.3561.0',
1348 '70.0.3538.32',
1349 '69.0.3497.111',
1350 '71.0.3559.6',
1351 '71.0.3560.1',
1352 '71.0.3560.0',
1353 '71.0.3559.5',
1354 '71.0.3559.4',
1355 '70.0.3538.31',
1356 '69.0.3497.110',
1357 '71.0.3559.3',
1358 '70.0.3538.30',
1359 '69.0.3497.109',
1360 '71.0.3559.2',
1361 '71.0.3559.1',
1362 '71.0.3559.0',
1363 '70.0.3538.29',
1364 '69.0.3497.108',
1365 '71.0.3558.2',
1366 '71.0.3558.1',
1367 '71.0.3558.0',
1368 '70.0.3538.28',
1369 '69.0.3497.107',
1370 '71.0.3557.2',
1371 '71.0.3557.1',
1372 '71.0.3557.0',
1373 '70.0.3538.27',
1374 '69.0.3497.106',
1375 '71.0.3554.4',
1376 '70.0.3538.26',
1377 '71.0.3556.1',
1378 '71.0.3556.0',
1379 '70.0.3538.25',
1380 '71.0.3554.3',
1381 '69.0.3497.105',
1382 '71.0.3554.2',
1383 '70.0.3538.24',
1384 '69.0.3497.104',
1385 '71.0.3555.2',
1386 '70.0.3538.23',
1387 '71.0.3555.1',
1388 '71.0.3555.0',
1389 '70.0.3538.22',
1390 '69.0.3497.103',
1391 '71.0.3554.1',
1392 '71.0.3554.0',
1393 '70.0.3538.21',
1394 '69.0.3497.102',
1395 '71.0.3553.3',
1396 '70.0.3538.20',
1397 '69.0.3497.101',
1398 '71.0.3553.2',
1399 '69.0.3497.100',
1400 '71.0.3553.1',
1401 '71.0.3553.0',
1402 '70.0.3538.19',
1403 '69.0.3497.99',
1404 '69.0.3497.98',
1405 '69.0.3497.97',
1406 '71.0.3552.6',
1407 '71.0.3552.5',
1408 '71.0.3552.4',
1409 '71.0.3552.3',
1410 '71.0.3552.2',
1411 '71.0.3552.1',
1412 '71.0.3552.0',
1413 '70.0.3538.18',
1414 '69.0.3497.96',
1415 '71.0.3551.3',
1416 '71.0.3551.2',
1417 '71.0.3551.1',
1418 '71.0.3551.0',
1419 '70.0.3538.17',
1420 '69.0.3497.95',
1421 '71.0.3550.3',
1422 '71.0.3550.2',
1423 '71.0.3550.1',
1424 '71.0.3550.0',
1425 '70.0.3538.16',
1426 '69.0.3497.94',
1427 '71.0.3549.1',
1428 '71.0.3549.0',
1429 '70.0.3538.15',
1430 '69.0.3497.93',
1431 '69.0.3497.92',
1432 '71.0.3548.1',
1433 '71.0.3548.0',
1434 '70.0.3538.14',
1435 '69.0.3497.91',
1436 '71.0.3547.1',
1437 '71.0.3547.0',
1438 '70.0.3538.13',
1439 '69.0.3497.90',
1440 '71.0.3546.2',
1441 '69.0.3497.89',
1442 '71.0.3546.1',
1443 '71.0.3546.0',
1444 '70.0.3538.12',
1445 '69.0.3497.88',
1446 '71.0.3545.4',
1447 '71.0.3545.3',
1448 '71.0.3545.2',
1449 '71.0.3545.1',
1450 '71.0.3545.0',
1451 '70.0.3538.11',
1452 '69.0.3497.87',
1453 '71.0.3544.5',
1454 '71.0.3544.4',
1455 '71.0.3544.3',
1456 '71.0.3544.2',
1457 '71.0.3544.1',
1458 '71.0.3544.0',
1459 '69.0.3497.86',
1460 '70.0.3538.10',
1461 '69.0.3497.85',
1462 '70.0.3538.9',
1463 '69.0.3497.84',
1464 '71.0.3543.4',
1465 '70.0.3538.8',
1466 '71.0.3543.3',
1467 '71.0.3543.2',
1468 '71.0.3543.1',
1469 '71.0.3543.0',
1470 '70.0.3538.7',
1471 '69.0.3497.83',
1472 '71.0.3542.2',
1473 '71.0.3542.1',
1474 '71.0.3542.0',
1475 '70.0.3538.6',
1476 '69.0.3497.82',
1477 '69.0.3497.81',
1478 '71.0.3541.1',
1479 '71.0.3541.0',
1480 '70.0.3538.5',
1481 '69.0.3497.80',
1482 '71.0.3540.1',
1483 '71.0.3540.0',
1484 '70.0.3538.4',
1485 '69.0.3497.79',
1486 '70.0.3538.3',
1487 '71.0.3539.1',
1488 '71.0.3539.0',
1489 '69.0.3497.78',
1490 '68.0.3440.134',
1491 '69.0.3497.77',
1492 '70.0.3538.2',
1493 '70.0.3538.1',
1494 '70.0.3538.0',
1495 '69.0.3497.76',
1496 '68.0.3440.133',
1497 '69.0.3497.75',
1498 '70.0.3537.2',
1499 '70.0.3537.1',
1500 '70.0.3537.0',
1501 '69.0.3497.74',
1502 '68.0.3440.132',
1503 '70.0.3536.0',
1504 '70.0.3535.5',
1505 '70.0.3535.4',
1506 '70.0.3535.3',
1507 '69.0.3497.73',
1508 '68.0.3440.131',
1509 '70.0.3532.8',
1510 '70.0.3532.7',
1511 '69.0.3497.72',
1512 '69.0.3497.71',
1513 '70.0.3535.2',
1514 '70.0.3535.1',
1515 '70.0.3535.0',
1516 '69.0.3497.70',
1517 '68.0.3440.130',
1518 '69.0.3497.69',
1519 '68.0.3440.129',
1520 '70.0.3534.4',
1521 '70.0.3534.3',
1522 '70.0.3534.2',
1523 '70.0.3534.1',
1524 '70.0.3534.0',
1525 '69.0.3497.68',
1526 '68.0.3440.128',
1527 '70.0.3533.2',
1528 '70.0.3533.1',
1529 '70.0.3533.0',
1530 '69.0.3497.67',
1531 '68.0.3440.127',
1532 '70.0.3532.6',
1533 '70.0.3532.5',
1534 '70.0.3532.4',
1535 '69.0.3497.66',
1536 '68.0.3440.126',
1537 '70.0.3532.3',
1538 '70.0.3532.2',
1539 '70.0.3532.1',
1540 '69.0.3497.60',
1541 '69.0.3497.65',
1542 '69.0.3497.64',
1543 '70.0.3532.0',
1544 '70.0.3531.0',
1545 '70.0.3530.4',
1546 '70.0.3530.3',
1547 '70.0.3530.2',
1548 '69.0.3497.58',
1549 '68.0.3440.125',
1550 '69.0.3497.57',
1551 '69.0.3497.56',
1552 '69.0.3497.55',
1553 '69.0.3497.54',
1554 '70.0.3530.1',
1555 '70.0.3530.0',
1556 '69.0.3497.53',
1557 '68.0.3440.124',
1558 '69.0.3497.52',
1559 '70.0.3529.3',
1560 '70.0.3529.2',
1561 '70.0.3529.1',
1562 '70.0.3529.0',
1563 '69.0.3497.51',
1564 '70.0.3528.4',
1565 '68.0.3440.123',
1566 '70.0.3528.3',
1567 '70.0.3528.2',
1568 '70.0.3528.1',
1569 '70.0.3528.0',
1570 '69.0.3497.50',
1571 '68.0.3440.122',
1572 '70.0.3527.1',
1573 '70.0.3527.0',
1574 '69.0.3497.49',
1575 '68.0.3440.121',
1576 '70.0.3526.1',
1577 '70.0.3526.0',
1578 '68.0.3440.120',
1579 '69.0.3497.48',
1580 '69.0.3497.47',
1581 '68.0.3440.119',
1582 '68.0.3440.118',
1583 '70.0.3525.5',
1584 '70.0.3525.4',
1585 '70.0.3525.3',
1586 '68.0.3440.117',
1587 '69.0.3497.46',
1588 '70.0.3525.2',
1589 '70.0.3525.1',
1590 '70.0.3525.0',
1591 '69.0.3497.45',
1592 '68.0.3440.116',
1593 '70.0.3524.4',
1594 '70.0.3524.3',
1595 '69.0.3497.44',
1596 '70.0.3524.2',
1597 '70.0.3524.1',
1598 '70.0.3524.0',
1599 '70.0.3523.2',
1600 '69.0.3497.43',
1601 '68.0.3440.115',
1602 '70.0.3505.9',
1603 '69.0.3497.42',
1604 '70.0.3505.8',
1605 '70.0.3523.1',
1606 '70.0.3523.0',
1607 '69.0.3497.41',
1608 '68.0.3440.114',
1609 '70.0.3505.7',
1610 '69.0.3497.40',
1611 '70.0.3522.1',
1612 '70.0.3522.0',
1613 '70.0.3521.2',
1614 '69.0.3497.39',
1615 '68.0.3440.113',
1616 '70.0.3505.6',
1617 '70.0.3521.1',
1618 '70.0.3521.0',
1619 '69.0.3497.38',
1620 '68.0.3440.112',
1621 '70.0.3520.1',
1622 '70.0.3520.0',
1623 '69.0.3497.37',
1624 '68.0.3440.111',
1625 '70.0.3519.3',
1626 '70.0.3519.2',
1627 '70.0.3519.1',
1628 '70.0.3519.0',
1629 '69.0.3497.36',
1630 '68.0.3440.110',
1631 '70.0.3518.1',
1632 '70.0.3518.0',
1633 '69.0.3497.35',
1634 '69.0.3497.34',
1635 '68.0.3440.109',
1636 '70.0.3517.1',
1637 '70.0.3517.0',
1638 '69.0.3497.33',
1639 '68.0.3440.108',
1640 '69.0.3497.32',
1641 '70.0.3516.3',
1642 '70.0.3516.2',
1643 '70.0.3516.1',
1644 '70.0.3516.0',
1645 '69.0.3497.31',
1646 '68.0.3440.107',
1647 '70.0.3515.4',
1648 '68.0.3440.106',
1649 '70.0.3515.3',
1650 '70.0.3515.2',
1651 '70.0.3515.1',
1652 '70.0.3515.0',
1653 '69.0.3497.30',
1654 '68.0.3440.105',
1655 '68.0.3440.104',
1656 '70.0.3514.2',
1657 '70.0.3514.1',
1658 '70.0.3514.0',
1659 '69.0.3497.29',
1660 '68.0.3440.103',
1661 '70.0.3513.1',
1662 '70.0.3513.0',
1663 '69.0.3497.28',
1664 )
1665 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1666
1667
# Standard HTTP headers. The User-Agent value is computed once, when this
# module is imported, by calling random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1675
5f6a1245 1676
fb37eb25
S
# Fixed User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1680
1681
bf42a990
S
# Unique sentinel used as the default of `default=` parameters: callers are
# compared against it with `is`, so None remains usable as a real default.
NO_DEFAULT = object()
1683
7105440c
YCH
# English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1687
f6717dec
S
# Month names in calendar order, keyed by two-letter language code.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
a942d6cb 1694
a7aaa398
S
# Known file extensions, written without the leading dot.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1709
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. Replacements longer
# than one character ('AE', 'OE', 'TH', 'ss', ...) are wrapped in a list so
# that itertools.chain yields them as a single item instead of per character.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1714
46f59e89
S
# Date/time format strings written with strptime-style % directives.
# This base tuple is copied and extended by DATE_FORMATS_DAY_FIRST and
# DATE_FORMATS_MONTH_FIRST below.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1750
# The common formats followed by the ambiguous numeric ones, read day-first.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]
1760
# The common formats followed by the ambiguous numeric ones, read month-first.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1769
# Captures four groups from text of the form
# }('<payload>',<int>,<int>,'<words>'.split('|')
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type is application/ld+json (optionally
# quoted); the element body is captured in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1772
7105440c 1773
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() when that names a codec which can
    actually encode text, and 'UTF-8' in every other case.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec; an unknown or unusable name raises here
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 1787
f4bfd65f 1788
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)

    if sys.version_info < (3, 0) and sys.platform != 'win32':
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use unicode objects
        fs_encoding = get_filesystem_encoding()
        tmp_prefix = os.path.basename(fn).decode(fs_encoding) + '.'
        tmp_dir = os.path.dirname(fn).decode(fs_encoding)
    else:
        tmp_prefix = os.path.basename(fn) + '.'
        tmp_dir = os.path.dirname(fn)

    # The temporary file lives next to the target so that the final
    # os.rename stays on one filesystem
    tmp_args = {
        'suffix': '.tmp',
        'prefix': tmp_prefix,
        'dir': tmp_dir,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info >= (3, 0):
        tmp_args['mode'] = 'w'
        tmp_args['encoding'] = 'utf-8'
    else:
        tmp_args['mode'] = 'wb'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind, then report the failure
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1841
1842
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            attributes = candidate.attrib
            if key in attributes and (val is None or attributes.get(key) == val):
                return candidate
        return None
1857
d7e66d39
JMF
1858# On python2.6 the xml.etree.ElementTree.Element methods don't support
1859# the namespace parameter
5f6a1245
JW
1860
1861
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path into '{namespace}tag'.

    ns_map maps each prefix to its namespace; steps without a prefix are
    copied unchanged.
    """
    expanded = []
    for step in path.split('/'):
        pieces = step.split(':')
        if len(pieces) == 1:
            expanded.append(pieces[0])
            continue
        ns, tag = pieces
        expanded.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(expanded)
1872
d77c3dfd 1873
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    xpath is either a single path string or an iterable of paths that are
    tried in order until one matches.

    When nothing matches: default is returned if one was given; otherwise
    ExtractorError is raised if fatal is set; otherwise None is returned.
    name is used in the error message in place of xpath.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable of paths is
        # handled like any other miss instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: xpath itself may be a tuple of paths, which
            # would otherwise be unpacked as several format arguments
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
1895
1896
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element under node matching xpath.

    A missing element is handled by xpath_element with the same fatal and
    default arguments. An element that exists but has no text yields default
    if one was given, raises ExtractorError if fatal is set, and yields None
    otherwise. name is used in the error message in place of xpath.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # None or the caller's default means xpath_element found nothing
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: xpath itself may be a tuple of paths, which
            # would otherwise be unpacked as several format arguments
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
a41fb80c
S
1910
1911
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default is returned if one was given;
    otherwise ExtractorError is raised if fatal is set; otherwise None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1923
1924
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Delegates to the generic attribute lookup with attribute name 'id'
    return get_element_by_attribute('id', id, html)
43e8fafd 1928
12ea2f30 1929
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1934
1935
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1939
1940
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The value passed on is already a regex (class_name escaped and matched
    # as a whole word anywhere inside the attribute value), hence
    # escape_value=False
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1946
1947
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case it
    is inserted into the pattern as a regular expression.
    """
    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for mobj in re.finditer(element_re, html):
        content = mobj.group('content')
        # Content starting with a quote loses its first and last character
        if content.startswith(('"', "'")):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
a921f407 1971
c5229f39 1972
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        # Attributes of the most recently seen start tag; empty until one is parsed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Each start tag overwrites the previous result
        self.attrs = dict(attrs)
1981
c5229f39 1982
8bb56eee
BF
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        # Best effort: keep whatever attributes were gathered before the error
        pass
    return parser.attrs
9e6dd238 2007
c5229f39 2008
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />: source newlines become spaces, then <br> and
    # paragraph boundaries become the only real line breaks
    text = html.replace('\n', ' ')
    for break_re in (r'(?u)\s*<\s*br\s*/?\s*>\s*', r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>'):
        text = re.sub(break_re, '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2024
2025
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        # '-' selects standard output instead of a file on disk
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Use the underlying binary buffer when stdout has one
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors are re-raised without retrying under another name
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            # Sanitizing changed nothing, so a retry would fail the same way
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
2056
2057
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2065
5f6a1245 2066
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # '?', control characters and DEL are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        # IDs are kept as they are apart from the per-character replacement
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'__+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result if result else '_'
d77c3dfd 2106
5f6a1245 2107
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    # On every other platform the path is returned untouched
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    # Before Python 2.7 a UNC prefix has to be split off separately
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # '.' and '..' are kept; in every other component the characters
    # / < > : " | \ ? * and a trailing whitespace or dot become '#'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2124
2125
def sanitize_url(url):
    """Repair a few common URL defects and return the resulting URL.

    Scheme-less '//host/...' URLs get an 'http:' scheme, and known scheme
    typos are corrected; anything else is returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, correction in COMMON_TYPOS:
        # Every pattern is anchored at the start, so at most one
        # substitution can happen; only the first matching typo is fixed
        fixed, hits = re.subn(typo_re, correction, url)
        if hits:
            return fixed
    return url
17bcc626
S
2142
2143
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request after passing url through sanitize_url.

    All other positional and keyword arguments are forwarded unchanged.
    """
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
67dda517
S
2146
2147
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, then environment variables
    return os.path.expandvars(compat_expanduser(s))
2151
2152
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of each element, in input
    order. Elements are compared with ==, so they need not be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2160
912b38b4 2161
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without its leading '&' but with the
    trailing ';' (e.g. 'amp;' or '#x2F;'). Entities that cannot be resolved
    are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # Out-of-range code points raise ValueError; values too large for a
        # C integer raise OverflowError instead. Both mean "not a character".
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2191
2192
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through; any other value must be exactly a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode_entity(mobj):
        # Group 1 is the entity without '&' but including the ';'
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', _decode_entity, s)
d77c3dfd 2200
8bf48f23 2201
aa49acd1
S
def get_subprocess_encoding():
    """Return the name of the encoding to use for subprocess arguments."""
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2212
2213
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    keep_unicode = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if keep_unicode:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2236
2237
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string filename on Python 2.

    On Python 3, and for anything that is not a bytes object, b is returned
    unchanged.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2247
f07b74fc
PH
2248
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(..., True)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2256
2257
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument; same as decodeFilename(b, True)."""
    return decodeFilename(b, True)
2260
2261
8271226a
PH
def decodeOption(optval):
    """Return optval as a text string, decoding bytes with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return optval

    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 2270
5f6a1245 2271
4539dd30
PH
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The shortest form that can hold the value is used: the hour field
    appears from 3600 seconds on, the minute field from 60 seconds on.
    """
    # The bounds are inclusive so that exactly one hour is '1:00:00' rather
    # than '60:00', and exactly one minute is '1:00' rather than '60'
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2279
a0ddb8a2 2280
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler using the best SSL setup available.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are forwarded to the handler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname is switched off before verify_mode is set to CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # No SSL context is passed on this path
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2304
732ea2f0 2305
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to error messages asking for a bug report."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
2315
2316
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The other exception classes of this module derive from it.
    """
2320
2321
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """
        # Preserve original exception (the one being handled right now, if any)
        current_exc_info = sys.exc_info()

        # Network and availability problems are never reported as bugs
        if current_exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        parts = [msg]
        if video_id is not None:
            parts.insert(0, video_id + ': ')
        if cause:
            parts.append(' (caused by %r)' % cause)
        if not expected:
            parts.append(bug_reports_message())
        super(ExtractorError, self).__init__(''.join(parts))

        self.traceback = tb
        self.exc_info = current_exc_info
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2349
1c256f70 2350
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2356
2357
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match

    Adds nothing to ExtractorError beyond its own type.
    """
2361
2362
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """
    def __init__(self, msg, countries=None):
        # Always an expected error; msg is also kept unmodified on the instance
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg, self.countries = msg, countries
2373
2374
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
2387
2388
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2396
2397
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        # Also exposed as an attribute
        self.msg = msg
d77c3dfd 2408
5f6a1245 2409
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2413
2414
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2422
2423
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 2439
5f6a1245 2440
class XAttrMetadataError(YoutubeDLError):
    """Error raised for a failed extended-attribute operation.

    code is an errno value (or None) and msg the accompanying message. Both
    are classified into self.reason, which is one of 'NO_SPACE',
    'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # The message checks cover callers that only have error text;
        # 'Disk quota exceeded' is the spelling of the EDQUOT message
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2455
2456
class XAttrUnavailableError(YoutubeDLError):
    """YoutubeDLError subclass that adds nothing beyond its own type."""
2459
2460
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, bind it to a source address.

    The source address is read from ydl_handler._params['source_address'];
    when it is None the connection object is returned as created. The
    remaining positional and keyword arguments go to http_class.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each remaining address in turn; the first success is returned
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and close the half-open socket
                    err = _
                    if sock is not None:
                        sock.close()
            # Every attempt failed: re-raise the last error seen
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is used for the local end of the connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2524
2525
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to `headers`.

    Without the marker the very same mapping is returned. With it, a new dict
    is returned that lacks both the marker and any Accept-Encoding header
    (matched case-insensitively); the input mapping is left unmodified.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    kept = {}
    for name, value in headers.items():
        if name.lower() != 'accept-encoding':
            kept[name] = value
    del kept[marker]
    return kept
87f0e62d
YCH
2534
2535
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # `params` is kept on the handler; _create_http_connection() reads
        # 'source_address' from it when a connection is built.
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req`, switching to a SOCKS connection class when the
        internal 'Ytdl-socks-proxy' request header is present."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            # The header has been consumed here; remove it from the request
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress `data`, first as a raw deflate stream (negative
        window bits) and, if zlib rejects that, with zlib's defaults."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Pre-process an outgoing request: escape the URL, add the default
        headers and apply the internal header markers. Returns the request
        (a new object if the URL had to be rewritten)."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Fill in every standard header the request does not already carry
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response: transparently decode gzip/deflate bodies
        and percent-encode the Location header of 3xx responses."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if none of the
                # attempts decodes, the original error is re-raised.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            # Wrap the decoded body in a response-like object with the old metadata
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    # Python 2 gets the header back as a byte string
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 2657
5de90176 2658
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Build a subclass of `base_class` that connects through a SOCKS proxy.

    base_class  -- compat_http_client.HTTPConnection or HTTPSConnection
                   (or a subclass of either)
    socks_proxy -- proxy URL; the scheme selects the protocol ('socks5',
                   'socks4a', or 'socks'/'socks4'), the port defaults to 1080
                   and username/password are URL-unquoted when present

    Returns the new connection class.
    Raises ValueError if the URL scheme is not a supported SOCKS variant.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Without this branch `socks_type` stayed unbound and the function
        # failed further down with an unrelated-looking UnboundLocalError.
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Missing credentials (None or '') are passed through untouched
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2700
2701
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that builds its connections via _create_http_connection().

    An alternative connection class may be supplied; otherwise
    compat_http_client.HTTPSConnection is used.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open `req` over HTTPS, going through a SOCKS connection class when
        the internal 'Ytdl-socks-proxy' request header is present."""
        # Forward the TLS settings the base handler stored, when it has them
        # (`_context`: python > 2.6, `_check_hostname`: python 3.x)
        open_kwargs = {}
        for attr_name, kwarg_name in (
                ('_context', 'context'),
                ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr_name):
                open_kwargs[kwarg_name] = getattr(self, attr_name)

        connection_class = self._https_conn_class
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2725
2726
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """MozillaCookieJar that round-trips session cookies and accepts
    `#HttpOnly_`-prefixed lines in cookie files."""

    _HTTPONLY_PREFIX = '#HttpOnly_'

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the jar to `filename`, storing session cookies with
        `expires` set to 0 instead of an empty string.

        The 0 is applied only for the duration of the write: leaving it on the
        in-memory cookies would make them look expired afterwards.
        """
        session_cookies = [cookie for cookie in self if cookie.expires is None]
        for cookie in session_cookies:
            cookie.expires = 0
        try:
            compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
        finally:
            # Put the jar back into its pre-save state, even if writing failed
            for cookie in session_cookies:
                cookie.expires = None

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Copy the file into memory with the HttpOnly prefix stripped, so that
        # such lines are parsed as cookies rather than read as '#' comments
        cf = io.StringIO()
        with open(filename) as f:
            for line in f:
                if line.startswith(self._HTTPONLY_PREFIX):
                    line = line[len(self._HTTPONLY_PREFIX):]
                cf.write(compat_str(line))
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2768
2769
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that routes HTTPS traffic through the same hooks
    as HTTP. The Set-Cookie escaping workaround below is disabled."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround itself is commented out; this method currently
        # just delegates to the base class.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests use the base class request hook unchanged; HTTPS
    # responses go through http_response() above.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2792
2793
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC designator off `date_str`.

    Recognised suffixes are 'Z' and a numeric offset (+HHMM, -HH:MM, with an
    optional leading space), preceded by at least 8 characters.

    Returns a (timedelta, remaining_string) tuple; the timedelta is zero for
    'Z' or when nothing was recognised, in which case the string is returned
    unchanged.
    """
    found = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not found:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(found.group('tz'))]
    sign_char = found.group('sign')
    if not sign_char:
        # Plain 'Z': UTC, no offset
        return datetime.timedelta(), remainder

    factor = -1 if sign_char != '+' else 1
    offset = datetime.timedelta(
        hours=factor * int(found.group('hours')),
        minutes=factor * int(found.group('minutes')))
    return offset, remainder
2810
2811
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    `delimiter` separates the date and time parts. `timezone` is a timedelta
    offset from UTC; when None it is extracted from the string itself.
    Fractional seconds are discarded. Returns None for None input or when
    the string does not match the expected layout.
    """
    if date_str is None:
        return None

    without_fraction = re.sub(r'\.[0-9]+', '', date_str)
    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_moment = datetime.datetime.strptime(without_fraction, layout) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
912b38b4
PH
2829
2830
46f59e89
S
def date_formats(day_first=True):
    """Return the module's day-first date format list, or the month-first
    one when `day_first` is false."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2833
2834
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None for None input or when no known format matches.
    """
    if date_str is None:
        return None

    # Commas become spaces; AM/PM markers (plus an optional alphabetic
    # timezone name after them) and a trailing UTC offset are dropped
    cleaned = re.sub(
        r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str.replace(',', ' '))
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; the last one that parses determines the result
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to the RFC 2822 style parser from the email package
        fields = email.utils.parsedate_tz(cleaned)
        if fields:
            try:
                result = datetime.datetime(*fields[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
bf50b038 2861
5f6a1245 2862
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    The formats from date_formats(day_first) are tried in order and the first
    that parses wins; after that email.utils.parsedate_tz() is used as a
    fallback. Returns None for None input or when nothing matches.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE(review): any 'PM' adds 12 hours, so a '12:xx PM' time is also
    # shifted; left as is because the strptime formats are not visible here.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The UTC offset was already stripped from date_str above, so it has
        # to be applied here as well (previously it was silently dropped).
        # Spelled out instead of total_seconds() to stay Python 2.6 friendly.
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - tz_seconds
46f59e89
S
2894
2895
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from `url`.

    The text after the last '.' of the part before any '?' is used when it is
    purely alphanumeric, or when (minus trailing slashes) it is listed in
    KNOWN_EXTENSIONS; otherwise `default_ext` is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 2907
5f6a1245 2908
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return `filename` with its extension replaced by
    '<sub_lang>.<sub_format>' (delegates to replace_extension)."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 2911
5f6a1245 2912
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'yesterday' is accepted as well. Months and years are approximated as
    30 and 365 days respectively.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')
    # A bad approximation?
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
2940
2941
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    year, month, day = parts.groups()
    return '-'.join((year, month, day))
2950
5f6a1245 2951
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str;
        a missing bound defaults to the earliest / latest representable date"""
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if isinstance(date, datetime.date):
            candidate = date
        else:
            # Anything else is parsed like the constructor arguments
            candidate = date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
2981
2982
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
2991
2992
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    Only streams whose file descriptor is 1 or 2 and whose handle refers to
    a real console are written here, via WriteConsoleW. Raises OSError if
    WriteConsoleW reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> identifier passed to GetStdHandle
    # (-11 / -12 are the Win32 STD_OUTPUT_HANDLE / STD_ERROR_HANDLE values)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, non-character files (ignoring the REMOTE
        # flag) and handles for which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, stopping before each
    # non-BMP character; such a character is then written on its own with a
    # length of 2 (it is expected to report 2 units written).
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever was actually written
            s = s[written.value:]
    return True
3066
3067
def write_string(s, out=None, encoding=None):
    """Write the text `s` to `out` (sys.stderr by default) and flush it.

    Binary-mode streams (and every stream on Python 2) receive encoded bytes,
    text streams exposing `.buffer` are written through that buffer, and
    anything else gets the text as is. Unencodable characters are dropped.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    console_candidate = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if console_candidate and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        payload = s.encode(encoding or preferredencoding(), 'ignore')
        sink = out
    elif hasattr(out, 'buffer'):
        charset = encoding or getattr(out, 'encoding', None) or preferredencoding()
        payload = s.encode(charset, 'ignore')
        sink = out.buffer
    else:
        payload = s
        sink = out
    sink.write(payload)
    out.flush()
3088
3089
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the elements of the byte sequence `bs` as a list of ints.

    Sequences that already yield ints (Python 3 bytes) are copied; otherwise
    each element is converted with ord(). Empty input gives [].
    """
    if not bs:
        return []
    if not isinstance(bs[0], int):
        return list(map(ord, bs))
    return list(bs)  # Python 3
3097
c257baff 3098
def intlist_to_bytes(xs):
    """Pack a sequence of ints (one unsigned byte each) into a byte string;
    an empty sequence yields b''."""
    if not xs:
        return b''
    layout = '%dB' % len(xs)
    return compat_struct_pack(layout, *xs)
c38b1e77
PH
3103
3104
c1c9a79c
PH
3105# Cross-platform file locking
if sys.platform == 'win32':
    # Windows: lock through LockFileEx / UnlockFileEx from kernel32
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure passed as the last argument of
        # LockFileEx / UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low / high halves of the byte count to lock, i.e. "the whole file"
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the file object `f` from offset 0; raise OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so _unlock_file() can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock, 0x0 a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file(); raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """flock() `f` exclusively or shared."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() held on `f`."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Stubs so that callers get a clear IOError instead of a NameError
        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3178
3179
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened on construction; entering takes a shared lock for
    mode 'r' and an exclusive one otherwise, leaving unlocks and closes.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the open file if the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        handle = self.f
        try:
            _unlock_file(handle)
        finally:
            handle.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        underlying = self.f
        return underlying.write(*args)

    def read(self, *args):
        underlying = self.f
        return underlying.read(*args)
4eb7f1d1
JMF
3209
3210
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3214
3215
def shell_quote(args):
    """Return `args` as a single space-separated string with every argument
    shell-quoted; byte-string arguments are first decoded with the
    filesystem encoding."""
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(arg)) for arg in args)
9d4660ca
PH
3225
3226
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    `data` is JSON-encoded into a '#__youtubedl_smuggle=...' fragment. If
    `url` already carries smuggled data, both are merged and the values
    already present in the URL take precedence. `data` itself is not
    modified.
    """
    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so the caller's dict is left untouched
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3235
3236
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data).

    URLs without a '#__youtubedl_smuggle' fragment are returned unchanged
    together with `default`.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    plain_url, _, fragment = smug_url.rpartition('#')
    encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return plain_url, json.loads(encoded)
02dbf93f
PH
3244
3245
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a string with a binary suffix, e.g. '1.50KiB'.

    `bytes` may be a number or a numeric string. Returns 'N/A' for None and
    for negative values. Values of 1024**9 and above are expressed in YiB,
    values below one byte in B.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    if bytes < 0:
        # A negative size has no logarithm; report it as unknown
        return 'N/A'

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp to the available suffixes: tiny fractions used to produce a
        # negative index (and thus 'YiB'), huge values an IndexError
        exponent = int(math.log(bytes, 1024.0))
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3258
1c088fa8 3259
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of `s` using `unit_table`.

    The number may use ',' or '.' as decimal separator and the unit must be a
    key of `unit_table` followed by a word boundary. Returns the number times
    the unit's multiplier, truncated to int, or None when `s` does not match.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    found = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    multiplier = unit_table[found.group('unit')]
    return int(number * multiplier)
3269
3270
be64b5b0
PH
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into bytes.

    Returns None for None input or when lookup_unit_table() finds no match.
    """
    if s is None:
        return None

    # (symbol letter, decimal long name, binary long name), in ascending
    # order of magnitude
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        unit_table.update({
            letter + 'iB': binary,
            letter + 'B': decimal,
            letter.lower() + 'B': binary,
            letter + 'b': decimal,
            letter.lower() + 'b': decimal,
            decimal_name + 'bytes': decimal,
            binary_name + 'bytes': binary,
        })

    return lookup_unit_table(unit_table, s)
3340
3341
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Strings made only of digits, commas and dots go through str_to_int();
    otherwise a k / m / kk suffix (either case) is applied via
    lookup_unit_table(). Returns None for None input.
    """
    if s is None:
        return None

    text = s.strip()
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand = 1000
    million = 1000 ** 2
    multipliers = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(multipliers, text)
be64b5b0 3361
2f7ae819 3362
b871d7e9
S
def parse_resolution(s):
    """Extract video dimensions from a string.

    Understands 'WxH' (also with 'X' or '×'), '<height>p' / '<height>i' and
    '4k' / '8k' (height = 540 per k). Returns a dict with 'width' and/or
    'height', or {} when nothing is recognised or `s` is None.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = dimensions.group('w', 'h')
        return {
            'width': int(width),
            'height': int(height),
        }

    # Height-only notations, tried in order: (pattern, multiplier)
    height_only = (
        (r'\b(\d+)[pPiI]\b', 1),
        (r'\b([48])[kK]\b', 540),
    )
    for pattern, scale in height_only:
        found = re.search(pattern, s)
        if found:
            return {'height': int(found.group(1)) * scale}

    return {}
3383
3384
0dc41787
S
def parse_bitrate(s):
    """Return the integer in front of 'kbps' found in the text `s`, or None
    when `s` is not text or contains no such value."""
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    return int(found.group(1)) if found else None
3391
3392
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of a month given its full name

    Names are looked up in MONTH_NAMES[lang]; an unknown lang falls back to
    the English list. Returns None if the name is not in the list.
    """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    if name not in names:
        return None
    return names.index(name) + 1
3402
3403
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations, i.e. the first three letters of the month name.
        Returns None for an unknown abbreviation. """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3412
3413
def fix_xml_ampersands(xml_str):
    """Replace all the bare '&' by '&amp;' in XML

    An ampersand is left untouched when it already starts one of the five
    predefined entities (amp, lt, gt, apos, quot) or a numeric character
    reference. References need at least one digit and may be as long as the
    largest code point requires (6 hex / 7 decimal digits), so characters
    outside the BMP such as '&#x1F600;' are not double-escaped.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{1,6};|#[0-9]{1,7};)',
        '&amp;',
        xml_str)
e3946f98
PH
3420
3421
def setproctitle(title):
    """Best-effort attempt to set the process name via prctl(PR_SET_NAME).

    title must be a text string. The call silently does nothing on Jython,
    when 'libc.so.6' cannot be loaded, or when the loaded library has no
    prctl symbol.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initializing the buffer from the bytes (instead of from their length)
    # makes ctypes reserve one extra byte, so the string handed to prctl()
    # is always NUL-terminated and libc never reads past the buffer.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3446
3447
def remove_start(s, start):
    """Return s without the prefix start; s is returned as is (None included) when it does not start with it."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3450
3451
def remove_end(s, end):
    """Return s without the suffix end; s is returned as is (None included) when it does not end with it."""
    if s is None or not s.endswith(end):
        return s
    # Slice by an explicit length: with an empty suffix, s[:-len(end)] would
    # be s[:0] and wipe the whole string instead of leaving it unchanged
    return s[:len(s) - len(end)]
2b9faf55
PH
3454
3455
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s.

    None, strings shorter than two characters and strings without a matching
    pair of enclosing quotes are returned unchanged.
    """
    if s is not None and len(s) >= 2:
        first, last = s[0], s[-1]
        if first == last and first in ('"', "'"):
            return s[1:-1]
    return s
3463
3464
def url_basename(url):
    """Return the last segment of the URL's path, ignoring leading and trailing slashes."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3
PH
3468
3469
02dc0a36
S
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that precedes any '?', '#' or '&'.

    Raises AttributeError when url does not match that shape.
    """
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3472
3473
def urljoin(base, path):
    """Join path onto base and return the resulting URL, or None.

    Byte strings are decoded as UTF-8 first. None is returned when path is
    empty or not text, or when base is not an http(s) or protocol-relative
    URL. A path that is already absolute or protocol-relative is returned
    unchanged without looking at base.
    """
    def _as_text(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    path = _as_text(path)
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    base = _as_text(base)
    if isinstance(base, compat_str) and re.match(r'^(?:https?:)?//', base):
        return compat_urlparse.urljoin(base, path)
    return None
3487
3488
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always reported as HEAD."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3492
3493
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always reported as PUT."""

    def get_method(self):
        return 'PUT'
3497
3498
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default when that is not possible.

    If get_attr is given, the attribute of that name is read from v first
    (a missing attribute counts as None). None and '' yield default. The
    result is int(v) * invscale // scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '' or v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
9732d77e 3511
9572013d 3512
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3515
9732d77e
PH
3516
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    ',' and '.' (used as thousands separators) and '+' are removed before
    the conversion. None yields None. Values that are not strings, e.g.
    numbers that were already parsed, are passed straight to int() instead
    of failing inside re.sub().
    """
    if int_str is None:
        return None
    try:
        int_str = re.sub(r'[,\.\+]', '', int_str)
    except TypeError:
        # Not a text string: nothing to clean up, let int() handle it
        pass
    return int(int_str)
608d11f5
PH
3523
3524
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float, returning default when v is None or not convertible.

    The result is float(v) * invscale / scale.
    """
    result = default
    if v is not None:
        try:
            result = float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return result
43f775e4
PH
3532
3533
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v if it is a real bool, default otherwise."""
    if isinstance(v, bool):
        return v
    return default
3536
3537
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a text string, default otherwise."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3540
3541
af03000a
S
def url_or_none(url):
    """Return url stripped of whitespace if it looks like a URL, None otherwise.

    A value qualifies when it is a non-empty text string that, once stripped,
    starts with '<scheme>://' or with '//' (protocol-relative).
    """
    if url and isinstance(url, compat_str):
        candidate = url.strip()
        if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
            return candidate
    return None
3547
3548
def parse_duration(s):
    """Parse a duration string and return it as a number of seconds.

    Three notations are tried in turn:
      * clock style: '[[[DD:]HH:]MM:]SS[.fraction]'
      * unit style: '1d 2h 3m 4.5s', long unit names, or the ISO 8601-like
        'P1DT2H3M4S' (years, months and weeks are matched but ignored)
      * a single decimal amount of hours or minutes, e.g. '1.5 hours'
    A trailing 'Z' is tolerated in every notation. Returns None when s is
    not a string or matches none of them.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days = hours = mins = secs = ms = None

    clock = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if clock:
        days, hours, mins, secs, ms = clock.groups()
    else:
        units = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if units:
            days, hours, mins, secs, ms = units.groups()
        else:
            single = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if not single:
                return None
            hours, mins = single.groups()

    # Each captured component is scaled to seconds by multiplying with its
    # factors one after another, then added to the running total
    duration = 0
    for captured, factors in (
            (secs, ()),
            (mins, (60,)),
            (hours, (60, 60)),
            (days, (24, 60, 60)),
            (ms, ())):
        if captured:
            amount = float(captured)
            for factor in factors:
                amount = amount * factor
            duration += amount
    return duration
91d7d0b3
JMF
3605
3606
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the filename's existing extension.

    If expected_real_ext is given and the actual extension differs from it,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3613
3614
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the filename's extension with ext.

    If expected_real_ext is given and the actual extension differs from it,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3620
3621
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary can be started, and returns its name
    (False if starting it fails with OSError).
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
3630
3631
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The combined stdout/stderr output is handed to detect_exe_version. """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = process.communicate()[0]
    except OSError:
        return False
    if isinstance(output, bytes):
        # The pipe delivers raw bytes; non-ASCII bytes are dropped
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3649
3650
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output, or unrecognized.

    Without version_re, the token following the word 'version' is used.
    output must be a text string.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3660
3661
class PagedList(object):
    """Base class for paged lists; subclasses are expected to provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3666
9c44d242
PH
3667
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages one by one through pagefunc(pagenum).

    pagesize is the number of entries of a full page. Unless use_cache is
    false, every fetched page is remembered in a per-instance dict.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries with index start <= i < end (to the end if end is None)."""
        collected = []
        pagesize = self._pagesize
        for pagenum in itertools.count(start // pagesize):
            page_first = pagenum * pagesize
            next_first = page_first + pagesize
            if start >= next_first:
                continue

            page = self._cache.get(pagenum) if self._use_cache else None
            if page is None:
                page = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page

            # Offset of the first wanted entry within this page
            if page_first <= start < next_first:
                skip = start % pagesize
            else:
                skip = 0

            # Offset just past the last wanted entry, if the slice ends here
            if end is not None and page_first <= end <= next_first:
                stop = ((end - 1) % pagesize) + 1
            else:
                stop = None

            if skip != 0 or stop is not None:
                page = page[skip:stop]
            collected.extend(page)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page) + skip < pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_first:
                break
        return collected
81c2f20b
PH
3718
3719
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list whose number of pages is known up front.

    pagefunc(pagenum) returns the entries of one page, pagecount is the total
    number of pages and pagesize the number of entries of a full page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with index start <= i < end (to the end if end is None)."""
        res = []
        start_page = start // self._pagesize
        # Never request pages past the known page count, even when `end`
        # points beyond the last available entry
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Entries still missing until `end` is reached (None: no upper bound)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3747
3748
def uppercase_escape(s):
    """Decode every backslash-U escape (8 hex digits) found in s into its character."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
0fe2ff78
YCH
3755
3756
def lowercase_escape(s):
    """Decode every backslash-u escape (4 hex digits) found in s into its character."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
b53466e1 3763
d05cfe06
S
3764
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # quote() on Python 2 needs an encoded byte string
        s = s.encode('utf-8')
    # Characters listed here are passed through unquoted
    unquoted = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, unquoted)
d05cfe06
S
3770
3771
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA-encoded; every other component is percent-escaped
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3782
62e609ab
PH
3783
def read_batch_urls(batch_fd):
    """Read the URLs contained in a batch file object and close it.

    Each line (bytes or text) is one entry. A leading UTF-8 byte order mark
    is removed and surrounding whitespace stripped; empty lines and lines
    starting with '#', ';' or ']' are skipped. Returns the list of URLs.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # The BOM reads as U+FEFF when the input was decoded as UTF-8, and as
        # the three characters EF BB BF when its bytes were decoded one by one
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
3798
3799
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3802
3803
def update_url_query(url, query):
    """Return url with its query string updated from the mapping query.

    Keys present in query replace the values already in the URL. An empty
    query returns url unchanged.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3812
8e60dc75 3813
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with some parts replaced.

    headers are merged over the existing ones, data and url replace the
    originals when given, and query is merged into the URL's query string.
    HEAD and PUT requests keep their method; the timeout attribute is
    carried over when req has one.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    payload = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)

    method = req.get_method()
    if method == 'HEAD':
        request_class = HEADRequest
    elif method == 'PUT':
        request_class = PUTRequest
    else:
        request_class = compat_urllib_request.Request

    new_req = request_class(
        target_url, data=payload, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3832
3833
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns a (body bytes, content type) tuple. Raises ValueError when the
    boundary occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
3854
3855
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body bytes, content type) tuple. A specified boundary that
    occurs in the data raises ValueError; random ones are simply retried.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # The caller's boundary is used as is, a clash propagates
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary occurs in the data, draw another one
            continue
3884
3885
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key or the first usable key of a list/tuple of keys in d.

    With several keys, missing keys and None values are skipped, as are
    falsy values unless skip_false_values is false; default is returned if
    no key qualifies. A single key is a plain d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)

    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
3894
3895
def try_get(src, getter, expected_type=None):
    """Apply getter (or each of a list/tuple of getters) to src, tolerating failures.

    Returns the first result obtained without an AttributeError, KeyError,
    TypeError or IndexError that is an instance of expected_type (any result
    if expected_type is None); None when no getter delivers one.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
3907
3908
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts into a new dict, earlier dicts taking precedence.

    None values are ignored. A value already taken from an earlier dict is
    only replaced when it is an empty string and the later value is a
    non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_empty_string = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_empty_string:
                    continue
            merged[k] = v
    return merged
3921
3922
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as text: unchanged if it already is compat_str, otherwise
    converted with compat_str(string, encoding, errors)."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
3925
16392824 3926
a1a530b0
PH
# Maps a rating label to the age limit parse_age_limit() returns for it
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
3934
3935
# Maps a 'TV-*' rating label to the age limit parse_age_limit() returns for
# it; every key must start with 'TV-' since parse_age_limit() strips the
# first three characters to build its pattern
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
3944
3945
def parse_age_limit(s):
    """Convert an age rating into a numeric age limit, or None.

    Accepted inputs: an int between 0 and 21, a string of one or two digits
    with an optional trailing '+', a key of US_RATINGS, or a
    TV_PARENTAL_GUIDELINES label written with '-', '_' or nothing after 'TV'.
    """
    if type(s) is int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None

    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]

    return None
146c80e2
S
3960
3961
def strip_jsonp(code):
    """Strip a JSONP wrapper such as 'callback({...});' and return the payload.

    An optional 'window.' prefix, a 'name && name(...)' guard and trailing
    '//' comments are accepted. Input that is not wrapped is returned
    unchanged.
    """
    jsonp_rex = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_rex.sub(r'\g<callback_data>', code)
478c2c61
PH
3970
3971
def js_to_json(code):
    """Rewrite a JavaScript object/array literal so that it is valid JSON.

    Quoted strings are re-quoted with double quotes, bare identifiers and
    numeric keys get quoted, hexadecimal and octal integers are converted to
    decimal, and comments as well as trailing commas are removed.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Escape sequences (and the bare double quote) that must be rewritten
    # inside a string literal; anything else is kept as it is
    STRING_REPLACEMENTS = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_string_part(part):
        found = part.group(0)
        return STRING_REPLACEMENTS.get(found, found)

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith('/*') or token.startswith('//') or token == ',':
            # Comments and trailing commas are dropped
            return ""

        if token[0] in ("'", '"'):
            token = re.sub(r'(?s)\\.|"', fix_string_part, token[1:-1])

        for regex, base in INTEGER_TABLE:
            number = re.match(regex, token)
            if number:
                value = int(number.group(1), base)
                if token.endswith(':'):
                    return '"%d":' % value
                return '%d' % value

        return '"%s"' % token

    token_re = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(token_re, fix_kv, code)
e05f6939
PH
4011
4012
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values:
    the returned function maps an id to its index, or -1 if unknown """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4021
acd69589
PH
4022
# Output filename template built from the title, id and ext fields
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 4024
a020a0dc
PH
4025
def limit_length(s, length):
    """ Add ellipses to overly long strings, so that the result is at most
    length characters long; None stays None """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4034
4035
def version_tuple(v):
    """Split a version string at '.' and '-' and return its parts as a tuple of ints.

    Raises ValueError when a part is not an integer.
    """
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
4038
4039
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    When version is empty or either string cannot be parsed, the answer is
    'not assume_new'.
    """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
    return outdated
732ea2f0
PH
4047
4048
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U, i.e. whether this
    module was loaded from a zip file or runs as a frozen executable """
    from zipimport import zipimporter

    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
4054
4055
def args_to_str(args):
    """Return a short, shell-quoted string representation of a subprocess command."""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
4059
4060
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    text = str(err)
    if sys.version_info[0] >= 3:
        return text
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return text.decode(preferredencoding())
4068
4069
def mimetype2ext(mt):
    """Return the file extension to use for a MIME type, or None for None.

    Parameters such as '; codecs=...' and letter case are ignored. Types
    without a dedicated mapping yield their subtype unchanged.
    """
    if mt is None:
        return None

    # Normalize before any lookup: a parameter or upper-case letters must not
    # make the full-type table miss, and a '/' inside a parameter must not be
    # mistaken for the type/subtype separator
    mt = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mt)
    if ext is not None:
        return ext

    res = mt.rpartition('/')[2]

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)
4105
4106
def parse_codecs(codecs_str):
    """Split a codecs string into its video and audio codec.

    Returns {} for empty input, otherwise a dict with 'vcodec' and 'acodec'
    ('none' for a missing one). Unknown codecs trigger a warning on stderr;
    if nothing is recognized and there are exactly two entries, they are
    taken as video and audio codec in that order, else {} is returned.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    video_families = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    audio_families = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [entry.strip() for entry in codecs_str.strip().strip(',').split(',')]
    splited_codecs = [entry for entry in stripped if entry]

    vcodec = acodec = None
    for full_codec in splited_codecs:
        family = full_codec.split('.')[0]
        if family in video_families:
            # Only the first codec of each kind is kept
            vcodec = vcodec or full_codec
        elif family in audio_families:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(splited_codecs) == 2:
        return {
            'vcodec': splited_codecs[0],
            'acodec': splited_codecs[1],
        }
    return {}
4136
4137
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the response headers of url_handle.

    The filename of an 'attachment' Content-Disposition header wins when it
    has an extension; otherwise the Content-Type is mapped by mimetype2ext.
    """
    getheader = url_handle.headers.get

    disposition = getheader('Content-Disposition')
    attachment = disposition and re.match(
        r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
    if attachment:
        ext = determine_ext(attachment.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
4150
4151
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI carrying the bytes data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4154
4155
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked, i.e. iff both limits
    are set and age_limit is below content_limit """

    if age_limit is None or content_limit is None:
        # No limit set, or content available for everyone
        return False
    return age_limit < content_limit
61ca9a80
PH
4164
4165
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    otherwise); the result is truthy iff the text starts with optional
    whitespace followed by '<'. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    text = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            text = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if text is None:
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4184
4185
def determine_protocol(info_dict):
    """Return the download protocol for info_dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived from
    info_dict['url']: its rtmp/mms/rtsp prefix, then its m3u8/f4m extension,
    and finally its URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    for manifest_ext in ('m3u8', 'f4m'):
        if ext == manifest_ext:
            return manifest_ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4206
4207
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values, as a text table
    with left-aligned columns separated by at least two spaces """
    table = [header_row] + data
    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*table)]
    # Every column but the last is padded to its widest cell plus one
    padded_columns = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_columns) + '%s'
    return '\n'.join(row_format % tuple(row) for row in table)
347de493
PH
4214
4215
4216def _match_one(filter_part, dct):
4217 COMPARISON_OPERATORS = {
4218 '<': operator.lt,
4219 '<=': operator.le,
4220 '>': operator.gt,
4221 '>=': operator.ge,
4222 '=': operator.eq,
4223 '!=': operator.ne,
4224 }
4225 operator_rex = re.compile(r'''(?x)\s*
4226 (?P<key>[a-z_]+)
4227 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4228 (?:
4229 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4230 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4231 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4232 )
4233 \s*$
4234 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4235 m = operator_rex.search(filter_part)
4236 if m:
4237 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4238 actual_value = dct.get(m.group('key'))
3089bc74
S
4239 if (m.group('quotedstrval') is not None
4240 or m.group('strval') is not None
e5a088dc
S
4241 # If the original field is a string and matching comparisonvalue is
4242 # a number we should respect the origin of the original field
4243 # and process comparison value as a string (see
067aa17e 4244 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4245 or actual_value is not None and m.group('intval') is not None
4246 and isinstance(actual_value, compat_str)):
347de493
PH
4247 if m.group('op') not in ('=', '!='):
4248 raise ValueError(
4249 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4250 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4251 quote = m.group('quote')
4252 if quote is not None:
4253 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4254 else:
4255 try:
4256 comparison_value = int(m.group('intval'))
4257 except ValueError:
4258 comparison_value = parse_filesize(m.group('intval'))
4259 if comparison_value is None:
4260 comparison_value = parse_filesize(m.group('intval') + 'B')
4261 if comparison_value is None:
4262 raise ValueError(
4263 'Invalid integer value %r in filter part %r' % (
4264 m.group('intval'), filter_part))
347de493
PH
4265 if actual_value is None:
4266 return m.group('none_inclusive')
4267 return op(actual_value, comparison_value)
4268
4269 UNARY_OPERATORS = {
1cc47c66
S
4270 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4271 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4272 }
4273 operator_rex = re.compile(r'''(?x)\s*
4274 (?P<op>%s)\s*(?P<key>[a-z_]+)
4275 \s*$
4276 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4277 m = operator_rex.search(filter_part)
4278 if m:
4279 op = UNARY_OPERATORS[m.group('op')]
4280 actual_value = dct.get(m.group('key'))
4281 return op(actual_value)
4282
4283 raise ValueError('Invalid filter part %r' % filter_part)
4284
4285
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax: every part of
    filter_str (parts are separated by '&') must hold for dct.
    Returns True (=passes filter) or false """

    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4291
4292
def match_filter_func(filter_str):
    """Return a function that checks an info dict against filter_str.

    The returned function yields None when the dict passes the filter and a
    message explaining the skip otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
4301
4302
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression into seconds.

    Understands an offset in seconds ('12.5' or '12.5s') and a clock time
    'H:MM:SS' whose fraction may follow a '.' or a ':'. Returns None for
    empty or unrecognized input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
bf6427d2
YCH
4314
4315
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
bf6427d2
YCH
4318
4319
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT, keeping the supported styling
    (colour, font face/size, bold, italic, underline) as SRT markup.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Older drafts of the standard used different namespace URIs; they are
    # rewritten to the current ones so a single set of XPaths can be used.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved (inheritance already applied) property dict
    styles = {}
    # properties inherited by every paragraph from <body>/<div>
    default_style = {}

    class TTMLPElementParser(object):
        """XMLParser target that renders a single <p> element as SRT text."""

        def __init__(self):
            # State is kept per instance so that nothing can leak from one
            # paragraph (one parser instance) into the next.
            self._out = ''
            # One entry per open non-<br> element:
            # (tags to close on end, whether a style was pushed on start)
            self._unclosed_elements = []
            # Stack of the effective style of every styled open element
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
                return

            unclosed_elements = []
            style = {}
            element_style_id = attrib.get('style')
            if default_style:
                style.update(default_style)
            if element_style_id:
                style.update(styles.get(element_style_id, {}))
            for prop in SUPPORTED_STYLING:
                prop_val = attrib.get(_x('tts:' + prop))
                if prop_val:
                    style[prop] = prop_val
            if style:
                font = ''
                for k, v in sorted(style.items()):
                    # Skip whatever the enclosing element already applies
                    if self._applied_styles and self._applied_styles[-1].get(k) == v:
                        continue
                    if k == 'color':
                        font += ' color="%s"' % v
                    elif k == 'fontSize':
                        font += ' size="%s"' % v
                    elif k == 'fontFamily':
                        font += ' face="%s"' % v
                    elif k == 'fontWeight' and v == 'bold':
                        self._out += '<b>'
                        unclosed_elements.append('b')
                    elif k == 'fontStyle' and v == 'italic':
                        self._out += '<i>'
                        unclosed_elements.append('i')
                    elif k == 'textDecoration' and v == 'underline':
                        self._out += '<u>'
                        unclosed_elements.append('u')
                if font:
                    self._out += '<font' + font + '>'
                    unclosed_elements.append('font')
                applied_style = {}
                if self._applied_styles:
                    applied_style.update(self._applied_styles[-1])
                applied_style.update(style)
                self._applied_styles.append(applied_style)
            # Remember whether a style was pushed so that end() pops the
            # style stack exactly when start() pushed onto it, even if no
            # markup had to be opened for this element.
            self._unclosed_elements.append((unclosed_elements, bool(style)))

        def end(self, tag):
            if tag in (_x('ttml:br'), 'br'):
                return
            unclosed_elements, style_pushed = self._unclosed_elements.pop()
            for element in reversed(unclosed_elements):
                self._out += '</%s>' % element
            if style_pushed:
                self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the paragraph and stream it through the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    def register_style(style_id, style, parent_style_id):
        # Store the resolved properties of one <style> element. Every
        # processed style gets an entry (possibly empty) so that children
        # of a style without supported properties can still be resolved.
        if parent_style_id:
            styles[style_id] = styles[parent_style_id].copy()
        else:
            styles.setdefault(style_id, {})
        for prop in SUPPORTED_STYLING:
            prop_val = style.get(_x('tts:' + prop))
            if prop_val:
                styles[style_id][prop] = prop_val

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    pending = []
    for style in dfxp.findall(_x('.//ttml:style')):
        style_id = style.get('id') or style.get(_x('xml:id'))
        if style_id:
            pending.append((style_id, style))

    # A style may inherit from a style declared later in the document, so
    # keep making passes over the unresolved ones. Every pass must resolve
    # at least one style; otherwise the rest form an inheritance cycle and
    # the loop is abandoned instead of spinning forever.
    while pending:
        pending_ids = set(style_id for style_id, _ in pending)
        deferred = []
        for style_id, style in pending:
            parent_style_id = style.get('style')
            if parent_style_id and parent_style_id not in styles:
                if parent_style_id in pending_ids:
                    deferred.append((style_id, style))
                    continue
                # The parent is not declared anywhere: ignore the reference
                parent_style_id = None
            register_style(style_id, style, parent_style_id)
        if len(deferred) == len(pending):
            # Cyclic inheritance: fall back to each style's own properties
            for style_id, style in deferred:
                register_style(style_id, style, None)
            break
        pending = deferred

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        # Paragraphs without usable timing are skipped
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4482
4483
66e289ba
S
def cli_option(params, command_option, param):
    """Build the command line arguments for an option that takes a value.

    @param params          Dictionary of parameters
    @param command_option  Name of the command line switch, e.g. '--proxy'
    @param param           Key to look up in params
    @returns [command_option, value] with the value converted to a string,
             or an empty list when the parameter is missing or None
    """
    value = params.get(param)
    if value is None:
        return []
    # Convert every non-None value: falsy values such as 0 must end up as
    # strings too, since the result is used as a process argument list.
    return [command_option, compat_str(value)]
4489
4490
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the command line arguments for a boolean option.

    @param params          Dictionary of parameters
    @param command_option  Name of the command line switch
    @param param           Key to look up in params; its value must be a
                           bool (or None / missing)
    @param true_value      Text emitted when the parameter is True
    @param false_value     Text emitted when the parameter is False
    @param separator       When given, switch and value are joined with it
                           into a single argument
    @returns A list of arguments; empty when the parameter is None/missing
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4499
4500
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value.

    A missing parameter is looked up as None. An empty list is returned
    when the value does not compare equal to expected_value.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4504
4505
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra command line arguments stored in params.

    @param params   Dictionary of parameters
    @param param    Key to look up in params; its value must be a list
                    (or None / missing)
    @param default  Value to fall back to when the parameter is None or
                    missing
    @returns The configured list, or the default. A list default is
             returned as a copy.
    """
    ex_args = params.get(param)
    if ex_args is None:
        # Never hand out the default object itself: the signature's default
        # list is shared between calls, and a caller extending the result
        # in place would otherwise change what later calls get back.
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
4512
4513
39672624
YCH
class ISO639Utils(object):
    """Conversion between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked at. None is
        returned for unknown codes.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        The first matching entry in the table's iteration order wins.
        None is returned for unknown codes.
        """
        matches = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
4717
4718
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of English country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the
        corresponding full name.

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        country_code = code.upper()
        return cls._country_map.get(country_code)
4977
4978
773f291d
S
class GeoUtils(object):
    """Helpers for picking IPv4 addresses that belong to a given country."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a string) from an address block.

        @param code_or_block Either a two-character country code, which is
                             looked up case-insensitively in the table
                             above, or a block in 'address/prefixlen' form
        @returns The dotted-quad address, or None when the country code
                 has no entry in the table
        """
        block = code_or_block
        # Any two-character argument is treated as a country code
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Setting all bits right of the prefix gives the last address
        host_mask = 0xffffffff >> int(preflen)
        addr_max = addr_min | host_mask
        packed = compat_struct_pack('!L', random.randint(addr_min, addr_max))
        return compat_str(socket.inet_ntoa(packed))
773f291d
S
5237
5238
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets a single request choose its own proxy
    through the 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers; the base initializer is run afterwards
        for scheme in ('http', 'https'):
            # The scheme and the bound method are captured as default
            # arguments so each opener keeps its own values
            opener = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, opener)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy requested on the request itself takes precedence; the
        # header is removed from the request once it has been read.
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            proxy = per_request_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers wrap the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5262
5263
0a5445dd
YCH
5264# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5265# released into Public Domain
5266# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5267
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Zero and negative numbers both give a single zero byte.

    If optional blocksize is given and greater than zero, the front of the
    byte string is padded with binary zeros so that the length is a
    multiple of blocksize.
    """
    n = int(n)
    # Emit the number 32 bits at a time, least significant word first
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # Drop the leading zero bytes of the most significant word, but always
    # keep at least one byte
    packed = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    if blocksize > 0:
        remainder = len(packed) % blocksize
        if remainder:
            packed = (blocksize - remainder) * b'\000' + packed
    return packed
5296
5297
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes(). An empty string
    gives 0.
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s
    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
5313
5314
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The bytes of data are reversed before being read as one hexadecimal
    number, i.e. the first byte of data is the least significant one.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
81bdc8fd
YCH
5330
5331
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data. The padding
    bytes are all non-zero, because the first zero byte after the [0, 2]
    header is what marks the start of the data.

    NOTE: the padding comes from the `random` module, which is not a
    cryptographically secure source.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves room for fewer than 8 padding
                               bytes (len(data) > length - 11)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5345
5346
def encode_base_n(num, n, table=None):
    """Convert an integer to its base-n string representation.

    Negative numbers are encoded as '-' followed by the encoding of their
    absolute value.

    @param num    Integer to convert
    @param n      Base to convert to
    @param table  Optional string of digit characters, indexed by digit
                  value; defaults to the first n characters of 0-9a-zA-Z
    @returns The encoded string
    @raises ValueError if n exceeds the length of the digit table, or if n
                       is smaller than 2 and num is not zero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Repeated division by a base below 2 never reaches zero
    if n < 2:
        raise ValueError('base %d is too small to encode a non-zero number' % n)

    # Floor division of a negative number never reaches zero either, so
    # encode the magnitude and put the sign in front
    sign = ''
    if num < 0:
        sign, num = '-', -num

    digits = []
    while num:
        num, digit = divmod(num, n)
        digits.append(table[digit])
    return sign + ''.join(reversed(digits))
f52354a8
YCH
5363
5364
def decode_packed_codes(code):
    """Unpack obfuscated code found by PACKED_CODES_RE.

    The match provides the obfuscated code, a numeric base, a symbol count
    and a '|'-separated symbol list. Every word of the obfuscated code that
    is the base-n encoding of a symbol index is replaced by that symbol
    (or left as it is when the symbol is empty).

    NOTE(review): PACKED_CODES_RE is defined elsewhere in this file; it
    presumably matches the output of a "p,a,c,k,e,d"-style JavaScript
    packer - confirm against its definition.

    @param code  Text containing the packed code
    @returns The code with all known symbols substituted
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in range(count):
        base_n_index = encode_base_n(index, base)
        # An empty symbol means that the word stands for itself
        symbol_table[base_n_index] = symbols[index] or base_n_index

    # Words without an entry in the table are kept unchanged instead of
    # aborting the whole substitution with a KeyError
    return re.sub(
        r'\b(\w+)\b',
        lambda word: symbol_table.get(word.group(0), word.group(0)),
        obfuscated_code)
e154c651 5381
5382
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=VALUE attribute list into a dictionary.

    Values may be enclosed in double quotes (and may then contain commas);
    the quotes are removed. All values are returned as strings. When a key
    occurs more than once the last value wins.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return dict(
        (key, val[1:-1] if val.startswith('"') else val)
        for key, val in pairs)
1143535d
YCH
5390
5391
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as an unsigned
    32-bit number (2**32 is added to it before shifting)."""
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5394
5395
5396# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5397# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into rows of unfiltered byte values.

    Every scanline is read as width * 3 bytes; bit depth and colour type
    are not inspected (NOTE(review): this presumably restricts the decoder
    to 8-bit RGB images without alpha - confirm with callers).

    @param png_data A bytes-like object containing the PNG file
    @returns A (width, height, pixels) tuple; pixels is a list with one
             list per row holding width * 3 integers
    @raises IOError if the signature or the IHDR chunk is missing, or if
                    the file contains no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}

    def unpack_integer(x):
        return compat_struct_unpack(int_map[len(x)], x)[0]

    # Split the remainder of the file into chunks:
    # 4 bytes length, 4 bytes type, <length> bytes data, 4 bytes CRC
    chunks = []
    remaining = header
    while remaining:
        length = unpack_integer(remaining[:4])
        chunk_type = remaining[4:8]
        chunk_data = remaining[8:8 + length]
        remaining = remaining[8 + length + 4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is preceded by one byte holding its filter type
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]

        prev_row = pixels[y - 1] if y > 0 else None
        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + row_start + x]
            # Neighbours used by the filters: the same channel of the
            # pixel to the left and of the pixel above (0 at the edges)
            left = current_row[x - 3] if x > 2 else 0
            up = prev_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = prev_row[x - 3] if x > 2 and y > 0 else 0

                p = a + b - c
                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use the neighbour closest to p; ties go to a, then b
                if pa <= pb and pa <= pc:
                    predictor = a
                elif pb <= pc:
                    predictor = b
                else:
                    predictor = c
                color = (color + predictor) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5501
5502
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file `path` to `value`.

    The first usable backend is picked, in this order:
      1. the `xattr` Python module (pyxattr >= 0.5.0, or the xattr package),
      2. NTFS Alternate Data Streams when compat_os_name is 'nt',
      3. the `setfattr` or `xattr` command line tools.

    `value` is expected to be bytes: it is written raw to the Alternate
    Data Stream and decoded as UTF-8 before being handed to the command
    line tools.

    Raises XAttrMetadataError when the selected backend fails to write the
    attribute and XAttrUnavailableError when no backend is usable.
    """
    # This mess below finds the best xattr tool for the job
    setxattr = None
    # Explanation kept for the final error message in case an outdated
    # pyxattr is the only thing found on this system
    pyxattr_too_old = ''

    try:
        # try the pyxattr module...
        import xattr
    except ImportError:
        pass
    else:
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # Do not give up here: actually fall back to the other
                # implementations below instead of only claiming to
                pyxattr_too_old = (
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. ' % (
                        pyxattr_required_version, xattr.__version__))
            else:
                setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

    if setxattr is not None:
        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    if compat_os_name == 'nt':
        # Write xattrs to NTFS Alternate Data Streams:
        # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
        assert ':' not in key
        assert os.path.exists(path)

        ads_fn = path + ':' + key
        try:
            with open(ads_fn, 'wb') as f:
                f.write(value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # Only probe for the `xattr` binary when `setfattr` is missing
    if check_executable('setfattr', ['--version']):
        executable = 'setfattr'
        opts = ['-n', key, '-v', value.decode('utf-8')]
    elif check_executable('xattr', ['-h']):
        executable = 'xattr'
        opts = ['-w', key, value.decode('utf-8')]
    else:
        # On Unix, and can't find pyxattr, setfattr, or xattr.
        if sys.platform.startswith('linux'):
            raise XAttrUnavailableError(
                pyxattr_too_old
                + "Couldn't find a tool to set the xattrs. "
                "Install either the python 'pyxattr' or 'xattr' "
                "modules, or the GNU 'attr' package "
                "(which contains the 'setfattr' tool).")
        else:
            raise XAttrUnavailableError(
                pyxattr_too_old
                + "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

    cmd = ([encodeFilename(executable, True)]
           + [encodeArgument(o) for o in opts]
           + [encodeFilename(path, True)])

    try:
        p = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except EnvironmentError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    _, stderr = p.communicate()
    stderr = stderr.decode('utf-8', 'replace')
    if p.returncode != 0:
        raise XAttrMetadataError(p.returncode, stderr)
0c265486
YCH
5585
5586
def random_birthday(year_field, month_field, day_field):
    """Return a random birthday keyed by the given field names.

    A date is drawn uniformly from 1950-01-01 to 1995-12-31 (both
    inclusive). The result maps `year_field`, `month_field` and
    `day_field` to the year, month and day of that date, each converted
    with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(
        (name, str(part)) for name, part in zip(field_names, date_parts))