]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
release 2019.11.28
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
62e609ab 10import contextlib
e3946f98 11import ctypes
c496ca96
PH
12import datetime
13import email.utils
0c265486 14import email.header
f45c185f 15import errno
be4a824d 16import functools
d77c3dfd 17import gzip
03f9daab 18import io
79a2e94e 19import itertools
f4bfd65f 20import json
d77c3dfd 21import locale
02dbf93f 22import math
347de493 23import operator
d77c3dfd 24import os
c496ca96 25import platform
773f291d 26import random
d77c3dfd 27import re
c496ca96 28import socket
79a2e94e 29import ssl
1c088fa8 30import subprocess
d77c3dfd 31import sys
181c8655 32import tempfile
01951dda 33import traceback
bcf89ce6 34import xml.etree.ElementTree
d77c3dfd 35import zlib
d77c3dfd 36
8c25f81b 37from .compat import (
b4a3d461 38 compat_HTMLParseError,
8bb56eee 39 compat_HTMLParser,
8f9312c3 40 compat_basestring,
8c25f81b 41 compat_chr,
1bab3437 42 compat_cookiejar,
d7cd9a9e 43 compat_ctypes_WINFUNCTYPE,
36e6f62c 44 compat_etree_fromstring,
51098426 45 compat_expanduser,
8c25f81b 46 compat_html_entities,
55b2f099 47 compat_html_entities_html5,
be4a824d 48 compat_http_client,
c86b6142 49 compat_kwargs,
efa97bdc 50 compat_os_name,
8c25f81b 51 compat_parse_qs,
702ccf2d 52 compat_shlex_quote,
8c25f81b 53 compat_str,
edaa23f8 54 compat_struct_pack,
d3f8e038 55 compat_struct_unpack,
8c25f81b
PH
56 compat_urllib_error,
57 compat_urllib_parse,
15707c7e 58 compat_urllib_parse_urlencode,
8c25f81b 59 compat_urllib_parse_urlparse,
7581bfc9 60 compat_urllib_parse_unquote_plus,
8c25f81b
PH
61 compat_urllib_request,
62 compat_urlparse,
810c10ba 63 compat_xpath,
8c25f81b 64)
4644ac55 65
71aff188
YCH
66from .socks import (
67 ProxyType,
68 sockssocket,
69)
70
4644ac55 71
51fb4995
YCH
def register_socks_protocols():
    """Make the URL parser treat SOCKS proxy URLs as having a netloc.

    Appends each of the ``socks``, ``socks4``, ``socks4a`` and ``socks5``
    schemes to ``compat_urlparse.uses_netloc`` unless it is already
    listed, so calling this function more than once adds no duplicates.
    """
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
79
80
468e2e92
FV
# The type of a compiled regular expression is not clearly defined
# otherwise, so derive it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))
83
f7a147e3
S
84
85def random_user_agent():
86 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
87 _CHROME_VERSIONS = (
88 '74.0.3729.129',
89 '76.0.3780.3',
90 '76.0.3780.2',
91 '74.0.3729.128',
92 '76.0.3780.1',
93 '76.0.3780.0',
94 '75.0.3770.15',
95 '74.0.3729.127',
96 '74.0.3729.126',
97 '76.0.3779.1',
98 '76.0.3779.0',
99 '75.0.3770.14',
100 '74.0.3729.125',
101 '76.0.3778.1',
102 '76.0.3778.0',
103 '75.0.3770.13',
104 '74.0.3729.124',
105 '74.0.3729.123',
106 '73.0.3683.121',
107 '76.0.3777.1',
108 '76.0.3777.0',
109 '75.0.3770.12',
110 '74.0.3729.122',
111 '76.0.3776.4',
112 '75.0.3770.11',
113 '74.0.3729.121',
114 '76.0.3776.3',
115 '76.0.3776.2',
116 '73.0.3683.120',
117 '74.0.3729.120',
118 '74.0.3729.119',
119 '74.0.3729.118',
120 '76.0.3776.1',
121 '76.0.3776.0',
122 '76.0.3775.5',
123 '75.0.3770.10',
124 '74.0.3729.117',
125 '76.0.3775.4',
126 '76.0.3775.3',
127 '74.0.3729.116',
128 '75.0.3770.9',
129 '76.0.3775.2',
130 '76.0.3775.1',
131 '76.0.3775.0',
132 '75.0.3770.8',
133 '74.0.3729.115',
134 '74.0.3729.114',
135 '76.0.3774.1',
136 '76.0.3774.0',
137 '75.0.3770.7',
138 '74.0.3729.113',
139 '74.0.3729.112',
140 '74.0.3729.111',
141 '76.0.3773.1',
142 '76.0.3773.0',
143 '75.0.3770.6',
144 '74.0.3729.110',
145 '74.0.3729.109',
146 '76.0.3772.1',
147 '76.0.3772.0',
148 '75.0.3770.5',
149 '74.0.3729.108',
150 '74.0.3729.107',
151 '76.0.3771.1',
152 '76.0.3771.0',
153 '75.0.3770.4',
154 '74.0.3729.106',
155 '74.0.3729.105',
156 '75.0.3770.3',
157 '74.0.3729.104',
158 '74.0.3729.103',
159 '74.0.3729.102',
160 '75.0.3770.2',
161 '74.0.3729.101',
162 '75.0.3770.1',
163 '75.0.3770.0',
164 '74.0.3729.100',
165 '75.0.3769.5',
166 '75.0.3769.4',
167 '74.0.3729.99',
168 '75.0.3769.3',
169 '75.0.3769.2',
170 '75.0.3768.6',
171 '74.0.3729.98',
172 '75.0.3769.1',
173 '75.0.3769.0',
174 '74.0.3729.97',
175 '73.0.3683.119',
176 '73.0.3683.118',
177 '74.0.3729.96',
178 '75.0.3768.5',
179 '75.0.3768.4',
180 '75.0.3768.3',
181 '75.0.3768.2',
182 '74.0.3729.95',
183 '74.0.3729.94',
184 '75.0.3768.1',
185 '75.0.3768.0',
186 '74.0.3729.93',
187 '74.0.3729.92',
188 '73.0.3683.117',
189 '74.0.3729.91',
190 '75.0.3766.3',
191 '74.0.3729.90',
192 '75.0.3767.2',
193 '75.0.3767.1',
194 '75.0.3767.0',
195 '74.0.3729.89',
196 '73.0.3683.116',
197 '75.0.3766.2',
198 '74.0.3729.88',
199 '75.0.3766.1',
200 '75.0.3766.0',
201 '74.0.3729.87',
202 '73.0.3683.115',
203 '74.0.3729.86',
204 '75.0.3765.1',
205 '75.0.3765.0',
206 '74.0.3729.85',
207 '73.0.3683.114',
208 '74.0.3729.84',
209 '75.0.3764.1',
210 '75.0.3764.0',
211 '74.0.3729.83',
212 '73.0.3683.113',
213 '75.0.3763.2',
214 '75.0.3761.4',
215 '74.0.3729.82',
216 '75.0.3763.1',
217 '75.0.3763.0',
218 '74.0.3729.81',
219 '73.0.3683.112',
220 '75.0.3762.1',
221 '75.0.3762.0',
222 '74.0.3729.80',
223 '75.0.3761.3',
224 '74.0.3729.79',
225 '73.0.3683.111',
226 '75.0.3761.2',
227 '74.0.3729.78',
228 '74.0.3729.77',
229 '75.0.3761.1',
230 '75.0.3761.0',
231 '73.0.3683.110',
232 '74.0.3729.76',
233 '74.0.3729.75',
234 '75.0.3760.0',
235 '74.0.3729.74',
236 '75.0.3759.8',
237 '75.0.3759.7',
238 '75.0.3759.6',
239 '74.0.3729.73',
240 '75.0.3759.5',
241 '74.0.3729.72',
242 '73.0.3683.109',
243 '75.0.3759.4',
244 '75.0.3759.3',
245 '74.0.3729.71',
246 '75.0.3759.2',
247 '74.0.3729.70',
248 '73.0.3683.108',
249 '74.0.3729.69',
250 '75.0.3759.1',
251 '75.0.3759.0',
252 '74.0.3729.68',
253 '73.0.3683.107',
254 '74.0.3729.67',
255 '75.0.3758.1',
256 '75.0.3758.0',
257 '74.0.3729.66',
258 '73.0.3683.106',
259 '74.0.3729.65',
260 '75.0.3757.1',
261 '75.0.3757.0',
262 '74.0.3729.64',
263 '73.0.3683.105',
264 '74.0.3729.63',
265 '75.0.3756.1',
266 '75.0.3756.0',
267 '74.0.3729.62',
268 '73.0.3683.104',
269 '75.0.3755.3',
270 '75.0.3755.2',
271 '73.0.3683.103',
272 '75.0.3755.1',
273 '75.0.3755.0',
274 '74.0.3729.61',
275 '73.0.3683.102',
276 '74.0.3729.60',
277 '75.0.3754.2',
278 '74.0.3729.59',
279 '75.0.3753.4',
280 '74.0.3729.58',
281 '75.0.3754.1',
282 '75.0.3754.0',
283 '74.0.3729.57',
284 '73.0.3683.101',
285 '75.0.3753.3',
286 '75.0.3752.2',
287 '75.0.3753.2',
288 '74.0.3729.56',
289 '75.0.3753.1',
290 '75.0.3753.0',
291 '74.0.3729.55',
292 '73.0.3683.100',
293 '74.0.3729.54',
294 '75.0.3752.1',
295 '75.0.3752.0',
296 '74.0.3729.53',
297 '73.0.3683.99',
298 '74.0.3729.52',
299 '75.0.3751.1',
300 '75.0.3751.0',
301 '74.0.3729.51',
302 '73.0.3683.98',
303 '74.0.3729.50',
304 '75.0.3750.0',
305 '74.0.3729.49',
306 '74.0.3729.48',
307 '74.0.3729.47',
308 '75.0.3749.3',
309 '74.0.3729.46',
310 '73.0.3683.97',
311 '75.0.3749.2',
312 '74.0.3729.45',
313 '75.0.3749.1',
314 '75.0.3749.0',
315 '74.0.3729.44',
316 '73.0.3683.96',
317 '74.0.3729.43',
318 '74.0.3729.42',
319 '75.0.3748.1',
320 '75.0.3748.0',
321 '74.0.3729.41',
322 '75.0.3747.1',
323 '73.0.3683.95',
324 '75.0.3746.4',
325 '74.0.3729.40',
326 '74.0.3729.39',
327 '75.0.3747.0',
328 '75.0.3746.3',
329 '75.0.3746.2',
330 '74.0.3729.38',
331 '75.0.3746.1',
332 '75.0.3746.0',
333 '74.0.3729.37',
334 '73.0.3683.94',
335 '75.0.3745.5',
336 '75.0.3745.4',
337 '75.0.3745.3',
338 '75.0.3745.2',
339 '74.0.3729.36',
340 '75.0.3745.1',
341 '75.0.3745.0',
342 '75.0.3744.2',
343 '74.0.3729.35',
344 '73.0.3683.93',
345 '74.0.3729.34',
346 '75.0.3744.1',
347 '75.0.3744.0',
348 '74.0.3729.33',
349 '73.0.3683.92',
350 '74.0.3729.32',
351 '74.0.3729.31',
352 '73.0.3683.91',
353 '75.0.3741.2',
354 '75.0.3740.5',
355 '74.0.3729.30',
356 '75.0.3741.1',
357 '75.0.3741.0',
358 '74.0.3729.29',
359 '75.0.3740.4',
360 '73.0.3683.90',
361 '74.0.3729.28',
362 '75.0.3740.3',
363 '73.0.3683.89',
364 '75.0.3740.2',
365 '74.0.3729.27',
366 '75.0.3740.1',
367 '75.0.3740.0',
368 '74.0.3729.26',
369 '73.0.3683.88',
370 '73.0.3683.87',
371 '74.0.3729.25',
372 '75.0.3739.1',
373 '75.0.3739.0',
374 '73.0.3683.86',
375 '74.0.3729.24',
376 '73.0.3683.85',
377 '75.0.3738.4',
378 '75.0.3738.3',
379 '75.0.3738.2',
380 '75.0.3738.1',
381 '75.0.3738.0',
382 '74.0.3729.23',
383 '73.0.3683.84',
384 '74.0.3729.22',
385 '74.0.3729.21',
386 '75.0.3737.1',
387 '75.0.3737.0',
388 '74.0.3729.20',
389 '73.0.3683.83',
390 '74.0.3729.19',
391 '75.0.3736.1',
392 '75.0.3736.0',
393 '74.0.3729.18',
394 '73.0.3683.82',
395 '74.0.3729.17',
396 '75.0.3735.1',
397 '75.0.3735.0',
398 '74.0.3729.16',
399 '73.0.3683.81',
400 '75.0.3734.1',
401 '75.0.3734.0',
402 '74.0.3729.15',
403 '73.0.3683.80',
404 '74.0.3729.14',
405 '75.0.3733.1',
406 '75.0.3733.0',
407 '75.0.3732.1',
408 '74.0.3729.13',
409 '74.0.3729.12',
410 '73.0.3683.79',
411 '74.0.3729.11',
412 '75.0.3732.0',
413 '74.0.3729.10',
414 '73.0.3683.78',
415 '74.0.3729.9',
416 '74.0.3729.8',
417 '74.0.3729.7',
418 '75.0.3731.3',
419 '75.0.3731.2',
420 '75.0.3731.0',
421 '74.0.3729.6',
422 '73.0.3683.77',
423 '73.0.3683.76',
424 '75.0.3730.5',
425 '75.0.3730.4',
426 '73.0.3683.75',
427 '74.0.3729.5',
428 '73.0.3683.74',
429 '75.0.3730.3',
430 '75.0.3730.2',
431 '74.0.3729.4',
432 '73.0.3683.73',
433 '73.0.3683.72',
434 '75.0.3730.1',
435 '75.0.3730.0',
436 '74.0.3729.3',
437 '73.0.3683.71',
438 '74.0.3729.2',
439 '73.0.3683.70',
440 '74.0.3729.1',
441 '74.0.3729.0',
442 '74.0.3726.4',
443 '73.0.3683.69',
444 '74.0.3726.3',
445 '74.0.3728.0',
446 '74.0.3726.2',
447 '73.0.3683.68',
448 '74.0.3726.1',
449 '74.0.3726.0',
450 '74.0.3725.4',
451 '73.0.3683.67',
452 '73.0.3683.66',
453 '74.0.3725.3',
454 '74.0.3725.2',
455 '74.0.3725.1',
456 '74.0.3724.8',
457 '74.0.3725.0',
458 '73.0.3683.65',
459 '74.0.3724.7',
460 '74.0.3724.6',
461 '74.0.3724.5',
462 '74.0.3724.4',
463 '74.0.3724.3',
464 '74.0.3724.2',
465 '74.0.3724.1',
466 '74.0.3724.0',
467 '73.0.3683.64',
468 '74.0.3723.1',
469 '74.0.3723.0',
470 '73.0.3683.63',
471 '74.0.3722.1',
472 '74.0.3722.0',
473 '73.0.3683.62',
474 '74.0.3718.9',
475 '74.0.3702.3',
476 '74.0.3721.3',
477 '74.0.3721.2',
478 '74.0.3721.1',
479 '74.0.3721.0',
480 '74.0.3720.6',
481 '73.0.3683.61',
482 '72.0.3626.122',
483 '73.0.3683.60',
484 '74.0.3720.5',
485 '72.0.3626.121',
486 '74.0.3718.8',
487 '74.0.3720.4',
488 '74.0.3720.3',
489 '74.0.3718.7',
490 '74.0.3720.2',
491 '74.0.3720.1',
492 '74.0.3720.0',
493 '74.0.3718.6',
494 '74.0.3719.5',
495 '73.0.3683.59',
496 '74.0.3718.5',
497 '74.0.3718.4',
498 '74.0.3719.4',
499 '74.0.3719.3',
500 '74.0.3719.2',
501 '74.0.3719.1',
502 '73.0.3683.58',
503 '74.0.3719.0',
504 '73.0.3683.57',
505 '73.0.3683.56',
506 '74.0.3718.3',
507 '73.0.3683.55',
508 '74.0.3718.2',
509 '74.0.3718.1',
510 '74.0.3718.0',
511 '73.0.3683.54',
512 '74.0.3717.2',
513 '73.0.3683.53',
514 '74.0.3717.1',
515 '74.0.3717.0',
516 '73.0.3683.52',
517 '74.0.3716.1',
518 '74.0.3716.0',
519 '73.0.3683.51',
520 '74.0.3715.1',
521 '74.0.3715.0',
522 '73.0.3683.50',
523 '74.0.3711.2',
524 '74.0.3714.2',
525 '74.0.3713.3',
526 '74.0.3714.1',
527 '74.0.3714.0',
528 '73.0.3683.49',
529 '74.0.3713.1',
530 '74.0.3713.0',
531 '72.0.3626.120',
532 '73.0.3683.48',
533 '74.0.3712.2',
534 '74.0.3712.1',
535 '74.0.3712.0',
536 '73.0.3683.47',
537 '72.0.3626.119',
538 '73.0.3683.46',
539 '74.0.3710.2',
540 '72.0.3626.118',
541 '74.0.3711.1',
542 '74.0.3711.0',
543 '73.0.3683.45',
544 '72.0.3626.117',
545 '74.0.3710.1',
546 '74.0.3710.0',
547 '73.0.3683.44',
548 '72.0.3626.116',
549 '74.0.3709.1',
550 '74.0.3709.0',
551 '74.0.3704.9',
552 '73.0.3683.43',
553 '72.0.3626.115',
554 '74.0.3704.8',
555 '74.0.3704.7',
556 '74.0.3708.0',
557 '74.0.3706.7',
558 '74.0.3704.6',
559 '73.0.3683.42',
560 '72.0.3626.114',
561 '74.0.3706.6',
562 '72.0.3626.113',
563 '74.0.3704.5',
564 '74.0.3706.5',
565 '74.0.3706.4',
566 '74.0.3706.3',
567 '74.0.3706.2',
568 '74.0.3706.1',
569 '74.0.3706.0',
570 '73.0.3683.41',
571 '72.0.3626.112',
572 '74.0.3705.1',
573 '74.0.3705.0',
574 '73.0.3683.40',
575 '72.0.3626.111',
576 '73.0.3683.39',
577 '74.0.3704.4',
578 '73.0.3683.38',
579 '74.0.3704.3',
580 '74.0.3704.2',
581 '74.0.3704.1',
582 '74.0.3704.0',
583 '73.0.3683.37',
584 '72.0.3626.110',
585 '72.0.3626.109',
586 '74.0.3703.3',
587 '74.0.3703.2',
588 '73.0.3683.36',
589 '74.0.3703.1',
590 '74.0.3703.0',
591 '73.0.3683.35',
592 '72.0.3626.108',
593 '74.0.3702.2',
594 '74.0.3699.3',
595 '74.0.3702.1',
596 '74.0.3702.0',
597 '73.0.3683.34',
598 '72.0.3626.107',
599 '73.0.3683.33',
600 '74.0.3701.1',
601 '74.0.3701.0',
602 '73.0.3683.32',
603 '73.0.3683.31',
604 '72.0.3626.105',
605 '74.0.3700.1',
606 '74.0.3700.0',
607 '73.0.3683.29',
608 '72.0.3626.103',
609 '74.0.3699.2',
610 '74.0.3699.1',
611 '74.0.3699.0',
612 '73.0.3683.28',
613 '72.0.3626.102',
614 '73.0.3683.27',
615 '73.0.3683.26',
616 '74.0.3698.0',
617 '74.0.3696.2',
618 '72.0.3626.101',
619 '73.0.3683.25',
620 '74.0.3696.1',
621 '74.0.3696.0',
622 '74.0.3694.8',
623 '72.0.3626.100',
624 '74.0.3694.7',
625 '74.0.3694.6',
626 '74.0.3694.5',
627 '74.0.3694.4',
628 '72.0.3626.99',
629 '72.0.3626.98',
630 '74.0.3694.3',
631 '73.0.3683.24',
632 '72.0.3626.97',
633 '72.0.3626.96',
634 '72.0.3626.95',
635 '73.0.3683.23',
636 '72.0.3626.94',
637 '73.0.3683.22',
638 '73.0.3683.21',
639 '72.0.3626.93',
640 '74.0.3694.2',
641 '72.0.3626.92',
642 '74.0.3694.1',
643 '74.0.3694.0',
644 '74.0.3693.6',
645 '73.0.3683.20',
646 '72.0.3626.91',
647 '74.0.3693.5',
648 '74.0.3693.4',
649 '74.0.3693.3',
650 '74.0.3693.2',
651 '73.0.3683.19',
652 '74.0.3693.1',
653 '74.0.3693.0',
654 '73.0.3683.18',
655 '72.0.3626.90',
656 '74.0.3692.1',
657 '74.0.3692.0',
658 '73.0.3683.17',
659 '72.0.3626.89',
660 '74.0.3687.3',
661 '74.0.3691.1',
662 '74.0.3691.0',
663 '73.0.3683.16',
664 '72.0.3626.88',
665 '72.0.3626.87',
666 '73.0.3683.15',
667 '74.0.3690.1',
668 '74.0.3690.0',
669 '73.0.3683.14',
670 '72.0.3626.86',
671 '73.0.3683.13',
672 '73.0.3683.12',
673 '74.0.3689.1',
674 '74.0.3689.0',
675 '73.0.3683.11',
676 '72.0.3626.85',
677 '73.0.3683.10',
678 '72.0.3626.84',
679 '73.0.3683.9',
680 '74.0.3688.1',
681 '74.0.3688.0',
682 '73.0.3683.8',
683 '72.0.3626.83',
684 '74.0.3687.2',
685 '74.0.3687.1',
686 '74.0.3687.0',
687 '73.0.3683.7',
688 '72.0.3626.82',
689 '74.0.3686.4',
690 '72.0.3626.81',
691 '74.0.3686.3',
692 '74.0.3686.2',
693 '74.0.3686.1',
694 '74.0.3686.0',
695 '73.0.3683.6',
696 '72.0.3626.80',
697 '74.0.3685.1',
698 '74.0.3685.0',
699 '73.0.3683.5',
700 '72.0.3626.79',
701 '74.0.3684.1',
702 '74.0.3684.0',
703 '73.0.3683.4',
704 '72.0.3626.78',
705 '72.0.3626.77',
706 '73.0.3683.3',
707 '73.0.3683.2',
708 '72.0.3626.76',
709 '73.0.3683.1',
710 '73.0.3683.0',
711 '72.0.3626.75',
712 '71.0.3578.141',
713 '73.0.3682.1',
714 '73.0.3682.0',
715 '72.0.3626.74',
716 '71.0.3578.140',
717 '73.0.3681.4',
718 '73.0.3681.3',
719 '73.0.3681.2',
720 '73.0.3681.1',
721 '73.0.3681.0',
722 '72.0.3626.73',
723 '71.0.3578.139',
724 '72.0.3626.72',
725 '72.0.3626.71',
726 '73.0.3680.1',
727 '73.0.3680.0',
728 '72.0.3626.70',
729 '71.0.3578.138',
730 '73.0.3678.2',
731 '73.0.3679.1',
732 '73.0.3679.0',
733 '72.0.3626.69',
734 '71.0.3578.137',
735 '73.0.3678.1',
736 '73.0.3678.0',
737 '71.0.3578.136',
738 '73.0.3677.1',
739 '73.0.3677.0',
740 '72.0.3626.68',
741 '72.0.3626.67',
742 '71.0.3578.135',
743 '73.0.3676.1',
744 '73.0.3676.0',
745 '73.0.3674.2',
746 '72.0.3626.66',
747 '71.0.3578.134',
748 '73.0.3674.1',
749 '73.0.3674.0',
750 '72.0.3626.65',
751 '71.0.3578.133',
752 '73.0.3673.2',
753 '73.0.3673.1',
754 '73.0.3673.0',
755 '72.0.3626.64',
756 '71.0.3578.132',
757 '72.0.3626.63',
758 '72.0.3626.62',
759 '72.0.3626.61',
760 '72.0.3626.60',
761 '73.0.3672.1',
762 '73.0.3672.0',
763 '72.0.3626.59',
764 '71.0.3578.131',
765 '73.0.3671.3',
766 '73.0.3671.2',
767 '73.0.3671.1',
768 '73.0.3671.0',
769 '72.0.3626.58',
770 '71.0.3578.130',
771 '73.0.3670.1',
772 '73.0.3670.0',
773 '72.0.3626.57',
774 '71.0.3578.129',
775 '73.0.3669.1',
776 '73.0.3669.0',
777 '72.0.3626.56',
778 '71.0.3578.128',
779 '73.0.3668.2',
780 '73.0.3668.1',
781 '73.0.3668.0',
782 '72.0.3626.55',
783 '71.0.3578.127',
784 '73.0.3667.2',
785 '73.0.3667.1',
786 '73.0.3667.0',
787 '72.0.3626.54',
788 '71.0.3578.126',
789 '73.0.3666.1',
790 '73.0.3666.0',
791 '72.0.3626.53',
792 '71.0.3578.125',
793 '73.0.3665.4',
794 '73.0.3665.3',
795 '72.0.3626.52',
796 '73.0.3665.2',
797 '73.0.3664.4',
798 '73.0.3665.1',
799 '73.0.3665.0',
800 '72.0.3626.51',
801 '71.0.3578.124',
802 '72.0.3626.50',
803 '73.0.3664.3',
804 '73.0.3664.2',
805 '73.0.3664.1',
806 '73.0.3664.0',
807 '73.0.3663.2',
808 '72.0.3626.49',
809 '71.0.3578.123',
810 '73.0.3663.1',
811 '73.0.3663.0',
812 '72.0.3626.48',
813 '71.0.3578.122',
814 '73.0.3662.1',
815 '73.0.3662.0',
816 '72.0.3626.47',
817 '71.0.3578.121',
818 '73.0.3661.1',
819 '72.0.3626.46',
820 '73.0.3661.0',
821 '72.0.3626.45',
822 '71.0.3578.120',
823 '73.0.3660.2',
824 '73.0.3660.1',
825 '73.0.3660.0',
826 '72.0.3626.44',
827 '71.0.3578.119',
828 '73.0.3659.1',
829 '73.0.3659.0',
830 '72.0.3626.43',
831 '71.0.3578.118',
832 '73.0.3658.1',
833 '73.0.3658.0',
834 '72.0.3626.42',
835 '71.0.3578.117',
836 '73.0.3657.1',
837 '73.0.3657.0',
838 '72.0.3626.41',
839 '71.0.3578.116',
840 '73.0.3656.1',
841 '73.0.3656.0',
842 '72.0.3626.40',
843 '71.0.3578.115',
844 '73.0.3655.1',
845 '73.0.3655.0',
846 '72.0.3626.39',
847 '71.0.3578.114',
848 '73.0.3654.1',
849 '73.0.3654.0',
850 '72.0.3626.38',
851 '71.0.3578.113',
852 '73.0.3653.1',
853 '73.0.3653.0',
854 '72.0.3626.37',
855 '71.0.3578.112',
856 '73.0.3652.1',
857 '73.0.3652.0',
858 '72.0.3626.36',
859 '71.0.3578.111',
860 '73.0.3651.1',
861 '73.0.3651.0',
862 '72.0.3626.35',
863 '71.0.3578.110',
864 '73.0.3650.1',
865 '73.0.3650.0',
866 '72.0.3626.34',
867 '71.0.3578.109',
868 '73.0.3649.1',
869 '73.0.3649.0',
870 '72.0.3626.33',
871 '71.0.3578.108',
872 '73.0.3648.2',
873 '73.0.3648.1',
874 '73.0.3648.0',
875 '72.0.3626.32',
876 '71.0.3578.107',
877 '73.0.3647.2',
878 '73.0.3647.1',
879 '73.0.3647.0',
880 '72.0.3626.31',
881 '71.0.3578.106',
882 '73.0.3635.3',
883 '73.0.3646.2',
884 '73.0.3646.1',
885 '73.0.3646.0',
886 '72.0.3626.30',
887 '71.0.3578.105',
888 '72.0.3626.29',
889 '73.0.3645.2',
890 '73.0.3645.1',
891 '73.0.3645.0',
892 '72.0.3626.28',
893 '71.0.3578.104',
894 '72.0.3626.27',
895 '72.0.3626.26',
896 '72.0.3626.25',
897 '72.0.3626.24',
898 '73.0.3644.0',
899 '73.0.3643.2',
900 '72.0.3626.23',
901 '71.0.3578.103',
902 '73.0.3643.1',
903 '73.0.3643.0',
904 '72.0.3626.22',
905 '71.0.3578.102',
906 '73.0.3642.1',
907 '73.0.3642.0',
908 '72.0.3626.21',
909 '71.0.3578.101',
910 '73.0.3641.1',
911 '73.0.3641.0',
912 '72.0.3626.20',
913 '71.0.3578.100',
914 '72.0.3626.19',
915 '73.0.3640.1',
916 '73.0.3640.0',
917 '72.0.3626.18',
918 '73.0.3639.1',
919 '71.0.3578.99',
920 '73.0.3639.0',
921 '72.0.3626.17',
922 '73.0.3638.2',
923 '72.0.3626.16',
924 '73.0.3638.1',
925 '73.0.3638.0',
926 '72.0.3626.15',
927 '71.0.3578.98',
928 '73.0.3635.2',
929 '71.0.3578.97',
930 '73.0.3637.1',
931 '73.0.3637.0',
932 '72.0.3626.14',
933 '71.0.3578.96',
934 '71.0.3578.95',
935 '72.0.3626.13',
936 '71.0.3578.94',
937 '73.0.3636.2',
938 '71.0.3578.93',
939 '73.0.3636.1',
940 '73.0.3636.0',
941 '72.0.3626.12',
942 '71.0.3578.92',
943 '73.0.3635.1',
944 '73.0.3635.0',
945 '72.0.3626.11',
946 '71.0.3578.91',
947 '73.0.3634.2',
948 '73.0.3634.1',
949 '73.0.3634.0',
950 '72.0.3626.10',
951 '71.0.3578.90',
952 '71.0.3578.89',
953 '73.0.3633.2',
954 '73.0.3633.1',
955 '73.0.3633.0',
956 '72.0.3610.4',
957 '72.0.3626.9',
958 '71.0.3578.88',
959 '73.0.3632.5',
960 '73.0.3632.4',
961 '73.0.3632.3',
962 '73.0.3632.2',
963 '73.0.3632.1',
964 '73.0.3632.0',
965 '72.0.3626.8',
966 '71.0.3578.87',
967 '73.0.3631.2',
968 '73.0.3631.1',
969 '73.0.3631.0',
970 '72.0.3626.7',
971 '71.0.3578.86',
972 '72.0.3626.6',
973 '73.0.3630.1',
974 '73.0.3630.0',
975 '72.0.3626.5',
976 '71.0.3578.85',
977 '72.0.3626.4',
978 '73.0.3628.3',
979 '73.0.3628.2',
980 '73.0.3629.1',
981 '73.0.3629.0',
982 '72.0.3626.3',
983 '71.0.3578.84',
984 '73.0.3628.1',
985 '73.0.3628.0',
986 '71.0.3578.83',
987 '73.0.3627.1',
988 '73.0.3627.0',
989 '72.0.3626.2',
990 '71.0.3578.82',
991 '71.0.3578.81',
992 '71.0.3578.80',
993 '72.0.3626.1',
994 '72.0.3626.0',
995 '71.0.3578.79',
996 '70.0.3538.124',
997 '71.0.3578.78',
998 '72.0.3623.4',
999 '72.0.3625.2',
1000 '72.0.3625.1',
1001 '72.0.3625.0',
1002 '71.0.3578.77',
1003 '70.0.3538.123',
1004 '72.0.3624.4',
1005 '72.0.3624.3',
1006 '72.0.3624.2',
1007 '71.0.3578.76',
1008 '72.0.3624.1',
1009 '72.0.3624.0',
1010 '72.0.3623.3',
1011 '71.0.3578.75',
1012 '70.0.3538.122',
1013 '71.0.3578.74',
1014 '72.0.3623.2',
1015 '72.0.3610.3',
1016 '72.0.3623.1',
1017 '72.0.3623.0',
1018 '72.0.3622.3',
1019 '72.0.3622.2',
1020 '71.0.3578.73',
1021 '70.0.3538.121',
1022 '72.0.3622.1',
1023 '72.0.3622.0',
1024 '71.0.3578.72',
1025 '70.0.3538.120',
1026 '72.0.3621.1',
1027 '72.0.3621.0',
1028 '71.0.3578.71',
1029 '70.0.3538.119',
1030 '72.0.3620.1',
1031 '72.0.3620.0',
1032 '71.0.3578.70',
1033 '70.0.3538.118',
1034 '71.0.3578.69',
1035 '72.0.3619.1',
1036 '72.0.3619.0',
1037 '71.0.3578.68',
1038 '70.0.3538.117',
1039 '71.0.3578.67',
1040 '72.0.3618.1',
1041 '72.0.3618.0',
1042 '71.0.3578.66',
1043 '70.0.3538.116',
1044 '72.0.3617.1',
1045 '72.0.3617.0',
1046 '71.0.3578.65',
1047 '70.0.3538.115',
1048 '72.0.3602.3',
1049 '71.0.3578.64',
1050 '72.0.3616.1',
1051 '72.0.3616.0',
1052 '71.0.3578.63',
1053 '70.0.3538.114',
1054 '71.0.3578.62',
1055 '72.0.3615.1',
1056 '72.0.3615.0',
1057 '71.0.3578.61',
1058 '70.0.3538.113',
1059 '72.0.3614.1',
1060 '72.0.3614.0',
1061 '71.0.3578.60',
1062 '70.0.3538.112',
1063 '72.0.3613.1',
1064 '72.0.3613.0',
1065 '71.0.3578.59',
1066 '70.0.3538.111',
1067 '72.0.3612.2',
1068 '72.0.3612.1',
1069 '72.0.3612.0',
1070 '70.0.3538.110',
1071 '71.0.3578.58',
1072 '70.0.3538.109',
1073 '72.0.3611.2',
1074 '72.0.3611.1',
1075 '72.0.3611.0',
1076 '71.0.3578.57',
1077 '70.0.3538.108',
1078 '72.0.3610.2',
1079 '71.0.3578.56',
1080 '71.0.3578.55',
1081 '72.0.3610.1',
1082 '72.0.3610.0',
1083 '71.0.3578.54',
1084 '70.0.3538.107',
1085 '71.0.3578.53',
1086 '72.0.3609.3',
1087 '71.0.3578.52',
1088 '72.0.3609.2',
1089 '71.0.3578.51',
1090 '72.0.3608.5',
1091 '72.0.3609.1',
1092 '72.0.3609.0',
1093 '71.0.3578.50',
1094 '70.0.3538.106',
1095 '72.0.3608.4',
1096 '72.0.3608.3',
1097 '72.0.3608.2',
1098 '71.0.3578.49',
1099 '72.0.3608.1',
1100 '72.0.3608.0',
1101 '70.0.3538.105',
1102 '71.0.3578.48',
1103 '72.0.3607.1',
1104 '72.0.3607.0',
1105 '71.0.3578.47',
1106 '70.0.3538.104',
1107 '72.0.3606.2',
1108 '72.0.3606.1',
1109 '72.0.3606.0',
1110 '71.0.3578.46',
1111 '70.0.3538.103',
1112 '70.0.3538.102',
1113 '72.0.3605.3',
1114 '72.0.3605.2',
1115 '72.0.3605.1',
1116 '72.0.3605.0',
1117 '71.0.3578.45',
1118 '70.0.3538.101',
1119 '71.0.3578.44',
1120 '71.0.3578.43',
1121 '70.0.3538.100',
1122 '70.0.3538.99',
1123 '71.0.3578.42',
1124 '72.0.3604.1',
1125 '72.0.3604.0',
1126 '71.0.3578.41',
1127 '70.0.3538.98',
1128 '71.0.3578.40',
1129 '72.0.3603.2',
1130 '72.0.3603.1',
1131 '72.0.3603.0',
1132 '71.0.3578.39',
1133 '70.0.3538.97',
1134 '72.0.3602.2',
1135 '71.0.3578.38',
1136 '71.0.3578.37',
1137 '72.0.3602.1',
1138 '72.0.3602.0',
1139 '71.0.3578.36',
1140 '70.0.3538.96',
1141 '72.0.3601.1',
1142 '72.0.3601.0',
1143 '71.0.3578.35',
1144 '70.0.3538.95',
1145 '72.0.3600.1',
1146 '72.0.3600.0',
1147 '71.0.3578.34',
1148 '70.0.3538.94',
1149 '72.0.3599.3',
1150 '72.0.3599.2',
1151 '72.0.3599.1',
1152 '72.0.3599.0',
1153 '71.0.3578.33',
1154 '70.0.3538.93',
1155 '72.0.3598.1',
1156 '72.0.3598.0',
1157 '71.0.3578.32',
1158 '70.0.3538.87',
1159 '72.0.3597.1',
1160 '72.0.3597.0',
1161 '72.0.3596.2',
1162 '71.0.3578.31',
1163 '70.0.3538.86',
1164 '71.0.3578.30',
1165 '71.0.3578.29',
1166 '72.0.3596.1',
1167 '72.0.3596.0',
1168 '71.0.3578.28',
1169 '70.0.3538.85',
1170 '72.0.3595.2',
1171 '72.0.3591.3',
1172 '72.0.3595.1',
1173 '72.0.3595.0',
1174 '71.0.3578.27',
1175 '70.0.3538.84',
1176 '72.0.3594.1',
1177 '72.0.3594.0',
1178 '71.0.3578.26',
1179 '70.0.3538.83',
1180 '72.0.3593.2',
1181 '72.0.3593.1',
1182 '72.0.3593.0',
1183 '71.0.3578.25',
1184 '70.0.3538.82',
1185 '72.0.3589.3',
1186 '72.0.3592.2',
1187 '72.0.3592.1',
1188 '72.0.3592.0',
1189 '71.0.3578.24',
1190 '72.0.3589.2',
1191 '70.0.3538.81',
1192 '70.0.3538.80',
1193 '72.0.3591.2',
1194 '72.0.3591.1',
1195 '72.0.3591.0',
1196 '71.0.3578.23',
1197 '70.0.3538.79',
1198 '71.0.3578.22',
1199 '72.0.3590.1',
1200 '72.0.3590.0',
1201 '71.0.3578.21',
1202 '70.0.3538.78',
1203 '70.0.3538.77',
1204 '72.0.3589.1',
1205 '72.0.3589.0',
1206 '71.0.3578.20',
1207 '70.0.3538.76',
1208 '71.0.3578.19',
1209 '70.0.3538.75',
1210 '72.0.3588.1',
1211 '72.0.3588.0',
1212 '71.0.3578.18',
1213 '70.0.3538.74',
1214 '72.0.3586.2',
1215 '72.0.3587.0',
1216 '71.0.3578.17',
1217 '70.0.3538.73',
1218 '72.0.3586.1',
1219 '72.0.3586.0',
1220 '71.0.3578.16',
1221 '70.0.3538.72',
1222 '72.0.3585.1',
1223 '72.0.3585.0',
1224 '71.0.3578.15',
1225 '70.0.3538.71',
1226 '71.0.3578.14',
1227 '72.0.3584.1',
1228 '72.0.3584.0',
1229 '71.0.3578.13',
1230 '70.0.3538.70',
1231 '72.0.3583.2',
1232 '71.0.3578.12',
1233 '72.0.3583.1',
1234 '72.0.3583.0',
1235 '71.0.3578.11',
1236 '70.0.3538.69',
1237 '71.0.3578.10',
1238 '72.0.3582.0',
1239 '72.0.3581.4',
1240 '71.0.3578.9',
1241 '70.0.3538.67',
1242 '72.0.3581.3',
1243 '72.0.3581.2',
1244 '72.0.3581.1',
1245 '72.0.3581.0',
1246 '71.0.3578.8',
1247 '70.0.3538.66',
1248 '72.0.3580.1',
1249 '72.0.3580.0',
1250 '71.0.3578.7',
1251 '70.0.3538.65',
1252 '71.0.3578.6',
1253 '72.0.3579.1',
1254 '72.0.3579.0',
1255 '71.0.3578.5',
1256 '70.0.3538.64',
1257 '71.0.3578.4',
1258 '71.0.3578.3',
1259 '71.0.3578.2',
1260 '71.0.3578.1',
1261 '71.0.3578.0',
1262 '70.0.3538.63',
1263 '69.0.3497.128',
1264 '70.0.3538.62',
1265 '70.0.3538.61',
1266 '70.0.3538.60',
1267 '70.0.3538.59',
1268 '71.0.3577.1',
1269 '71.0.3577.0',
1270 '70.0.3538.58',
1271 '69.0.3497.127',
1272 '71.0.3576.2',
1273 '71.0.3576.1',
1274 '71.0.3576.0',
1275 '70.0.3538.57',
1276 '70.0.3538.56',
1277 '71.0.3575.2',
1278 '70.0.3538.55',
1279 '69.0.3497.126',
1280 '70.0.3538.54',
1281 '71.0.3575.1',
1282 '71.0.3575.0',
1283 '71.0.3574.1',
1284 '71.0.3574.0',
1285 '70.0.3538.53',
1286 '69.0.3497.125',
1287 '70.0.3538.52',
1288 '71.0.3573.1',
1289 '71.0.3573.0',
1290 '70.0.3538.51',
1291 '69.0.3497.124',
1292 '71.0.3572.1',
1293 '71.0.3572.0',
1294 '70.0.3538.50',
1295 '69.0.3497.123',
1296 '71.0.3571.2',
1297 '70.0.3538.49',
1298 '69.0.3497.122',
1299 '71.0.3571.1',
1300 '71.0.3571.0',
1301 '70.0.3538.48',
1302 '69.0.3497.121',
1303 '71.0.3570.1',
1304 '71.0.3570.0',
1305 '70.0.3538.47',
1306 '69.0.3497.120',
1307 '71.0.3568.2',
1308 '71.0.3569.1',
1309 '71.0.3569.0',
1310 '70.0.3538.46',
1311 '69.0.3497.119',
1312 '70.0.3538.45',
1313 '71.0.3568.1',
1314 '71.0.3568.0',
1315 '70.0.3538.44',
1316 '69.0.3497.118',
1317 '70.0.3538.43',
1318 '70.0.3538.42',
1319 '71.0.3567.1',
1320 '71.0.3567.0',
1321 '70.0.3538.41',
1322 '69.0.3497.117',
1323 '71.0.3566.1',
1324 '71.0.3566.0',
1325 '70.0.3538.40',
1326 '69.0.3497.116',
1327 '71.0.3565.1',
1328 '71.0.3565.0',
1329 '70.0.3538.39',
1330 '69.0.3497.115',
1331 '71.0.3564.1',
1332 '71.0.3564.0',
1333 '70.0.3538.38',
1334 '69.0.3497.114',
1335 '71.0.3563.0',
1336 '71.0.3562.2',
1337 '70.0.3538.37',
1338 '69.0.3497.113',
1339 '70.0.3538.36',
1340 '70.0.3538.35',
1341 '71.0.3562.1',
1342 '71.0.3562.0',
1343 '70.0.3538.34',
1344 '69.0.3497.112',
1345 '70.0.3538.33',
1346 '71.0.3561.1',
1347 '71.0.3561.0',
1348 '70.0.3538.32',
1349 '69.0.3497.111',
1350 '71.0.3559.6',
1351 '71.0.3560.1',
1352 '71.0.3560.0',
1353 '71.0.3559.5',
1354 '71.0.3559.4',
1355 '70.0.3538.31',
1356 '69.0.3497.110',
1357 '71.0.3559.3',
1358 '70.0.3538.30',
1359 '69.0.3497.109',
1360 '71.0.3559.2',
1361 '71.0.3559.1',
1362 '71.0.3559.0',
1363 '70.0.3538.29',
1364 '69.0.3497.108',
1365 '71.0.3558.2',
1366 '71.0.3558.1',
1367 '71.0.3558.0',
1368 '70.0.3538.28',
1369 '69.0.3497.107',
1370 '71.0.3557.2',
1371 '71.0.3557.1',
1372 '71.0.3557.0',
1373 '70.0.3538.27',
1374 '69.0.3497.106',
1375 '71.0.3554.4',
1376 '70.0.3538.26',
1377 '71.0.3556.1',
1378 '71.0.3556.0',
1379 '70.0.3538.25',
1380 '71.0.3554.3',
1381 '69.0.3497.105',
1382 '71.0.3554.2',
1383 '70.0.3538.24',
1384 '69.0.3497.104',
1385 '71.0.3555.2',
1386 '70.0.3538.23',
1387 '71.0.3555.1',
1388 '71.0.3555.0',
1389 '70.0.3538.22',
1390 '69.0.3497.103',
1391 '71.0.3554.1',
1392 '71.0.3554.0',
1393 '70.0.3538.21',
1394 '69.0.3497.102',
1395 '71.0.3553.3',
1396 '70.0.3538.20',
1397 '69.0.3497.101',
1398 '71.0.3553.2',
1399 '69.0.3497.100',
1400 '71.0.3553.1',
1401 '71.0.3553.0',
1402 '70.0.3538.19',
1403 '69.0.3497.99',
1404 '69.0.3497.98',
1405 '69.0.3497.97',
1406 '71.0.3552.6',
1407 '71.0.3552.5',
1408 '71.0.3552.4',
1409 '71.0.3552.3',
1410 '71.0.3552.2',
1411 '71.0.3552.1',
1412 '71.0.3552.0',
1413 '70.0.3538.18',
1414 '69.0.3497.96',
1415 '71.0.3551.3',
1416 '71.0.3551.2',
1417 '71.0.3551.1',
1418 '71.0.3551.0',
1419 '70.0.3538.17',
1420 '69.0.3497.95',
1421 '71.0.3550.3',
1422 '71.0.3550.2',
1423 '71.0.3550.1',
1424 '71.0.3550.0',
1425 '70.0.3538.16',
1426 '69.0.3497.94',
1427 '71.0.3549.1',
1428 '71.0.3549.0',
1429 '70.0.3538.15',
1430 '69.0.3497.93',
1431 '69.0.3497.92',
1432 '71.0.3548.1',
1433 '71.0.3548.0',
1434 '70.0.3538.14',
1435 '69.0.3497.91',
1436 '71.0.3547.1',
1437 '71.0.3547.0',
1438 '70.0.3538.13',
1439 '69.0.3497.90',
1440 '71.0.3546.2',
1441 '69.0.3497.89',
1442 '71.0.3546.1',
1443 '71.0.3546.0',
1444 '70.0.3538.12',
1445 '69.0.3497.88',
1446 '71.0.3545.4',
1447 '71.0.3545.3',
1448 '71.0.3545.2',
1449 '71.0.3545.1',
1450 '71.0.3545.0',
1451 '70.0.3538.11',
1452 '69.0.3497.87',
1453 '71.0.3544.5',
1454 '71.0.3544.4',
1455 '71.0.3544.3',
1456 '71.0.3544.2',
1457 '71.0.3544.1',
1458 '71.0.3544.0',
1459 '69.0.3497.86',
1460 '70.0.3538.10',
1461 '69.0.3497.85',
1462 '70.0.3538.9',
1463 '69.0.3497.84',
1464 '71.0.3543.4',
1465 '70.0.3538.8',
1466 '71.0.3543.3',
1467 '71.0.3543.2',
1468 '71.0.3543.1',
1469 '71.0.3543.0',
1470 '70.0.3538.7',
1471 '69.0.3497.83',
1472 '71.0.3542.2',
1473 '71.0.3542.1',
1474 '71.0.3542.0',
1475 '70.0.3538.6',
1476 '69.0.3497.82',
1477 '69.0.3497.81',
1478 '71.0.3541.1',
1479 '71.0.3541.0',
1480 '70.0.3538.5',
1481 '69.0.3497.80',
1482 '71.0.3540.1',
1483 '71.0.3540.0',
1484 '70.0.3538.4',
1485 '69.0.3497.79',
1486 '70.0.3538.3',
1487 '71.0.3539.1',
1488 '71.0.3539.0',
1489 '69.0.3497.78',
1490 '68.0.3440.134',
1491 '69.0.3497.77',
1492 '70.0.3538.2',
1493 '70.0.3538.1',
1494 '70.0.3538.0',
1495 '69.0.3497.76',
1496 '68.0.3440.133',
1497 '69.0.3497.75',
1498 '70.0.3537.2',
1499 '70.0.3537.1',
1500 '70.0.3537.0',
1501 '69.0.3497.74',
1502 '68.0.3440.132',
1503 '70.0.3536.0',
1504 '70.0.3535.5',
1505 '70.0.3535.4',
1506 '70.0.3535.3',
1507 '69.0.3497.73',
1508 '68.0.3440.131',
1509 '70.0.3532.8',
1510 '70.0.3532.7',
1511 '69.0.3497.72',
1512 '69.0.3497.71',
1513 '70.0.3535.2',
1514 '70.0.3535.1',
1515 '70.0.3535.0',
1516 '69.0.3497.70',
1517 '68.0.3440.130',
1518 '69.0.3497.69',
1519 '68.0.3440.129',
1520 '70.0.3534.4',
1521 '70.0.3534.3',
1522 '70.0.3534.2',
1523 '70.0.3534.1',
1524 '70.0.3534.0',
1525 '69.0.3497.68',
1526 '68.0.3440.128',
1527 '70.0.3533.2',
1528 '70.0.3533.1',
1529 '70.0.3533.0',
1530 '69.0.3497.67',
1531 '68.0.3440.127',
1532 '70.0.3532.6',
1533 '70.0.3532.5',
1534 '70.0.3532.4',
1535 '69.0.3497.66',
1536 '68.0.3440.126',
1537 '70.0.3532.3',
1538 '70.0.3532.2',
1539 '70.0.3532.1',
1540 '69.0.3497.60',
1541 '69.0.3497.65',
1542 '69.0.3497.64',
1543 '70.0.3532.0',
1544 '70.0.3531.0',
1545 '70.0.3530.4',
1546 '70.0.3530.3',
1547 '70.0.3530.2',
1548 '69.0.3497.58',
1549 '68.0.3440.125',
1550 '69.0.3497.57',
1551 '69.0.3497.56',
1552 '69.0.3497.55',
1553 '69.0.3497.54',
1554 '70.0.3530.1',
1555 '70.0.3530.0',
1556 '69.0.3497.53',
1557 '68.0.3440.124',
1558 '69.0.3497.52',
1559 '70.0.3529.3',
1560 '70.0.3529.2',
1561 '70.0.3529.1',
1562 '70.0.3529.0',
1563 '69.0.3497.51',
1564 '70.0.3528.4',
1565 '68.0.3440.123',
1566 '70.0.3528.3',
1567 '70.0.3528.2',
1568 '70.0.3528.1',
1569 '70.0.3528.0',
1570 '69.0.3497.50',
1571 '68.0.3440.122',
1572 '70.0.3527.1',
1573 '70.0.3527.0',
1574 '69.0.3497.49',
1575 '68.0.3440.121',
1576 '70.0.3526.1',
1577 '70.0.3526.0',
1578 '68.0.3440.120',
1579 '69.0.3497.48',
1580 '69.0.3497.47',
1581 '68.0.3440.119',
1582 '68.0.3440.118',
1583 '70.0.3525.5',
1584 '70.0.3525.4',
1585 '70.0.3525.3',
1586 '68.0.3440.117',
1587 '69.0.3497.46',
1588 '70.0.3525.2',
1589 '70.0.3525.1',
1590 '70.0.3525.0',
1591 '69.0.3497.45',
1592 '68.0.3440.116',
1593 '70.0.3524.4',
1594 '70.0.3524.3',
1595 '69.0.3497.44',
1596 '70.0.3524.2',
1597 '70.0.3524.1',
1598 '70.0.3524.0',
1599 '70.0.3523.2',
1600 '69.0.3497.43',
1601 '68.0.3440.115',
1602 '70.0.3505.9',
1603 '69.0.3497.42',
1604 '70.0.3505.8',
1605 '70.0.3523.1',
1606 '70.0.3523.0',
1607 '69.0.3497.41',
1608 '68.0.3440.114',
1609 '70.0.3505.7',
1610 '69.0.3497.40',
1611 '70.0.3522.1',
1612 '70.0.3522.0',
1613 '70.0.3521.2',
1614 '69.0.3497.39',
1615 '68.0.3440.113',
1616 '70.0.3505.6',
1617 '70.0.3521.1',
1618 '70.0.3521.0',
1619 '69.0.3497.38',
1620 '68.0.3440.112',
1621 '70.0.3520.1',
1622 '70.0.3520.0',
1623 '69.0.3497.37',
1624 '68.0.3440.111',
1625 '70.0.3519.3',
1626 '70.0.3519.2',
1627 '70.0.3519.1',
1628 '70.0.3519.0',
1629 '69.0.3497.36',
1630 '68.0.3440.110',
1631 '70.0.3518.1',
1632 '70.0.3518.0',
1633 '69.0.3497.35',
1634 '69.0.3497.34',
1635 '68.0.3440.109',
1636 '70.0.3517.1',
1637 '70.0.3517.0',
1638 '69.0.3497.33',
1639 '68.0.3440.108',
1640 '69.0.3497.32',
1641 '70.0.3516.3',
1642 '70.0.3516.2',
1643 '70.0.3516.1',
1644 '70.0.3516.0',
1645 '69.0.3497.31',
1646 '68.0.3440.107',
1647 '70.0.3515.4',
1648 '68.0.3440.106',
1649 '70.0.3515.3',
1650 '70.0.3515.2',
1651 '70.0.3515.1',
1652 '70.0.3515.0',
1653 '69.0.3497.30',
1654 '68.0.3440.105',
1655 '68.0.3440.104',
1656 '70.0.3514.2',
1657 '70.0.3514.1',
1658 '70.0.3514.0',
1659 '69.0.3497.29',
1660 '68.0.3440.103',
1661 '70.0.3513.1',
1662 '70.0.3513.0',
1663 '69.0.3497.28',
1664 )
1665 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1666
1667
# Default HTTP request headers. The User-Agent value is produced by a single
# random_user_agent() call made when this module-level dict is built.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1675
5f6a1245 1676
fb37eb25
S
# Fixed User-Agent strings, keyed by a short browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1680
1681
bf42a990
S
# Unique sentinel meaning "no default was supplied"; compared by identity so
# that None remains usable as an explicit default value.
NO_DEFAULT = object()
1683
7105440c
YCH
# English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']
1687
f6717dec
S
# Month names in calendar order, keyed by two-letter language code.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
a942d6cb 1694
a7aaa398
S
# File extensions (without the leading dot) of known audio/video container
# and manifest formats.
# NOTE(review): presumably consulted when guessing an extension from a URL;
# confirm against the call sites.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1709
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. Keys and values are
# paired positionally by zip(); multi-character replacements ('AE', 'OE',
# 'TH', 'ss', 'ae', 'oe', 'th') are wrapped in lists so that itertools.chain
# yields them as single items instead of splitting them into characters.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1714
46f59e89
S
# strptime-style date/time patterns that do not depend on whether the day or
# the month comes first in a purely numeric date. The ambiguous numeric
# patterns are added separately in DATE_FORMATS_DAY_FIRST and
# DATE_FORMATS_MONTH_FIRST.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1753
# DATE_FORMATS plus the numeric patterns that read the first field as the day.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]
1763
# DATE_FORMATS plus the numeric patterns that read the first field as the month.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1772
# Matches the argument tail of a packed JavaScript call of the form
#   }('<payload>',<int>,<int>,'<a|b|c>'.split('|')
# capturing the payload, the two integers and the pipe-separated word list.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive,
# dot matches newlines); the script body is captured as group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1775
7105440c 1776
def preferredencoding():
    """Get preferred encoding.

    Asks locale.getpreferredencoding() for the system encoding and checks
    that it can actually encode text. If the lookup or the trial encoding
    fails for any reason, 'UTF-8' is returned instead.
    """
    fallback = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here.
        'TEST'.encode(candidate)
    except Exception:
        return fallback
    return candidate
d77c3dfd 1790
f4bfd65f 1791
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn and
    then renamed over fn. If anything fails, the temporary file is removed
    (best effort) and the original exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # (the helpers operate on their own argument rather than silently
        # closing over fn)
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind; the original error is
        # what the caller needs to see, so cleanup failures are ignored.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1844
1845
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first matching element, or None. When val is None the
        attribute only has to be present.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering candidates by hand """
        for candidate in node.findall(compat_xpath(xpath)):
            has_key = key in candidate.attrib
            if has_key and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1860
d7e66d39
JMF
1861# On python2.6 the xml.etree.ElementTree.Element methods don't support
1862# the namespace parameter
5f6a1245
JW
1863
1864
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace}tag' form.

    Steps without a prefix are kept as they are; each prefix is looked up
    in ns_map.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))
1875
d77c3dfd 1876
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node matched by xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order until one matches.

    When nothing matches: return default if one was supplied, otherwise
    raise ExtractorError if fatal is set, otherwise return None. name, if
    given, replaces the xpath in the error message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    # Start from "not found" so that an empty iterable of expressions is
    # treated like any other miss instead of leaving n unbound.
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1898
1899
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element is handled by xpath_element(). If the element exists
    but has no text: return default if one was supplied, otherwise raise
    ExtractorError if fatal is set, otherwise return None.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text

    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    name = xpath if name is None else name
    raise ExtractorError('Could not find XML element\'s text %s' % name)
a41fb80c
S
1913
1914
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: return default if one was supplied,
    otherwise raise ExtractorError if fatal is set, otherwise return None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    name = '%s[@%s]' % (xpath, key) if name is None else name
    raise ExtractorError('Could not find XML attribute %s' % name)
bf0ff932
PH
1926
1927
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id, or None if there is none"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
43e8fafd 1931
12ea2f30 1932
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying class_name, or None if there is none"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1937
1938
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None if there is none"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1942
1943
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several names, so match class_name as a
    # whole word anywhere inside the attribute value.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_pattern, html, escape_value=False)
1949
1950
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case
    it is inserted into the regular expression as a pattern. Each returned
    item is the text between the opening tag and the first closing tag of
    the same name, passed through unescapeHTML().
    """

    value = re.escape(value) if escape_value else value

    retlist = []
    # Verbose regex: tag name, any attributes before the wanted one, the
    # wanted attribute (optionally quoted), any attributes after it, then the
    # non-greedy content up to the first closing tag of the same name.
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        # Content that starts with a quote loses its first and last character.
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
a921f407 1974
c5229f39 1975
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        # Attributes of the most recently seen start tag.
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        """Store the (name, value) pairs of the start tag as a dict."""
        self.attrs = dict(attrs)
1984
c5229f39 1985
8bb56eee
BF
def extract_attributes(html_element):
    """Return the attributes of an HTML element string as a dictionary.

    html_element is the text of a single start tag, e.g.
        <el a="foo" B="bar" d=boz noval entity="&amp;">
    The element is fed to HTMLAttributeParser and the attributes it
    collected are returned. If parsing stops with compat_HTMLParseError,
    whatever was collected up to that point is returned.

    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    collector = HTMLAttributeParser()
    try:
        collector.feed(html_element)
        collector.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML
        pass
    return collector.attrs
9e6dd238 2010
c5229f39 2011
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Newlines become spaces, <br> tags and paragraph boundaries become
    newlines, remaining tags are dropped and HTML entities are decoded.
    None is passed through unchanged.
    """

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    text = html.replace('\n', ' ')
    # Applied in this order: <br />, then </p><p>, then any remaining tag.
    substitutions = (
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2027
2028
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    The name '-' selects standard output (its binary buffer when it has
    one). If opening fails with anything other than EACCES, the name is run
    through sanitize_path() and, if that changed it, opened again; an error
    from this second attempt propagates to the caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem is not something a different name can fix.
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2059
2060
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2068
5f6a1245 2069
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.

    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be
    kept if possible; in that case only the per-character replacement is
    applied and the result is not tidied up afterwards.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if not restricted:
            return char
        if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends.
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2109
5f6a1245 2110
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On other platforms the path is returned unchanged. On Windows the path
    is normalized and, in every component other than '.' and '..', each
    forbidden character and a trailing whitespace character or dot is
    replaced by '#'. A drive or UNC prefix is kept as it is.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)

    def _sanitize_part(path_part):
        if path_part in ['.', '..']:
            return path_part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)

    sanitized_path = [_sanitize_part(path_part) for path_part in parts]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2127
2128
def sanitize_url(url):
    """Repair a few common defects in url and return the result.

    Protocol-relative URLs ('//host/...') get an 'http:' scheme, and a
    small set of known scheme typos is corrected. Any other URL is returned
    unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # Every pattern is anchored at the start, so at most one
        # replacement can happen; the first typo that applies wins.
        fixed, hits = re.subn(mistake, fixup, url)
        if hits:
            return fixed
    return url
17bcc626
S
2145
2146
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for url after passing it through sanitize_url()"""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2149
2150
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables second.
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2154
2155
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element in its
    original order. Elements are compared by equality, so they do not have
    to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2163
912b38b4 2164
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without its leading '&' but with
    the trailing ';', e.g. 'amp;' or '#x2F;'. Named entities are looked up
    in the HTML 4 and HTML5 tables; numeric entities are decoded in
    decimal or, with an 'x' prefix, hexadecimal. Anything that is unknown
    or cannot be decoded is returned in its literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # Out-of-range code points raise ValueError; numbers too large for
        # a C int raise OverflowError instead. Both mean "not a character".
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2194
2195
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through; any other input must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # Group 1 is the entity without '&' but including the ';'.
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2203
8bf48f23 2204
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and file names.

    On Windows (major version 5 or later) this is preferredencoding();
    elsewhere it is the filesystem encoding. 'utf-8' is used when no
    encoding is reported.
    """
    on_windows_nt5 = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_windows_nt5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2215
2216
def encodeFilename(s, for_subprocess=False):
    """Return s in the form the file and subprocess APIs of this interpreter expect.

    @param s The name of the file
    @param for_subprocess Set when the result is passed to a subprocess
                          rather than to a file API
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if unicode_windows_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2239
2240
def decodeFilename(b, for_subprocess=False):
    """Counterpart of encodeFilename().

    On Python 3, or when b is not a byte string, b is returned unchanged;
    otherwise it is decoded with the subprocess encoding, dropping bytes
    that cannot be decoded.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2250
f07b74fc
PH
2251
def encodeArgument(s):
    """Encode a command line argument with encodeFilename(..., for_subprocess=True).

    Byte strings are still accepted and are decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2259
2260
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument; counterpart of encodeArgument()"""
    return decodeFilename(b, for_subprocess=True)
2263
2264
8271226a
PH
def decodeOption(optval):
    """Return an option value as text.

    None is passed through, byte strings are decoded with
    preferredencoding(); the result must be a compat_str.
    """
    if optval is None:
        return None
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 2273
5f6a1245 2274
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The shortest form that fits is used: exactly one hour is '1:00:00' and
    exactly one minute is '1:00'.
    """
    # The comparisons are inclusive so that the boundary values roll over
    # to the next unit instead of printing '60:00' or '60'.
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2282
a0ddb8a2 2283
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python's ssl module.

    Certificate verification is switched off when params['nocheckcertificate']
    is true. Extra keyword arguments are passed on to the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2307
732ea2f0 2308
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to error messages, asking the user to report the bug.

    The update hint depends on whether ytdl_is_updateable() reports that
    this installation can update itself.
    """
    update_cmd = (
        'type youtube-dl -U to update' if ytdl_is_updateable()
        else 'see https://yt-dl.org/update on how to update')
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
2318
2319
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2323
2324
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        cause, if given, is added to the message; video_id, if given, is put in front of it.
        """
        # Captured once: used both to classify the error and to preserve
        # the exception that was being handled when this one was created.
        current_exc_info = sys.exc_info()
        network_errors = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if current_exc_info[0] in network_errors:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = current_exc_info  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2352
1c256f70 2353
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported; the URL is kept in .url"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2359
2360
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2364
2365
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always an expected error; the message and the optional countries
    value are kept in .msg and .countries.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2376
2377
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
2390
2391
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2399
2400
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task. The message is also
    available as .msg.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        self.msg = msg
d77c3dfd 2411
5f6a1245 2412
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2416
2417
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2425
2426
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2442
5f6a1245 2443
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno-style code and a message, classified into a coarse reason.

    Attributes:
        code: the error code passed in (may be None).
        msg: the error message.
        reason: 'NO_SPACE' for out-of-space or quota errors,
            'VALUE_TOO_LONG' for an oversized value, otherwise
            'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # The message texts are the standard OS descriptions of ENOSPC and
        # EDQUOT ("Disk quota exceeded").
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2458
2459
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when extended attributes cannot be
    # used at all; confirm against the raising code.
    pass
2462
2463
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, bind it to a source address.

    ydl_handler._params['source_address'], when set, makes the connection
    use only remote addresses of the same IP family and bind its socket to
    that address. is_https is only consulted on the Python 2.6 path, where
    the socket has to be wrapped by hand. Returns the connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address means IPv4; anything else is taken as IPv6.
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the error of the last failed
            # attempt is the one reported.
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the bind address.
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2527
2528
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to a header dict.

    Without the marker the very same dict object is returned. With it, a new
    dict is returned that lacks both the marker and any Accept-Encoding header
    (compared case-insensitively); the input dict is left unmodified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    remaining = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del remaining['Youtubedl-no-compression']
    return remaining
87f0e62d
YCH
2537
2538
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, routing through a SOCKS connection class when requested."""
        conn_class = compat_http_client.HTTPConnection

        # 'Ytdl-socks-proxy' is an internal header: consume it and strip it
        # from the request before it is sent
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Inflate data as a raw deflate stream, falling back to zlib format."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL and add the standard headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Substitute URL if any change after escaping
        if escaped_url != original_url:
            req = update_Request(req, url=escaped_url)

        for name, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() not in req.headers:
                req.add_header(name, value)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress gzip/deflate bodies and percent-encode redirect targets."""
        old_resp = resp

        def rewrap(body):
            # New response object around the decoded body, reusing the original
            # response's headers/url/code and dropping Content-encoding
            new_resp = compat_urllib_request.addinfourl(
                body, old_resp.headers, old_resp.url, old_resp.code)
            new_resp.msg = old_resp.msg
            del new_resp.headers['Content-encoding']
            return new_resp

        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = rewrap(uncompressed)
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            resp = rewrap(io.BytesIO(self.deflate(resp.read())))
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                is_py2 = sys.version_info < (3, 0)
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if is_py2:
                    location = location.decode('utf-8')
                else:
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if is_py2:
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
bf50b038 2660
5de90176 2661
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Create a subclass of base_class that connects through a SOCKS proxy.

    base_class  -- compat_http_client.HTTPConnection or HTTPSConnection
                   (or a subclass of either)
    socks_proxy -- proxy URL; the scheme selects the protocol: 'socks5',
                   'socks4a', or 'socks' / 'socks4' (both SOCKS4). The port
                   defaults to 1080; username and password, when present,
                   are URL-unquoted.

    Returns the new connection class.
    Raises ValueError when the URL scheme is not one of the above.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously socks_type stayed unbound here and the function failed
        # below with an opaque UnboundLocalError
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %s' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2703
2704
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection.

    params is stored as self._params; https_conn_class overrides the
    connection class (compat_http_client.HTTPSConnection when not given).
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req, routing through a SOCKS connection class when requested."""
        conn_class = self._https_conn_class

        # Forward whatever TLS settings the base handler exposes
        open_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            open_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            open_kwargs['check_hostname'] = self._check_hostname

        # 'Ytdl-socks-proxy' is an internal header: consume it and strip it
        # from the request before it is sent
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2728
2729
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """MozillaCookieJar that copes with session cookies and '#HttpOnly_' lines."""

    _HTTPONLY_PREFIX = '#HttpOnly_'

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file, writing session cookies with expires 0.

        The in-memory cookies are left unchanged: `expires` of session
        cookies is only set to 0 while the file is being written.
        """
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        session_cookies = [cookie for cookie in self if cookie.expires is None]
        for cookie in session_cookies:
            cookie.expires = 0
        try:
            compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
        finally:
            # Restore the session marker; leaving `expires = 0` behind would
            # make the jar treat these cookies as expired from now on
            for cookie in session_cookies:
                cookie.expires = None

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Copy the file into memory, stripping the '#HttpOnly_' prefix so that
        # those lines reach the parser as regular cookie lines
        cf = io.StringIO()
        with open(filename) as f:
            for line in f:
                if line.startswith(self._HTTPONLY_PREFIX):
                    line = line[len(self._HTTPONLY_PREFIX):]
                cf.write(compat_str(line))
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2771
2772
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor whose HTTPS hooks mirror the HTTP ones."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Pass the response straight to HTTPCookieProcessor.http_response."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is currently disabled (commented out).
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2795
2796
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC offset off date_str.

    Recognizes a final 'Z' or a final '+HH:MM' / '-HHMM' style offset
    (optionally preceded by one space) after at least 8 other characters.

    Returns a (timedelta, remaining date string) tuple; the timedelta is zero
    when no offset is found or the suffix is 'Z'.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str

    stripped = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # 'Z' suffix: UTC
        return datetime.timedelta(), stripped

    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, stripped
2813
2814
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter separates the date part from the time part. timezone is a
    timedelta UTC offset; when None it is extracted from date_str itself.
    Fractional seconds are discarded. Returns None for None input or when
    the string does not match the expected format.
    """
    if date_str is None:
        return None

    # Drop fractional seconds
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        local_dt = datetime.datetime.strptime(date_str, date_format)
    except ValueError:
        return None
    return calendar.timegm((local_dt - timezone).timetuple())
912b38b4
PH
2832
2833
46f59e89
S
def date_formats(day_first=True):
    """Return DATE_FORMATS_DAY_FIRST or DATE_FORMATS_MONTH_FIRST per day_first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2836
2837
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; when several match, the last match is kept
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            pass

    if result is None:
        # Fall back to the e-mail style date parser
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
bf50b038 2864
5f6a1245 2865
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # 12 hours are added to the parsed time whenever 'PM' occurs in the string
    pm_delta = 12 if re.search(r'(?i)PM', text) else 0
    timezone, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if tz_match:
        text = text[:-len(tz_match.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    micro_match = re.search(
        r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if micro_match:
        text = micro_match.group(1)

    pm_offset = datetime.timedelta(hours=pm_delta)
    for fmt in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(text, fmt) - timezone + pm_offset
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    # Fall back to the e-mail style date parser
    parsed = email.utils.parsedate_tz(text)
    if parsed:
        return calendar.timegm(parsed) + pm_delta * 3600
    return None
46f59e89
S
2897
2898
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from url, returning default_ext on failure.

    The candidate is the text after the last '.' of the part before '?'.
    """
    if url is None or '.' not in url:
        return default_ext

    path = url.partition('?')[0]
    candidate = path.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 2910
5f6a1245 2911
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'.

    The replacement itself is delegated to replace_extension().
    """
    subtitle_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 2914
5f6a1245 2915
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # A bad approximation?
    # timedelta knows neither months nor years: use 30 and 365 days
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
2943
2944
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
2953
5f6a1245 2954
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest / latest representable date.
        Raises ValueError when start is later than end.
        """
        self.start = (
            date_from_str(start) if start is not None
            else datetime.datetime.min.date())
        self.end = (
            date_from_str(end) if end is not None
            else datetime.datetime.max.date())
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not already a date is parsed as a date string
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
2984
2985
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
2994
2995
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> GetStdHandle() id
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # AttributeError: the output stream doesn't have a fileno, it's virtual
        # UnsupportedOperation: some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    kernel32 = ctypes.windll.kernel32

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', kernel32))
    console_handle = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        is_char_device = (GetFileType(handle) & ~FILE_TYPE_REMOTE) == FILE_TYPE_CHAR
        return (not is_char_device
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(console_handle):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character above U+FFFF, or len(text) if none
        for pos, char in enumerate(text):
            if ord(char) > 0xffff:
                return pos
        return len(text)

    while s:
        # Write at most 1024 characters at a time, stopping short of any
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            console_handle, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3069
3070
def write_string(s, out=None, encoding=None):
    """Write the text string s to out (sys.stderr by default) and flush.

    encoding overrides the encoding used when out takes bytes; characters
    that cannot be encoded are dropped.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    # On Windows, try writing straight to the console first
    use_console = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console and _windows_write_string(s, out):
        return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with a binary buffer underneath: encode ourselves
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3091
3092
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the elements of bs as a list of integers ([] for empty input)."""
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Elements are characters (e.g. a Python 2 byte string)
        return [ord(c) for c in bs]
    return list(bs)  # Python 3
3100
c257baff 3101
def intlist_to_bytes(xs):
    """Pack a sequence of integers into a byte string, one unsigned byte each."""
    if not xs:
        return b''
    pack_format = '%dB' % len(xs)
    return compat_struct_pack(pack_format, *xs)
c38b1e77
PH
3106
3107
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the running platform
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL

    # Byte range passed to LockFileEx / UnlockFileEx (low and high DWORD)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can reuse it
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            handle, flags, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            fcntl.flock(f, operation)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3181
3182
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened on construction. Entering takes a shared lock for
    mode 'r' and an exclusive lock for 'a' / 'w'; leaving unlocks and closes
    the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        wants_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, wants_exclusive)
        except IOError:
            # Locking failed: do not leave the file open
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Close the file even when unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3212
3213
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3217
3218
def shell_quote(args):
    """Return args as one space-separated string with every item shell-quoted."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(as_text(arg)) for arg in args)
9d4660ca
PH
3228
3229
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into a '#__youtubedl_smuggle=...' fragment appended
    to url. If url already carries smuggled data, both sets are merged and
    the already-smuggled values win on key conflicts.

    The caller's data dict is not modified.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy: updating `data` in place would leak the previously
    # smuggled keys into the caller's dict
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3238
3239
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (plain URL, decoded data).

    URLs without the '#__youtubedl_smuggle' marker yield (smug_url, default).
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    url, _, sdata = smug_url.rpartition('#')
    payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
02dbf93f
PH
3247
3248
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a string with a binary unit, e.g. '1.50KiB'.

    bytes may be a number, a numeric str, or None (which yields 'N/A').
    The unit is clamped to the B..YiB range: values below 1 are shown in B
    and values beyond the YiB range are shown in YiB.

    Raises ValueError for negative values (logarithm domain error).
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Without clamping, tiny fractions produce a negative index (wrongly
        # picking 'YiB') and huge values run past the end of the table
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 3261
1c088fa8 3262
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number> <unit>' from s, scaled by unit_table.

    unit_table maps unit names to multipliers. The number may use ',' or '.'
    as decimal separator. Returns the scaled value as an int, or None when s
    does not start with a number followed by a known unit.
    """
    alternatives = '|'.join(re.escape(unit) for unit in unit_table)
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    m = re.match(pattern, s)
    if not m:
        return None
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3272
3273
be64b5b0
PH
def parse_filesize(s):
    """Parse a file size such as '5 MiB' or '1.2GB' into a number of bytes.

    Returns None for None input or when lookup_unit_table() finds no match.
    """
    if s is None:
        return None

    # (symbol, decimal prefix name, binary prefix name), in increasing order
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    unit_table = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    for power, (symbol, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        lower = symbol.lower()
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        unit_table.update({
            symbol + 'iB': binary,
            symbol + 'B': decimal,
            lower + 'B': binary,
            symbol + 'b': decimal,
            lower + 'b': decimal,
            decimal_name + 'bytes': decimal,
            binary_name + 'bytes': binary,
        })

    return lookup_unit_table(unit_table, s)
3343
3344
def parse_count(s):
    """Parse a count such as '1,000' or '1.5M' into an int.

    Strings made only of digits, ',' and '.' go through str_to_int();
    otherwise a k/K, m/M or kk/KK suffix is applied via lookup_unit_table().
    Returns None for None input.
    """
    if s is None:
        return None

    text = s.strip()
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand = 1000
    million = 1000 ** 2
    multipliers = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(multipliers, text)
be64b5b0 3364
2f7ae819 3365
b871d7e9
S
def parse_resolution(s):
    """Extract resolution info from a string as a dict.

    '<w>x<h>' gives 'width' and 'height'; '<n>p' / '<n>i' gives 'height';
    '4k' / '8k' gives 'height' as n * 540. Returns {} for None or no match.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if lines:
        return {'height': int(lines.group(1))}

    k_class = re.search(r'\b([48])[kK]\b', s)
    if k_class:
        return {'height': int(k_class.group(1)) * 540}

    return {}
3386
3387
0dc41787
S
def parse_bitrate(s):
    """Return the integer directly preceding 'kbps' in s.

    Returns None when s is not a text string or contains no such value.
    """
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3394
3395
def month_by_name(name, lang='en'):
    """ Return the 1-based number of the month called name, or None.

    Names are looked up in MONTH_NAMES[lang]; an unknown lang falls back
    to the English list. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in names:
        return names.index(name) + 1
    return None
3405
3406
def month_by_abbreviation(abbrev):
    """ Return the 1-based number of the month whose English name starts
    with the three-letter abbreviation abbrev, or None if there is none """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3415
3416
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in xml_str by '&amp;'.

    An ampersand is left untouched when it already introduces one of the
    five predefined XML entities (amp, lt, gt, apos, quot) or a numeric
    character reference. Numeric references may have up to 6 hexadecimal
    or 7 decimal digits, which is enough for the highest Unicode code
    point (U+10FFFF = 1114111), so references to characters outside the
    Basic Multilingual Plane are no longer double-escaped.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,6};|#[0-9]{,7};)',
        '&amp;',
        xml_str)
e3946f98
PH
3423
3424
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl().

    Silently does nothing when running on Jython, when libc.so.6 cannot
    be loaded, or when the loaded library exposes no prctl symbol.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return

    # Option number of PR_SET_NAME in Linux's <sys/prctl.h>
    PR_SET_NAME = 15

    # Initializing the buffer from the bytes makes it one byte larger than
    # the title and NUL-terminated; a buffer of exactly len(title) bytes
    # has no terminator, so prctl could read past its end.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(PR_SET_NAME, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3449
3450
def remove_start(s, start):
    """Return s without its leading start; s is returned unchanged when it
    is None or does not begin with start."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3453
3454
def remove_end(s, end):
    """Return s without its trailing end.

    s is returned unchanged when it is None, when it does not finish with
    end, or when end is empty (slicing with -0 would otherwise drop the
    whole string).
    """
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
2b9faf55
PH
3457
3458
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s.

    None and strings shorter than two characters are returned as is.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3466
3467
def url_basename(url):
    """Return the last segment of the URL's path, ignoring surrounding slashes."""
    parsed = compat_urlparse.urlparse(url)
    segments = parsed.path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
3471
3472
02dc0a36
S
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that precedes
    any '?', '#' or '&'.

    Raises AttributeError when url does not match that shape.
    """
    matched = re.match(r'https?://[^?#&]+/', url)
    return matched.group()
3475
3476
def urljoin(base, path):
    """Join path onto base, returning None for unusable input.

    bytes arguments are decoded as UTF-8. A path that is already absolute
    or protocol-relative ('scheme://...' or '//...') is returned as is.
    Otherwise base must be an http(s) or protocol-relative URL; if it is
    not, or if path is empty or not a string, None is returned.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # '-' is placed last in the class so it is a literal; written as '+-.'
    # it formed a range that also accepted ',' as a scheme character.
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3490
3491
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3495
3496
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3500
3501
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, returning default when that is not possible.

    When get_attr is given, the attribute of that name is read from v
    first. None and '' yield default. The converted value is multiplied
    by invscale and floor-divided by scale.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '' or v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
9732d77e 3514
9572013d 3515
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3518
9732d77e
PH
3519
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Commas, dots and plus signs are dropped before conversion, so
    '123,456' and '123.456' both give 123456. None is passed through and
    a value that already is an int is returned unchanged instead of
    failing inside re.sub(). Raises ValueError when the remaining text is
    not an integer. """
    if int_str is None:
        return None
    if isinstance(int_str, int):
        return int_str
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int(digits)
608d11f5
PH
3526
3527
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float, multiplied by invscale and divided by scale.

    Returns default when v is None or cannot be converted.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3535
3536
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v if it is a bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3539
3540
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a text string,
    otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3543
3544
af03000a
S
def url_or_none(url):
    """Return the stripped url if it looks like 'scheme://...' or '//...'.

    Returns None for empty values, non-text values and anything else.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
        return candidate
    return None
3550
3551
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in turn: colon-separated clock values
    ('[[[DD:]HH:]MM:]SS[.fraction]'), unit-suffixed values (ISO 8601 like
    'PT1H2M3S' as well as '1h 2min 3s'; year, month and week parts are
    accepted but not counted), and a single fractional amount of hours or
    minutes. Returns None for non-string input or unrecognized text.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days = hours = mins = secs = ms = None

    clock = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if clock:
        days, hours, mins, secs, ms = clock.groups()
    else:
        worded = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if worded:
            days, hours, mins, secs, ms = worded.groups()
        else:
            single = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if not single:
                return None
            hours, mins = single.groups()

    # The order of the additions and the shape of the multiplications are
    # kept as they are so floating point results do not change.
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
91d7d0b3
JMF
3608
3609
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of filename's real extension.

    When expected_real_ext is given and differs from the real extension,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3616
3617
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's real extension by ext.

    When expected_real_ext is given and differs from the real extension,
    ext is appended to the complete filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3623
3624
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary can be started, and returns its name
    (False if starting it raises OSError).
    args can be a list of arguments for a short output (like -version) """
    try:
        process = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
3633
3634
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    The combined stdout/stderr output of running exe with args is handed
    to detect_exe_version() together with version_re and unrecognized. """
    command = None
    try:
        command = [encodeArgument(exe)] + args
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process.communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3652
3653
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output.

    version_re defaults to a pattern matching 'version <something>'.
    Returns unrecognized when the pattern is not found.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3663
3664
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3669
9c44d242
PH
3670
class OnDemandPagedList(PagedList):
    """Paged list that obtains pages one at a time from pagefunc(pagenum).

    Fetched pages are kept in a per-instance dict when use_cache is true.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the items with indices in [start, end) as a list; an end
        of None means up to the last available item."""
        collected = []
        for pagenum in itertools.count(start // self._pagesize):
            page_first = pagenum * self._pagesize
            next_page_first = pagenum * self._pagesize + self._pagesize
            if start >= next_page_first:
                continue

            page = self._cache.get(pagenum) if self._use_cache else None
            if page is None:
                page = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = page

            # Offsets of the requested range inside this page
            if page_first <= start < next_page_first:
                lo = start % self._pagesize
            else:
                lo = 0
            if end is not None and page_first <= end <= next_page_first:
                hi = ((end - 1) % self._pagesize) + 1
            else:
                hi = None

            if lo != 0 or hi is not None:
                page = page[lo:hi]
            collected.extend(page)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page) + lo < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_page_first:
                break
        return collected
81c2f20b
PH
3721
3722
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the items with indices in [start, end) as a list; an end
        of None means through the last page."""
        first_page = start // self._pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start
        to_skip = start - first_page * self._pagesize

        collected = []
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                # Only the first fetched page starts before the range
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
3750
3751
def uppercase_escape(s):
    """Decode every backslash-U escape followed by eight hex digits found
    in s, leaving the rest of the string untouched."""
    decode = codecs.getdecoder('unicode_escape')

    def replace(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', replace, s)
0fe2ff78
YCH
3758
3759
def lowercase_escape(s):
    """Decode every backslash-u escape followed by four hex digits found
    in s, leaving the rest of the string untouched."""
    decode = codecs.getdecoder('unicode_escape')

    def replace(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', replace, s)
b53466e1 3766
d05cfe06
S
3767
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # quote() on Python 2 is given UTF-8 encoded bytes
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
d05cfe06
S
3773
3774
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host part is IDNA-encoded; path, params, query and fragment go
    through escape_rfc3986()."""
    parts = compat_urllib_parse_urlparse(url)
    netloc = parts.netloc.encode('idna').decode('ascii')
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        netloc=netloc, path=path, params=params,
        query=query, fragment=fragment)
    return escaped.geturl()
3785
62e609ab
PH
3786
def read_batch_urls(batch_fd):
    """Read the URLs of a batch file and close it.

    batch_fd is iterated line by line (bytes lines are decoded as UTF-8).
    A leading byte order mark and surrounding whitespace are removed;
    empty lines and lines starting with '#', ';' or ']' are dropped.
    Returns the remaining lines as a list.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A UTF-8 BOM shows up as U+FEFF when the data was decoded as
        # UTF-8, and as the three characters below when it was decoded
        # with a single-byte codec; str.strip() removes neither.
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
3801
3802
def urlencode_postdata(*args, **kargs):
    """URL-encode the given arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3805
3806
def update_url_query(url, query):
    """Return url with its query string updated from the query mapping.

    Existing parameters with the same name are replaced; a falsy query
    returns url unchanged.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3815
8e60dc75 3816
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with selected parts replaced.

    headers are merged over req's headers, data and url replace the
    original ones when truthy, and query is merged into the URL's query
    string. HEAD and PUT requests keep their method; the timeout
    attribute is copied when req has one.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)

    method = req.get_method()
    if method == 'HEAD':
        req_type = HEADRequest
    elif method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request

    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3835
3836
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns a (body bytes, Content-Type value) tuple. Raises ValueError
    when the boundary occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    out = b''
    for name, value in data.items():
        delimiter = b'--' + boundary.encode('ascii')
        out += delimiter + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary.encode('ascii') in content:
            raise ValueError('Boundary overlaps with data')
        out += content

    out += b'--' + boundary.encode('ascii') + b'--\r\n'

    return out, content_type
3857
3858
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body bytes, Content-Type value) tuple. A caller-supplied
    boundary that collides with the data raises ValueError; generated
    boundaries are retried until one fits.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    caller_chose_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if caller_chose_boundary:
                raise
            # Pick another random boundary on the next round
            boundary = None
3887
3888
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable one of several keys, in d.

    With a list or tuple of keys, the first key present whose value is
    not None (and, when skip_false_values is set, not falsy) wins;
    default is returned if there is none. A single key is a plain
    d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)

    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
3897
3898
def try_get(src, getter, expected_type=None):
    """Apply getter (or each getter of a list/tuple) to src and return the
    first result obtained without AttributeError, KeyError, TypeError or
    IndexError that also is an instance of expected_type (if given).

    Returns None when no getter yields such a value.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
3910
3911
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts into a new dict, earlier dicts taking precedence.

    None values are ignored. A key that is already present is only
    overwritten when its current value is an empty text string and the
    new value is a non-empty text string.
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_empty_string = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_empty_string:
                    continue
            merged[key] = value
    return merged
3924
3925
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as a text string, decoding it with encoding and errors
    when it is not one already."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
3928
16392824 3929
a1a530b0
PH
# Age limits returned by parse_age_limit() for these rating labels
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
3937
3938
# Age limits returned by parse_age_limit() for 'TV-*' rating labels; the
# part after 'TV-' of each key is also used there to build the matching
# regular expression.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
3947
3948
def parse_age_limit(s):
    """Turn an age rating into a numeric age limit.

    Accepts an int between 0 and 21, a string such as '18' or '18+', a
    key of US_RATINGS, or a TV Parental Guidelines label ('TV-14',
    'TV_MA', 'TVPG'). Returns None for anything else.
    """
    if type(s) == int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
3963
3964
def strip_jsonp(code):
    """Remove a JSONP wrapper, returning the text passed to the callback.

    Handles an optional 'window.' prefix, the 'cb && cb(...)' idiom, an
    optional trailing semicolon and trailing '//' comments. Text that
    does not look like JSONP is returned unchanged.
    """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
478c2c61
PH
3973
3974
def js_to_json(code):
    """Rewrite a JavaScript object literal so that it becomes valid JSON.

    Comments and trailing commas are removed, single-quoted strings and
    bare identifiers are double-quoted, and hexadecimal/octal integers
    are converted to decimal (quoted when they are used as keys).
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied inside string literals; any other escape sequence
    # is kept as it is
    STRING_FIXES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_string_part(m):
        part = m.group(0)
        return STRING_FIXES.get(part, part)

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            v = re.sub(r'(?s)\\.|"', fix_string_part, v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                if v.endswith(':'):
                    return '"%d":' % i
                return '%d' % i

        return '"%s"' % v

    token_re = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(token_re, fix_kv, code)
e05f6939
PH
4014
4015
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function mapping a quality id to its index in quality_ids,
    or to -1 when the id is not listed. """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q
4024
acd69589
PH
4025
# %-style template with title, id and ext fields; presumably the default
# output filename template - it is not used within this part of the file
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 4027
a020a0dc
PH
4028
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that the result, including the
    trailing '...', is length characters long. None is passed through. """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4037
4038
def version_tuple(v):
    """Split a version string on '.' and '-' and return its parts as a
    tuple of ints. Raises ValueError for non-numeric parts."""
    parts = re.split(r'[-.]', v)
    return tuple(int(part) for part in parts)
48844745
PH
4041
4042
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is lower than limit.

    When version is empty or one of the two cannot be parsed, the answer
    is 'not assume_new'.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
732ea2f0
PH
4050
4051
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U

    That is the case when this module was loaded by a zipimporter or
    when sys has a 'frozen' attribute. """
    from zipimport import zipimporter

    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
4057
4058
def args_to_str(args):
    """Return the arguments shell-quoted and joined by spaces."""
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
4062
4063
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4071
4072
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    A few complete types are special-cased. Otherwise the subtype (the
    text after the last '/', without parameters, lowercased) is looked
    up in a table and returned as is when it is not listed. None gives
    None.
    """
    if mt is None:
        return None

    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    if mt in FULL_TYPE_MAP:
        return FULL_TYPE_MAP[mt]

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return SUBTYPE_MAP.get(subtype, subtype)
4108
4109
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video and audio codec.

    Returns {'vcodec': ..., 'acodec': ...} where a missing side is
    'none'. If no codec is recognized but exactly two are given, they
    are taken as video and audio in that order. Returns {} for empty
    input or when nothing can be determined. Unknown codecs produce a
    warning on stderr.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    codec_list = [codec for codec in stripped if codec]

    vcodec = acodec = None
    for full_codec in codec_list:
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            # Only the first codec of each kind is kept
            if not vcodec:
                vcodec = full_codec
        elif family in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(codec_list) == 2:
        return {
            'vcodec': codec_list[0],
            'acodec': codec_list[1],
        }
    return {}
4139
4140
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of a response handle.

    The filename of an 'attachment' Content-Disposition header is
    preferred; otherwise the Content-Type header is mapped with
    mimetype2ext().
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4153
4154
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI carrying the bytes data as mime_type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4157
4158
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    Nothing is blocked when no age_limit is set or when the content has
    no limit of its own. """

    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4167
4168
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    without one) and the result of matching an opening '<' after optional
    whitespace is returned: a match object, or None. """

    # UTF-32 marks come first because the UTF-32-LE one starts with the
    # UTF-16-LE one
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    bom, encoding = next(
        (entry for entry in BOMS if first_bytes.startswith(entry[0])),
        (b'', 'utf-8'))
    text = first_bytes[len(bom):].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4187
4188
def determine_protocol(info_dict):
    """Return the protocol of a format.

    An explicit 'protocol' entry wins. Otherwise it is derived from the
    'url' entry: rtmp/mms/rtsp prefixes, then the m3u8/f4m extensions,
    and finally the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4209
4210
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values

    Every column but the last is left-aligned and padded to its widest
    cell plus one; rows are joined by newlines. """
    table = [header_row] + data
    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*table)]
    padded_columns = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    format_str = ' '.join(padded_columns) + '%s'
    lines = [format_str % tuple(row) for row in table]
    return '\n'.join(lines)
347de493
PH
4217
4218
4219def _match_one(filter_part, dct):
4220 COMPARISON_OPERATORS = {
4221 '<': operator.lt,
4222 '<=': operator.le,
4223 '>': operator.gt,
4224 '>=': operator.ge,
4225 '=': operator.eq,
4226 '!=': operator.ne,
4227 }
4228 operator_rex = re.compile(r'''(?x)\s*
4229 (?P<key>[a-z_]+)
4230 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4231 (?:
4232 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4233 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4234 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4235 )
4236 \s*$
4237 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4238 m = operator_rex.search(filter_part)
4239 if m:
4240 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4241 actual_value = dct.get(m.group('key'))
3089bc74
S
4242 if (m.group('quotedstrval') is not None
4243 or m.group('strval') is not None
e5a088dc
S
4244 # If the original field is a string and matching comparisonvalue is
4245 # a number we should respect the origin of the original field
4246 # and process comparison value as a string (see
067aa17e 4247 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4248 or actual_value is not None and m.group('intval') is not None
4249 and isinstance(actual_value, compat_str)):
347de493
PH
4250 if m.group('op') not in ('=', '!='):
4251 raise ValueError(
4252 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4253 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4254 quote = m.group('quote')
4255 if quote is not None:
4256 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4257 else:
4258 try:
4259 comparison_value = int(m.group('intval'))
4260 except ValueError:
4261 comparison_value = parse_filesize(m.group('intval'))
4262 if comparison_value is None:
4263 comparison_value = parse_filesize(m.group('intval') + 'B')
4264 if comparison_value is None:
4265 raise ValueError(
4266 'Invalid integer value %r in filter part %r' % (
4267 m.group('intval'), filter_part))
347de493
PH
4268 if actual_value is None:
4269 return m.group('none_inclusive')
4270 return op(actual_value, comparison_value)
4271
4272 UNARY_OPERATORS = {
1cc47c66
S
4273 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4274 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4275 }
4276 operator_rex = re.compile(r'''(?x)\s*
4277 (?P<op>%s)\s*(?P<key>[a-z_]+)
4278 \s*$
4279 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4280 m = operator_rex.search(filter_part)
4281 if m:
4282 op = UNARY_OPERATORS[m.group('op')]
4283 actual_value = dct.get(m.group('key'))
4284 return op(actual_value)
4285
4286 raise ValueError('Invalid filter part %r' % filter_part)
4287
4288
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    filter_str is split on '&' and every part must pass _match_one(). """

    parts = filter_str.split('&')
    return all(_match_one(part, dct) for part in parts)
4294
4295
def match_filter_func(filter_str):
    """Build a function that checks an info dict against filter_str.

    The returned function gives None when the dict passes the filter and
    a message explaining the skip otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
4304
4305
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP time expression to seconds as a float.

    Understands plain offsets in seconds ('12.5' or '12.5s') and clock
    values 'H:MM:SS' with an optional fraction separated by '.' or ':'.
    Returns None for empty or unrecognized input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
bf6427d2
YCH
4317
4318
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode 'HH:MM:SS,mmm'.

    The value is rounded to whole milliseconds first and then split with
    integer arithmetic. Formatting the float parts directly truncates
    them, which turns e.g. 4.35 into '00:00:04,349'.
    """
    total_ms = int(round(seconds * 1000))
    total_secs, ms = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, ms)
bf6427d2
YCH
4321
4322
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitle data to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Legacy namespace URIs are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved style properties (parent properties included)
    styles = {}
    # properties inherited from the <body>/<div> style, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        """XMLParser target rendering one <p> element as SRT text markup."""

        def __init__(self):
            # Per-instance state. One parser is created per paragraph;
            # keeping these lists as class attributes would share (and leak)
            # the style stack between all paragraphs.
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                # inline tts:* attributes override referenced styles
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # skip properties already in effect from an ancestor
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                # close in reverse order of opening
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the paragraph and stream it through a fresh target
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style whose parent has not been resolved
    # yet is deferred to a later pass.
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Stop when everything is resolved, or when a whole pass resolved
        # nothing new: the remaining parent references can never be
        # satisfied and retrying would loop forever.
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The cue index counts every paragraph, including skipped ones
    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4485
4486
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argument list for a command line option that takes a value.

    @param params          mapping of parameter names to values
    @param command_option  the option string, e.g. '--proxy'
    @param param           key to look up in params
    @returns               [command_option, value-as-text], or [] when the
                           parameter is missing or None

    Every non-None value is converted to text, including falsy ones such
    as 0, so the result never contains a non-string argument.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, compat_str(value)]
4492
4493
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argument list for a boolean command line option.

    Returns [] when the parameter is missing or None. Otherwise the
    parameter must be a bool; it is rendered as true_value or false_value,
    either as a separate argument or, when separator is given, joined to
    command_option with that separator.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4502
4503
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4507
4508
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra command line arguments stored in params.

    @param params   mapping of parameter names to values
    @param param    key to look up in params
    @param default  value returned when the parameter is missing or None
    @returns        params[param] (which must be a list), or default

    A list default is returned as a shallow copy, so a caller mutating the
    result cannot alter the shared default argument for later calls.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
4515
4516
39672624
YCH
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    # Withdrawn two-letter codes kept in _lang_map for lookups only. Each
    # shares its three-letter code with a current two-letter code.
    _deprecated_codes = frozenset(('iw', 'in', 'ji'))

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are used for the lookup.
        Returns None for unknown codes.
        """
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Withdrawn two-letter codes are never returned, so the result does
        not depend on dict iteration order. Returns None for unknown codes.
        """
        for short_name, long_name in cls._lang_map.items():
            if long_name == code and short_name not in cls._deprecated_codes:
                return short_name
4720
4721
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of full country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for a two-letter country code.

        The lookup is case-insensitive; None is returned for unknown codes.
        """
        country_code = code.upper()
        return cls._country_map.get(country_code)
4980
4981
773f291d
S
4982class GeoUtils(object):
4983 # Major IPv4 address blocks per country
4984 _country_ip_map = {
53896ca5 4985 'AD': '46.172.224.0/19',
773f291d
S
4986 'AE': '94.200.0.0/13',
4987 'AF': '149.54.0.0/17',
4988 'AG': '209.59.64.0/18',
4989 'AI': '204.14.248.0/21',
4990 'AL': '46.99.0.0/16',
4991 'AM': '46.70.0.0/15',
4992 'AO': '105.168.0.0/13',
53896ca5
S
4993 'AP': '182.50.184.0/21',
4994 'AQ': '23.154.160.0/24',
773f291d
S
4995 'AR': '181.0.0.0/12',
4996 'AS': '202.70.112.0/20',
53896ca5 4997 'AT': '77.116.0.0/14',
773f291d
S
4998 'AU': '1.128.0.0/11',
4999 'AW': '181.41.0.0/18',
53896ca5
S
5000 'AX': '185.217.4.0/22',
5001 'AZ': '5.197.0.0/16',
773f291d
S
5002 'BA': '31.176.128.0/17',
5003 'BB': '65.48.128.0/17',
5004 'BD': '114.130.0.0/16',
5005 'BE': '57.0.0.0/8',
53896ca5 5006 'BF': '102.178.0.0/15',
773f291d
S
5007 'BG': '95.42.0.0/15',
5008 'BH': '37.131.0.0/17',
5009 'BI': '154.117.192.0/18',
5010 'BJ': '137.255.0.0/16',
53896ca5 5011 'BL': '185.212.72.0/23',
773f291d
S
5012 'BM': '196.12.64.0/18',
5013 'BN': '156.31.0.0/16',
5014 'BO': '161.56.0.0/16',
5015 'BQ': '161.0.80.0/20',
53896ca5 5016 'BR': '191.128.0.0/12',
773f291d
S
5017 'BS': '24.51.64.0/18',
5018 'BT': '119.2.96.0/19',
5019 'BW': '168.167.0.0/16',
5020 'BY': '178.120.0.0/13',
5021 'BZ': '179.42.192.0/18',
5022 'CA': '99.224.0.0/11',
5023 'CD': '41.243.0.0/16',
53896ca5
S
5024 'CF': '197.242.176.0/21',
5025 'CG': '160.113.0.0/16',
773f291d 5026 'CH': '85.0.0.0/13',
53896ca5 5027 'CI': '102.136.0.0/14',
773f291d
S
5028 'CK': '202.65.32.0/19',
5029 'CL': '152.172.0.0/14',
53896ca5 5030 'CM': '102.244.0.0/14',
773f291d
S
5031 'CN': '36.128.0.0/10',
5032 'CO': '181.240.0.0/12',
5033 'CR': '201.192.0.0/12',
5034 'CU': '152.206.0.0/15',
5035 'CV': '165.90.96.0/19',
5036 'CW': '190.88.128.0/17',
53896ca5 5037 'CY': '31.153.0.0/16',
773f291d
S
5038 'CZ': '88.100.0.0/14',
5039 'DE': '53.0.0.0/8',
5040 'DJ': '197.241.0.0/17',
5041 'DK': '87.48.0.0/12',
5042 'DM': '192.243.48.0/20',
5043 'DO': '152.166.0.0/15',
5044 'DZ': '41.96.0.0/12',
5045 'EC': '186.68.0.0/15',
5046 'EE': '90.190.0.0/15',
5047 'EG': '156.160.0.0/11',
5048 'ER': '196.200.96.0/20',
5049 'ES': '88.0.0.0/11',
5050 'ET': '196.188.0.0/14',
5051 'EU': '2.16.0.0/13',
5052 'FI': '91.152.0.0/13',
5053 'FJ': '144.120.0.0/16',
53896ca5 5054 'FK': '80.73.208.0/21',
773f291d
S
5055 'FM': '119.252.112.0/20',
5056 'FO': '88.85.32.0/19',
5057 'FR': '90.0.0.0/9',
5058 'GA': '41.158.0.0/15',
5059 'GB': '25.0.0.0/8',
5060 'GD': '74.122.88.0/21',
5061 'GE': '31.146.0.0/16',
5062 'GF': '161.22.64.0/18',
5063 'GG': '62.68.160.0/19',
53896ca5
S
5064 'GH': '154.160.0.0/12',
5065 'GI': '95.164.0.0/16',
773f291d
S
5066 'GL': '88.83.0.0/19',
5067 'GM': '160.182.0.0/15',
5068 'GN': '197.149.192.0/18',
5069 'GP': '104.250.0.0/19',
5070 'GQ': '105.235.224.0/20',
5071 'GR': '94.64.0.0/13',
5072 'GT': '168.234.0.0/16',
5073 'GU': '168.123.0.0/16',
5074 'GW': '197.214.80.0/20',
5075 'GY': '181.41.64.0/18',
5076 'HK': '113.252.0.0/14',
5077 'HN': '181.210.0.0/16',
5078 'HR': '93.136.0.0/13',
5079 'HT': '148.102.128.0/17',
5080 'HU': '84.0.0.0/14',
5081 'ID': '39.192.0.0/10',
5082 'IE': '87.32.0.0/12',
5083 'IL': '79.176.0.0/13',
5084 'IM': '5.62.80.0/20',
5085 'IN': '117.192.0.0/10',
5086 'IO': '203.83.48.0/21',
5087 'IQ': '37.236.0.0/14',
5088 'IR': '2.176.0.0/12',
5089 'IS': '82.221.0.0/16',
5090 'IT': '79.0.0.0/10',
5091 'JE': '87.244.64.0/18',
5092 'JM': '72.27.0.0/17',
5093 'JO': '176.29.0.0/16',
53896ca5 5094 'JP': '133.0.0.0/8',
773f291d
S
5095 'KE': '105.48.0.0/12',
5096 'KG': '158.181.128.0/17',
5097 'KH': '36.37.128.0/17',
5098 'KI': '103.25.140.0/22',
5099 'KM': '197.255.224.0/20',
53896ca5 5100 'KN': '198.167.192.0/19',
773f291d
S
5101 'KP': '175.45.176.0/22',
5102 'KR': '175.192.0.0/10',
5103 'KW': '37.36.0.0/14',
5104 'KY': '64.96.0.0/15',
5105 'KZ': '2.72.0.0/13',
5106 'LA': '115.84.64.0/18',
5107 'LB': '178.135.0.0/16',
53896ca5 5108 'LC': '24.92.144.0/20',
773f291d
S
5109 'LI': '82.117.0.0/19',
5110 'LK': '112.134.0.0/15',
53896ca5 5111 'LR': '102.183.0.0/16',
773f291d
S
5112 'LS': '129.232.0.0/17',
5113 'LT': '78.56.0.0/13',
5114 'LU': '188.42.0.0/16',
5115 'LV': '46.109.0.0/16',
5116 'LY': '41.252.0.0/14',
5117 'MA': '105.128.0.0/11',
5118 'MC': '88.209.64.0/18',
5119 'MD': '37.246.0.0/16',
5120 'ME': '178.175.0.0/17',
5121 'MF': '74.112.232.0/21',
5122 'MG': '154.126.0.0/17',
5123 'MH': '117.103.88.0/21',
5124 'MK': '77.28.0.0/15',
5125 'ML': '154.118.128.0/18',
5126 'MM': '37.111.0.0/17',
5127 'MN': '49.0.128.0/17',
5128 'MO': '60.246.0.0/16',
5129 'MP': '202.88.64.0/20',
5130 'MQ': '109.203.224.0/19',
5131 'MR': '41.188.64.0/18',
5132 'MS': '208.90.112.0/22',
5133 'MT': '46.11.0.0/16',
5134 'MU': '105.16.0.0/12',
5135 'MV': '27.114.128.0/18',
53896ca5 5136 'MW': '102.70.0.0/15',
773f291d
S
5137 'MX': '187.192.0.0/11',
5138 'MY': '175.136.0.0/13',
5139 'MZ': '197.218.0.0/15',
5140 'NA': '41.182.0.0/16',
5141 'NC': '101.101.0.0/18',
5142 'NE': '197.214.0.0/18',
5143 'NF': '203.17.240.0/22',
5144 'NG': '105.112.0.0/12',
5145 'NI': '186.76.0.0/15',
5146 'NL': '145.96.0.0/11',
5147 'NO': '84.208.0.0/13',
5148 'NP': '36.252.0.0/15',
5149 'NR': '203.98.224.0/19',
5150 'NU': '49.156.48.0/22',
5151 'NZ': '49.224.0.0/14',
5152 'OM': '5.36.0.0/15',
5153 'PA': '186.72.0.0/15',
5154 'PE': '186.160.0.0/14',
5155 'PF': '123.50.64.0/18',
5156 'PG': '124.240.192.0/19',
5157 'PH': '49.144.0.0/13',
5158 'PK': '39.32.0.0/11',
5159 'PL': '83.0.0.0/11',
5160 'PM': '70.36.0.0/20',
5161 'PR': '66.50.0.0/16',
5162 'PS': '188.161.0.0/16',
5163 'PT': '85.240.0.0/13',
5164 'PW': '202.124.224.0/20',
5165 'PY': '181.120.0.0/14',
5166 'QA': '37.210.0.0/15',
53896ca5 5167 'RE': '102.35.0.0/16',
773f291d 5168 'RO': '79.112.0.0/13',
53896ca5 5169 'RS': '93.86.0.0/15',
773f291d 5170 'RU': '5.136.0.0/13',
53896ca5 5171 'RW': '41.186.0.0/16',
773f291d
S
5172 'SA': '188.48.0.0/13',
5173 'SB': '202.1.160.0/19',
5174 'SC': '154.192.0.0/11',
53896ca5 5175 'SD': '102.120.0.0/13',
773f291d 5176 'SE': '78.64.0.0/12',
53896ca5 5177 'SG': '8.128.0.0/10',
773f291d
S
5178 'SI': '188.196.0.0/14',
5179 'SK': '78.98.0.0/15',
53896ca5 5180 'SL': '102.143.0.0/17',
773f291d
S
5181 'SM': '89.186.32.0/19',
5182 'SN': '41.82.0.0/15',
53896ca5 5183 'SO': '154.115.192.0/18',
773f291d
S
5184 'SR': '186.179.128.0/17',
5185 'SS': '105.235.208.0/21',
5186 'ST': '197.159.160.0/19',
5187 'SV': '168.243.0.0/16',
5188 'SX': '190.102.0.0/20',
5189 'SY': '5.0.0.0/16',
5190 'SZ': '41.84.224.0/19',
5191 'TC': '65.255.48.0/20',
5192 'TD': '154.68.128.0/19',
5193 'TG': '196.168.0.0/14',
5194 'TH': '171.96.0.0/13',
5195 'TJ': '85.9.128.0/18',
5196 'TK': '27.96.24.0/21',
5197 'TL': '180.189.160.0/20',
5198 'TM': '95.85.96.0/19',
5199 'TN': '197.0.0.0/11',
5200 'TO': '175.176.144.0/21',
5201 'TR': '78.160.0.0/11',
5202 'TT': '186.44.0.0/15',
5203 'TV': '202.2.96.0/19',
5204 'TW': '120.96.0.0/11',
5205 'TZ': '156.156.0.0/14',
53896ca5
S
5206 'UA': '37.52.0.0/14',
5207 'UG': '102.80.0.0/13',
5208 'US': '6.0.0.0/8',
773f291d 5209 'UY': '167.56.0.0/13',
53896ca5 5210 'UZ': '84.54.64.0/18',
773f291d 5211 'VA': '212.77.0.0/19',
53896ca5 5212 'VC': '207.191.240.0/21',
773f291d 5213 'VE': '186.88.0.0/13',
53896ca5 5214 'VG': '66.81.192.0/20',
773f291d
S
5215 'VI': '146.226.0.0/16',
5216 'VN': '14.160.0.0/11',
5217 'VU': '202.80.32.0/20',
5218 'WF': '117.20.32.0/21',
5219 'WS': '202.4.32.0/19',
5220 'YE': '134.35.0.0/16',
5221 'YT': '41.242.116.0/22',
5222 'ZA': '41.0.0.0/11',
53896ca5
S
5223 'ZM': '102.144.0.0/13',
5224 'ZW': '102.177.192.0/18',
773f291d
S
5225 }
5226
5227 @classmethod
5f95927a
S
5228 def random_ipv4(cls, code_or_block):
5229 if len(code_or_block) == 2:
5230 block = cls._country_ip_map.get(code_or_block.upper())
5231 if not block:
5232 return None
5233 else:
5234 block = code_or_block
773f291d
S
5235 addr, preflen = block.split('/')
5236 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5237 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5238 return compat_str(socket.inet_ntoa(
4248dad9 5239 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5240
5241
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets a single request choose its own proxy.

    The proxy is taken from the request's 'Ytdl-request-proxy' header when
    that header is present.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            default_open = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, default_open)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request header overrides the handler-level proxy and is
        # removed from the request once it has been read.
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers wrap the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5265
5266
0a5445dd
YCH
5267# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5268# released into Public Domain
5269# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5270
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    When blocksize is greater than zero, the result is left-padded with
    zero bytes so that its length is a multiple of blocksize. Values that
    are not positive yield a single zero byte (before padding).
    """
    remaining = int(n)
    packed = b''
    # emit 32 bits at a time, most significant word first
    while remaining > 0:
        packed = compat_struct_pack('>I', remaining & 0xffffffff) + packed
        remaining >>= 32
    # strip off leading zeros, but keep one byte when nothing else is left
    packed = packed.lstrip(b'\000') or b'\000'
    if blocksize > 0:
        overhang = len(packed) % blocksize
        if overhang:
            packed = (blocksize - overhang) * b'\000' + packed
    return packed
5299
5300
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # left-pad with zero bytes to a whole number of 32-bit words
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s
    result = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        result = (result << 32) + word
    return result
5316
5317
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # the byte order of the data is reversed before it is read as a number
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
81bdc8fd
YCH
5333
5334
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + PS + [0] + data, where PS is random
    padding.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # PS must consist of non-zero octets only: the first zero octet after
    # the [0, 2] header marks the start of the data, so a zero inside the
    # padding would make the message unparseable.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5348
5349
def encode_base_n(num, n, table=None):
    """Encode a non-negative integer as a string in base n.

    @param num    integer to encode
    @param n      base
    @param table  digit characters to use; defaults to the first n
                  characters of 0-9, a-z, A-Z
    @returns      the encoded string (table[0] for num == 0)
    @raises ValueError if n exceeds the table length, if num is negative,
                       or if n is below 2 for a non-zero num
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # The division loop below only terminates for positive num and n >= 2
    if num < 0 or n < 2:
        raise ValueError('cannot encode %d in base %d' % (num, n))

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
f52354a8
YCH
5366
5367
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into its symbol form.

    The match supplies the packed source, a base, a symbol count and a
    '|'-separated symbol list. Every word of the packed source is replaced
    through a table keyed by the base-n encoding of each symbol index; an
    empty symbol stands for its own key.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    packed_source, base, count, symbols = mobj.groups()
    base = int(base)
    symbols = symbols.split('|')

    symbol_table = {}
    remaining = int(count)
    while remaining:
        remaining -= 1
        key = encode_base_n(remaining, base)
        symbol_table[key] = symbols[remaining] or key

    def substitute(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', substitute, packed_source)
e154c651 5384
5385
1ced2221
S
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions.

    Positions wrap around the end of alphabet; characters that are not in
    alphabet are copied unchanged. A shift of 0 returns s itself.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for ch in s:
        if ch in alphabet:
            ch = alphabet[(alphabet.index(ch) + shift) % size]
        shifted.append(ch)
    return ''.join(shifted)
5393
5394
def rot47(s):
    """Apply caesar() with a shift of 47 over the 94 characters from '!' to '~'."""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
5397
5398
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=VALUE attribute list into a dict.

    Surrounding double quotes are removed from quoted values.
    """
    attributes = {}
    pattern = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    for match in re.finditer(pattern, attrib):
        value = match.group('val')
        if value.startswith('"'):
            value = value[1:-1]
        attributes[match.group('key')] = value
    return attributes
1143535d
YCH
5406
5407
def urshift(val, n):
    """Right-shift val by n bits, treating a negative val as unsigned 32-bit."""
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5410
5411
5412# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5413# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into (width, height, pixels).

    pixels is a list of rows, each a list of width * 3 integers obtained by
    undoing the per-row filter of the decompressed IDAT data.
    NOTE(review): the fixed stride of width * 3 and the filter distance of 3
    presumably mean only 8-bit RGB, non-interlaced images are handled;
    confirm against callers.

    @raises IOError if the signature/IHDR check fails or no IDAT data exists
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature = png_data[:8]
    remaining = png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    struct_formats = {1: '>B', 2: '>H', 4: '>I'}

    def unpack_integer(raw):
        return compat_struct_unpack(struct_formats[len(raw)], raw)[0]

    # Each chunk is: 4-byte length, 4-byte type, data, 4-byte CRC
    chunks = []
    while remaining:
        length = unpack_integer(remaining[:4])
        chunks.append({
            'type': remaining[4:8],
            'length': length,
            'data': remaining[8:8 + length]
        })
        remaining = remaining[8 + length + 4:]  # also skips the CRC

    ihdr = chunks[0]['data']
    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # every row is preceded by one filter-type byte
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]

        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + row_start + x]

            has_left = x > 2
            has_up = y > 0
            left = current_row[x - 3] if has_left else 0
            up = pixels[y - 1][x] if has_up else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = pixels[y - 1][x - 3] if has_left and has_up else 0

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
                color = (color + predictor) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5517
5518
def write_xattr(path, key, value):
    """Store `value` under the extended attribute `key` of the file `path`.

    Backends are tried in the following order:

    1. The Python `xattr` module. An implementation exposing `set` is
       treated as pyxattr (which must be at least version 0.5.0), anything
       else is treated as the `xattr` package and its `setxattr` is used.
    2. Only if that module cannot be imported:
       - on Windows (`compat_os_name == 'nt'`) the value is written to the
         NTFS Alternate Data Stream `path:key`;
       - elsewhere the `setfattr` tool is preferred, then the `xattr` tool.

    `value` is passed unchanged to the module and ADS backends; for the
    command line tools it is decoded as UTF-8 first.

    Raises XAttrUnavailableError if pyxattr is too old or no tool could be
    found, and XAttrMetadataError if the selected backend reports a failure.
    """
    try:
        # Preferred backend: a Python binding for extended attributes
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setter = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            minimum_pyxattr = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(minimum_pyxattr):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        minimum_pyxattr, xattr.__version__))
            setter = xattr.set

        try:
            setter(path, key, value)
        except EnvironmentError as err:
            raise XAttrMetadataError(err.errno, err.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            stream_path = path + ':' + key
            try:
                with open(stream_path, 'wb') as stream:
                    stream.write(value)
            except EnvironmentError as err:
                raise XAttrMetadataError(err.errno, err.strerror)
            return

        have_setfattr = check_executable('setfattr', ['--version'])
        have_xattr = check_executable('xattr', ['-h'])

        if not (have_setfattr or have_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        # The command line tools receive the value as text
        text_value = value.decode('utf-8')
        if have_setfattr:
            tool, tool_args = 'setfattr', ['-n', key, '-v', text_value]
        else:
            tool, tool_args = 'xattr', ['-w', key, text_value]

        # Full command line: <tool> <options...> <path>
        argv = [encodeFilename(tool, True)]
        argv.extend(encodeArgument(arg) for arg in tool_args)
        argv.append(encodeFilename(path, True))

        try:
            proc = subprocess.Popen(
                argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as err:
            raise XAttrMetadataError(err.errno, err.strerror)

        _, error_output = proc.communicate()
        error_output = error_output.decode('utf-8', 'replace')
        if proc.returncode != 0:
            # Surface the tool's own diagnostics to the caller
            raise XAttrMetadataError(proc.returncode, error_output)
0c265486
YCH
5601
5602
def random_birthday(year_field, month_field, day_field):
    """Return a randomly chosen birth date as a dict of strings.

    The date is picked uniformly (via `random.randint`) from the inclusive
    range 1950-01-01 .. 1995-12-31.  The resulting dict maps the three
    supplied field names to the year, month and day of that date, each
    converted with `str` (so months and days are not zero-padded).
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(
        (name, str(part)) for name, part in zip(field_names, date_parts))