]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
Revert "[utils] Add support for cookies with spaces used instead of tabs"
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
62e609ab 10import contextlib
e3946f98 11import ctypes
c496ca96
PH
12import datetime
13import email.utils
0c265486 14import email.header
f45c185f 15import errno
be4a824d 16import functools
d77c3dfd 17import gzip
03f9daab 18import io
79a2e94e 19import itertools
f4bfd65f 20import json
d77c3dfd 21import locale
02dbf93f 22import math
347de493 23import operator
d77c3dfd 24import os
c496ca96 25import platform
773f291d 26import random
d77c3dfd 27import re
c496ca96 28import socket
79a2e94e 29import ssl
1c088fa8 30import subprocess
d77c3dfd 31import sys
181c8655 32import tempfile
01951dda 33import traceback
bcf89ce6 34import xml.etree.ElementTree
d77c3dfd 35import zlib
d77c3dfd 36
8c25f81b 37from .compat import (
b4a3d461 38 compat_HTMLParseError,
8bb56eee 39 compat_HTMLParser,
8f9312c3 40 compat_basestring,
8c25f81b 41 compat_chr,
1bab3437 42 compat_cookiejar,
d7cd9a9e 43 compat_ctypes_WINFUNCTYPE,
36e6f62c 44 compat_etree_fromstring,
51098426 45 compat_expanduser,
8c25f81b 46 compat_html_entities,
55b2f099 47 compat_html_entities_html5,
be4a824d 48 compat_http_client,
42db58ec 49 compat_integer_types,
c86b6142 50 compat_kwargs,
efa97bdc 51 compat_os_name,
8c25f81b 52 compat_parse_qs,
702ccf2d 53 compat_shlex_quote,
8c25f81b 54 compat_str,
edaa23f8 55 compat_struct_pack,
d3f8e038 56 compat_struct_unpack,
8c25f81b
PH
57 compat_urllib_error,
58 compat_urllib_parse,
15707c7e 59 compat_urllib_parse_urlencode,
8c25f81b 60 compat_urllib_parse_urlparse,
7581bfc9 61 compat_urllib_parse_unquote_plus,
8c25f81b
PH
62 compat_urllib_request,
63 compat_urlparse,
810c10ba 64 compat_xpath,
8c25f81b 65)
4644ac55 66
71aff188
YCH
67from .socks import (
68 ProxyType,
69 sockssocket,
70)
71
4644ac55 72
51fb4995
YCH
def register_socks_protocols():
    """Make URL parsing netloc-aware for the SOCKS proxy schemes.

    Appends 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc`` unless they are already listed, so
    calling this more than once does not add duplicates.
    """
    # In Python < 2.6.5, urlsplit() suffers from bug
    # https://bugs.python.org/issue7904: URLs with protocols not in
    # urlparse.uses_netloc are not handled correctly
    netloc_schemes = compat_urlparse.uses_netloc
    for proto in ('socks', 'socks4', 'socks4a', 'socks5'):
        if proto in netloc_schemes:
            continue
        netloc_schemes.append(proto)
80
81
468e2e92
FV
# Type of a compiled regular expression object, taken from an actual compiled
# pattern because this type is not clearly defined otherwise.
compiled_regex_type = type(re.compile(''))
84
f7a147e3
S
85
86def random_user_agent():
87 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
88 _CHROME_VERSIONS = (
89 '74.0.3729.129',
90 '76.0.3780.3',
91 '76.0.3780.2',
92 '74.0.3729.128',
93 '76.0.3780.1',
94 '76.0.3780.0',
95 '75.0.3770.15',
96 '74.0.3729.127',
97 '74.0.3729.126',
98 '76.0.3779.1',
99 '76.0.3779.0',
100 '75.0.3770.14',
101 '74.0.3729.125',
102 '76.0.3778.1',
103 '76.0.3778.0',
104 '75.0.3770.13',
105 '74.0.3729.124',
106 '74.0.3729.123',
107 '73.0.3683.121',
108 '76.0.3777.1',
109 '76.0.3777.0',
110 '75.0.3770.12',
111 '74.0.3729.122',
112 '76.0.3776.4',
113 '75.0.3770.11',
114 '74.0.3729.121',
115 '76.0.3776.3',
116 '76.0.3776.2',
117 '73.0.3683.120',
118 '74.0.3729.120',
119 '74.0.3729.119',
120 '74.0.3729.118',
121 '76.0.3776.1',
122 '76.0.3776.0',
123 '76.0.3775.5',
124 '75.0.3770.10',
125 '74.0.3729.117',
126 '76.0.3775.4',
127 '76.0.3775.3',
128 '74.0.3729.116',
129 '75.0.3770.9',
130 '76.0.3775.2',
131 '76.0.3775.1',
132 '76.0.3775.0',
133 '75.0.3770.8',
134 '74.0.3729.115',
135 '74.0.3729.114',
136 '76.0.3774.1',
137 '76.0.3774.0',
138 '75.0.3770.7',
139 '74.0.3729.113',
140 '74.0.3729.112',
141 '74.0.3729.111',
142 '76.0.3773.1',
143 '76.0.3773.0',
144 '75.0.3770.6',
145 '74.0.3729.110',
146 '74.0.3729.109',
147 '76.0.3772.1',
148 '76.0.3772.0',
149 '75.0.3770.5',
150 '74.0.3729.108',
151 '74.0.3729.107',
152 '76.0.3771.1',
153 '76.0.3771.0',
154 '75.0.3770.4',
155 '74.0.3729.106',
156 '74.0.3729.105',
157 '75.0.3770.3',
158 '74.0.3729.104',
159 '74.0.3729.103',
160 '74.0.3729.102',
161 '75.0.3770.2',
162 '74.0.3729.101',
163 '75.0.3770.1',
164 '75.0.3770.0',
165 '74.0.3729.100',
166 '75.0.3769.5',
167 '75.0.3769.4',
168 '74.0.3729.99',
169 '75.0.3769.3',
170 '75.0.3769.2',
171 '75.0.3768.6',
172 '74.0.3729.98',
173 '75.0.3769.1',
174 '75.0.3769.0',
175 '74.0.3729.97',
176 '73.0.3683.119',
177 '73.0.3683.118',
178 '74.0.3729.96',
179 '75.0.3768.5',
180 '75.0.3768.4',
181 '75.0.3768.3',
182 '75.0.3768.2',
183 '74.0.3729.95',
184 '74.0.3729.94',
185 '75.0.3768.1',
186 '75.0.3768.0',
187 '74.0.3729.93',
188 '74.0.3729.92',
189 '73.0.3683.117',
190 '74.0.3729.91',
191 '75.0.3766.3',
192 '74.0.3729.90',
193 '75.0.3767.2',
194 '75.0.3767.1',
195 '75.0.3767.0',
196 '74.0.3729.89',
197 '73.0.3683.116',
198 '75.0.3766.2',
199 '74.0.3729.88',
200 '75.0.3766.1',
201 '75.0.3766.0',
202 '74.0.3729.87',
203 '73.0.3683.115',
204 '74.0.3729.86',
205 '75.0.3765.1',
206 '75.0.3765.0',
207 '74.0.3729.85',
208 '73.0.3683.114',
209 '74.0.3729.84',
210 '75.0.3764.1',
211 '75.0.3764.0',
212 '74.0.3729.83',
213 '73.0.3683.113',
214 '75.0.3763.2',
215 '75.0.3761.4',
216 '74.0.3729.82',
217 '75.0.3763.1',
218 '75.0.3763.0',
219 '74.0.3729.81',
220 '73.0.3683.112',
221 '75.0.3762.1',
222 '75.0.3762.0',
223 '74.0.3729.80',
224 '75.0.3761.3',
225 '74.0.3729.79',
226 '73.0.3683.111',
227 '75.0.3761.2',
228 '74.0.3729.78',
229 '74.0.3729.77',
230 '75.0.3761.1',
231 '75.0.3761.0',
232 '73.0.3683.110',
233 '74.0.3729.76',
234 '74.0.3729.75',
235 '75.0.3760.0',
236 '74.0.3729.74',
237 '75.0.3759.8',
238 '75.0.3759.7',
239 '75.0.3759.6',
240 '74.0.3729.73',
241 '75.0.3759.5',
242 '74.0.3729.72',
243 '73.0.3683.109',
244 '75.0.3759.4',
245 '75.0.3759.3',
246 '74.0.3729.71',
247 '75.0.3759.2',
248 '74.0.3729.70',
249 '73.0.3683.108',
250 '74.0.3729.69',
251 '75.0.3759.1',
252 '75.0.3759.0',
253 '74.0.3729.68',
254 '73.0.3683.107',
255 '74.0.3729.67',
256 '75.0.3758.1',
257 '75.0.3758.0',
258 '74.0.3729.66',
259 '73.0.3683.106',
260 '74.0.3729.65',
261 '75.0.3757.1',
262 '75.0.3757.0',
263 '74.0.3729.64',
264 '73.0.3683.105',
265 '74.0.3729.63',
266 '75.0.3756.1',
267 '75.0.3756.0',
268 '74.0.3729.62',
269 '73.0.3683.104',
270 '75.0.3755.3',
271 '75.0.3755.2',
272 '73.0.3683.103',
273 '75.0.3755.1',
274 '75.0.3755.0',
275 '74.0.3729.61',
276 '73.0.3683.102',
277 '74.0.3729.60',
278 '75.0.3754.2',
279 '74.0.3729.59',
280 '75.0.3753.4',
281 '74.0.3729.58',
282 '75.0.3754.1',
283 '75.0.3754.0',
284 '74.0.3729.57',
285 '73.0.3683.101',
286 '75.0.3753.3',
287 '75.0.3752.2',
288 '75.0.3753.2',
289 '74.0.3729.56',
290 '75.0.3753.1',
291 '75.0.3753.0',
292 '74.0.3729.55',
293 '73.0.3683.100',
294 '74.0.3729.54',
295 '75.0.3752.1',
296 '75.0.3752.0',
297 '74.0.3729.53',
298 '73.0.3683.99',
299 '74.0.3729.52',
300 '75.0.3751.1',
301 '75.0.3751.0',
302 '74.0.3729.51',
303 '73.0.3683.98',
304 '74.0.3729.50',
305 '75.0.3750.0',
306 '74.0.3729.49',
307 '74.0.3729.48',
308 '74.0.3729.47',
309 '75.0.3749.3',
310 '74.0.3729.46',
311 '73.0.3683.97',
312 '75.0.3749.2',
313 '74.0.3729.45',
314 '75.0.3749.1',
315 '75.0.3749.0',
316 '74.0.3729.44',
317 '73.0.3683.96',
318 '74.0.3729.43',
319 '74.0.3729.42',
320 '75.0.3748.1',
321 '75.0.3748.0',
322 '74.0.3729.41',
323 '75.0.3747.1',
324 '73.0.3683.95',
325 '75.0.3746.4',
326 '74.0.3729.40',
327 '74.0.3729.39',
328 '75.0.3747.0',
329 '75.0.3746.3',
330 '75.0.3746.2',
331 '74.0.3729.38',
332 '75.0.3746.1',
333 '75.0.3746.0',
334 '74.0.3729.37',
335 '73.0.3683.94',
336 '75.0.3745.5',
337 '75.0.3745.4',
338 '75.0.3745.3',
339 '75.0.3745.2',
340 '74.0.3729.36',
341 '75.0.3745.1',
342 '75.0.3745.0',
343 '75.0.3744.2',
344 '74.0.3729.35',
345 '73.0.3683.93',
346 '74.0.3729.34',
347 '75.0.3744.1',
348 '75.0.3744.0',
349 '74.0.3729.33',
350 '73.0.3683.92',
351 '74.0.3729.32',
352 '74.0.3729.31',
353 '73.0.3683.91',
354 '75.0.3741.2',
355 '75.0.3740.5',
356 '74.0.3729.30',
357 '75.0.3741.1',
358 '75.0.3741.0',
359 '74.0.3729.29',
360 '75.0.3740.4',
361 '73.0.3683.90',
362 '74.0.3729.28',
363 '75.0.3740.3',
364 '73.0.3683.89',
365 '75.0.3740.2',
366 '74.0.3729.27',
367 '75.0.3740.1',
368 '75.0.3740.0',
369 '74.0.3729.26',
370 '73.0.3683.88',
371 '73.0.3683.87',
372 '74.0.3729.25',
373 '75.0.3739.1',
374 '75.0.3739.0',
375 '73.0.3683.86',
376 '74.0.3729.24',
377 '73.0.3683.85',
378 '75.0.3738.4',
379 '75.0.3738.3',
380 '75.0.3738.2',
381 '75.0.3738.1',
382 '75.0.3738.0',
383 '74.0.3729.23',
384 '73.0.3683.84',
385 '74.0.3729.22',
386 '74.0.3729.21',
387 '75.0.3737.1',
388 '75.0.3737.0',
389 '74.0.3729.20',
390 '73.0.3683.83',
391 '74.0.3729.19',
392 '75.0.3736.1',
393 '75.0.3736.0',
394 '74.0.3729.18',
395 '73.0.3683.82',
396 '74.0.3729.17',
397 '75.0.3735.1',
398 '75.0.3735.0',
399 '74.0.3729.16',
400 '73.0.3683.81',
401 '75.0.3734.1',
402 '75.0.3734.0',
403 '74.0.3729.15',
404 '73.0.3683.80',
405 '74.0.3729.14',
406 '75.0.3733.1',
407 '75.0.3733.0',
408 '75.0.3732.1',
409 '74.0.3729.13',
410 '74.0.3729.12',
411 '73.0.3683.79',
412 '74.0.3729.11',
413 '75.0.3732.0',
414 '74.0.3729.10',
415 '73.0.3683.78',
416 '74.0.3729.9',
417 '74.0.3729.8',
418 '74.0.3729.7',
419 '75.0.3731.3',
420 '75.0.3731.2',
421 '75.0.3731.0',
422 '74.0.3729.6',
423 '73.0.3683.77',
424 '73.0.3683.76',
425 '75.0.3730.5',
426 '75.0.3730.4',
427 '73.0.3683.75',
428 '74.0.3729.5',
429 '73.0.3683.74',
430 '75.0.3730.3',
431 '75.0.3730.2',
432 '74.0.3729.4',
433 '73.0.3683.73',
434 '73.0.3683.72',
435 '75.0.3730.1',
436 '75.0.3730.0',
437 '74.0.3729.3',
438 '73.0.3683.71',
439 '74.0.3729.2',
440 '73.0.3683.70',
441 '74.0.3729.1',
442 '74.0.3729.0',
443 '74.0.3726.4',
444 '73.0.3683.69',
445 '74.0.3726.3',
446 '74.0.3728.0',
447 '74.0.3726.2',
448 '73.0.3683.68',
449 '74.0.3726.1',
450 '74.0.3726.0',
451 '74.0.3725.4',
452 '73.0.3683.67',
453 '73.0.3683.66',
454 '74.0.3725.3',
455 '74.0.3725.2',
456 '74.0.3725.1',
457 '74.0.3724.8',
458 '74.0.3725.0',
459 '73.0.3683.65',
460 '74.0.3724.7',
461 '74.0.3724.6',
462 '74.0.3724.5',
463 '74.0.3724.4',
464 '74.0.3724.3',
465 '74.0.3724.2',
466 '74.0.3724.1',
467 '74.0.3724.0',
468 '73.0.3683.64',
469 '74.0.3723.1',
470 '74.0.3723.0',
471 '73.0.3683.63',
472 '74.0.3722.1',
473 '74.0.3722.0',
474 '73.0.3683.62',
475 '74.0.3718.9',
476 '74.0.3702.3',
477 '74.0.3721.3',
478 '74.0.3721.2',
479 '74.0.3721.1',
480 '74.0.3721.0',
481 '74.0.3720.6',
482 '73.0.3683.61',
483 '72.0.3626.122',
484 '73.0.3683.60',
485 '74.0.3720.5',
486 '72.0.3626.121',
487 '74.0.3718.8',
488 '74.0.3720.4',
489 '74.0.3720.3',
490 '74.0.3718.7',
491 '74.0.3720.2',
492 '74.0.3720.1',
493 '74.0.3720.0',
494 '74.0.3718.6',
495 '74.0.3719.5',
496 '73.0.3683.59',
497 '74.0.3718.5',
498 '74.0.3718.4',
499 '74.0.3719.4',
500 '74.0.3719.3',
501 '74.0.3719.2',
502 '74.0.3719.1',
503 '73.0.3683.58',
504 '74.0.3719.0',
505 '73.0.3683.57',
506 '73.0.3683.56',
507 '74.0.3718.3',
508 '73.0.3683.55',
509 '74.0.3718.2',
510 '74.0.3718.1',
511 '74.0.3718.0',
512 '73.0.3683.54',
513 '74.0.3717.2',
514 '73.0.3683.53',
515 '74.0.3717.1',
516 '74.0.3717.0',
517 '73.0.3683.52',
518 '74.0.3716.1',
519 '74.0.3716.0',
520 '73.0.3683.51',
521 '74.0.3715.1',
522 '74.0.3715.0',
523 '73.0.3683.50',
524 '74.0.3711.2',
525 '74.0.3714.2',
526 '74.0.3713.3',
527 '74.0.3714.1',
528 '74.0.3714.0',
529 '73.0.3683.49',
530 '74.0.3713.1',
531 '74.0.3713.0',
532 '72.0.3626.120',
533 '73.0.3683.48',
534 '74.0.3712.2',
535 '74.0.3712.1',
536 '74.0.3712.0',
537 '73.0.3683.47',
538 '72.0.3626.119',
539 '73.0.3683.46',
540 '74.0.3710.2',
541 '72.0.3626.118',
542 '74.0.3711.1',
543 '74.0.3711.0',
544 '73.0.3683.45',
545 '72.0.3626.117',
546 '74.0.3710.1',
547 '74.0.3710.0',
548 '73.0.3683.44',
549 '72.0.3626.116',
550 '74.0.3709.1',
551 '74.0.3709.0',
552 '74.0.3704.9',
553 '73.0.3683.43',
554 '72.0.3626.115',
555 '74.0.3704.8',
556 '74.0.3704.7',
557 '74.0.3708.0',
558 '74.0.3706.7',
559 '74.0.3704.6',
560 '73.0.3683.42',
561 '72.0.3626.114',
562 '74.0.3706.6',
563 '72.0.3626.113',
564 '74.0.3704.5',
565 '74.0.3706.5',
566 '74.0.3706.4',
567 '74.0.3706.3',
568 '74.0.3706.2',
569 '74.0.3706.1',
570 '74.0.3706.0',
571 '73.0.3683.41',
572 '72.0.3626.112',
573 '74.0.3705.1',
574 '74.0.3705.0',
575 '73.0.3683.40',
576 '72.0.3626.111',
577 '73.0.3683.39',
578 '74.0.3704.4',
579 '73.0.3683.38',
580 '74.0.3704.3',
581 '74.0.3704.2',
582 '74.0.3704.1',
583 '74.0.3704.0',
584 '73.0.3683.37',
585 '72.0.3626.110',
586 '72.0.3626.109',
587 '74.0.3703.3',
588 '74.0.3703.2',
589 '73.0.3683.36',
590 '74.0.3703.1',
591 '74.0.3703.0',
592 '73.0.3683.35',
593 '72.0.3626.108',
594 '74.0.3702.2',
595 '74.0.3699.3',
596 '74.0.3702.1',
597 '74.0.3702.0',
598 '73.0.3683.34',
599 '72.0.3626.107',
600 '73.0.3683.33',
601 '74.0.3701.1',
602 '74.0.3701.0',
603 '73.0.3683.32',
604 '73.0.3683.31',
605 '72.0.3626.105',
606 '74.0.3700.1',
607 '74.0.3700.0',
608 '73.0.3683.29',
609 '72.0.3626.103',
610 '74.0.3699.2',
611 '74.0.3699.1',
612 '74.0.3699.0',
613 '73.0.3683.28',
614 '72.0.3626.102',
615 '73.0.3683.27',
616 '73.0.3683.26',
617 '74.0.3698.0',
618 '74.0.3696.2',
619 '72.0.3626.101',
620 '73.0.3683.25',
621 '74.0.3696.1',
622 '74.0.3696.0',
623 '74.0.3694.8',
624 '72.0.3626.100',
625 '74.0.3694.7',
626 '74.0.3694.6',
627 '74.0.3694.5',
628 '74.0.3694.4',
629 '72.0.3626.99',
630 '72.0.3626.98',
631 '74.0.3694.3',
632 '73.0.3683.24',
633 '72.0.3626.97',
634 '72.0.3626.96',
635 '72.0.3626.95',
636 '73.0.3683.23',
637 '72.0.3626.94',
638 '73.0.3683.22',
639 '73.0.3683.21',
640 '72.0.3626.93',
641 '74.0.3694.2',
642 '72.0.3626.92',
643 '74.0.3694.1',
644 '74.0.3694.0',
645 '74.0.3693.6',
646 '73.0.3683.20',
647 '72.0.3626.91',
648 '74.0.3693.5',
649 '74.0.3693.4',
650 '74.0.3693.3',
651 '74.0.3693.2',
652 '73.0.3683.19',
653 '74.0.3693.1',
654 '74.0.3693.0',
655 '73.0.3683.18',
656 '72.0.3626.90',
657 '74.0.3692.1',
658 '74.0.3692.0',
659 '73.0.3683.17',
660 '72.0.3626.89',
661 '74.0.3687.3',
662 '74.0.3691.1',
663 '74.0.3691.0',
664 '73.0.3683.16',
665 '72.0.3626.88',
666 '72.0.3626.87',
667 '73.0.3683.15',
668 '74.0.3690.1',
669 '74.0.3690.0',
670 '73.0.3683.14',
671 '72.0.3626.86',
672 '73.0.3683.13',
673 '73.0.3683.12',
674 '74.0.3689.1',
675 '74.0.3689.0',
676 '73.0.3683.11',
677 '72.0.3626.85',
678 '73.0.3683.10',
679 '72.0.3626.84',
680 '73.0.3683.9',
681 '74.0.3688.1',
682 '74.0.3688.0',
683 '73.0.3683.8',
684 '72.0.3626.83',
685 '74.0.3687.2',
686 '74.0.3687.1',
687 '74.0.3687.0',
688 '73.0.3683.7',
689 '72.0.3626.82',
690 '74.0.3686.4',
691 '72.0.3626.81',
692 '74.0.3686.3',
693 '74.0.3686.2',
694 '74.0.3686.1',
695 '74.0.3686.0',
696 '73.0.3683.6',
697 '72.0.3626.80',
698 '74.0.3685.1',
699 '74.0.3685.0',
700 '73.0.3683.5',
701 '72.0.3626.79',
702 '74.0.3684.1',
703 '74.0.3684.0',
704 '73.0.3683.4',
705 '72.0.3626.78',
706 '72.0.3626.77',
707 '73.0.3683.3',
708 '73.0.3683.2',
709 '72.0.3626.76',
710 '73.0.3683.1',
711 '73.0.3683.0',
712 '72.0.3626.75',
713 '71.0.3578.141',
714 '73.0.3682.1',
715 '73.0.3682.0',
716 '72.0.3626.74',
717 '71.0.3578.140',
718 '73.0.3681.4',
719 '73.0.3681.3',
720 '73.0.3681.2',
721 '73.0.3681.1',
722 '73.0.3681.0',
723 '72.0.3626.73',
724 '71.0.3578.139',
725 '72.0.3626.72',
726 '72.0.3626.71',
727 '73.0.3680.1',
728 '73.0.3680.0',
729 '72.0.3626.70',
730 '71.0.3578.138',
731 '73.0.3678.2',
732 '73.0.3679.1',
733 '73.0.3679.0',
734 '72.0.3626.69',
735 '71.0.3578.137',
736 '73.0.3678.1',
737 '73.0.3678.0',
738 '71.0.3578.136',
739 '73.0.3677.1',
740 '73.0.3677.0',
741 '72.0.3626.68',
742 '72.0.3626.67',
743 '71.0.3578.135',
744 '73.0.3676.1',
745 '73.0.3676.0',
746 '73.0.3674.2',
747 '72.0.3626.66',
748 '71.0.3578.134',
749 '73.0.3674.1',
750 '73.0.3674.0',
751 '72.0.3626.65',
752 '71.0.3578.133',
753 '73.0.3673.2',
754 '73.0.3673.1',
755 '73.0.3673.0',
756 '72.0.3626.64',
757 '71.0.3578.132',
758 '72.0.3626.63',
759 '72.0.3626.62',
760 '72.0.3626.61',
761 '72.0.3626.60',
762 '73.0.3672.1',
763 '73.0.3672.0',
764 '72.0.3626.59',
765 '71.0.3578.131',
766 '73.0.3671.3',
767 '73.0.3671.2',
768 '73.0.3671.1',
769 '73.0.3671.0',
770 '72.0.3626.58',
771 '71.0.3578.130',
772 '73.0.3670.1',
773 '73.0.3670.0',
774 '72.0.3626.57',
775 '71.0.3578.129',
776 '73.0.3669.1',
777 '73.0.3669.0',
778 '72.0.3626.56',
779 '71.0.3578.128',
780 '73.0.3668.2',
781 '73.0.3668.1',
782 '73.0.3668.0',
783 '72.0.3626.55',
784 '71.0.3578.127',
785 '73.0.3667.2',
786 '73.0.3667.1',
787 '73.0.3667.0',
788 '72.0.3626.54',
789 '71.0.3578.126',
790 '73.0.3666.1',
791 '73.0.3666.0',
792 '72.0.3626.53',
793 '71.0.3578.125',
794 '73.0.3665.4',
795 '73.0.3665.3',
796 '72.0.3626.52',
797 '73.0.3665.2',
798 '73.0.3664.4',
799 '73.0.3665.1',
800 '73.0.3665.0',
801 '72.0.3626.51',
802 '71.0.3578.124',
803 '72.0.3626.50',
804 '73.0.3664.3',
805 '73.0.3664.2',
806 '73.0.3664.1',
807 '73.0.3664.0',
808 '73.0.3663.2',
809 '72.0.3626.49',
810 '71.0.3578.123',
811 '73.0.3663.1',
812 '73.0.3663.0',
813 '72.0.3626.48',
814 '71.0.3578.122',
815 '73.0.3662.1',
816 '73.0.3662.0',
817 '72.0.3626.47',
818 '71.0.3578.121',
819 '73.0.3661.1',
820 '72.0.3626.46',
821 '73.0.3661.0',
822 '72.0.3626.45',
823 '71.0.3578.120',
824 '73.0.3660.2',
825 '73.0.3660.1',
826 '73.0.3660.0',
827 '72.0.3626.44',
828 '71.0.3578.119',
829 '73.0.3659.1',
830 '73.0.3659.0',
831 '72.0.3626.43',
832 '71.0.3578.118',
833 '73.0.3658.1',
834 '73.0.3658.0',
835 '72.0.3626.42',
836 '71.0.3578.117',
837 '73.0.3657.1',
838 '73.0.3657.0',
839 '72.0.3626.41',
840 '71.0.3578.116',
841 '73.0.3656.1',
842 '73.0.3656.0',
843 '72.0.3626.40',
844 '71.0.3578.115',
845 '73.0.3655.1',
846 '73.0.3655.0',
847 '72.0.3626.39',
848 '71.0.3578.114',
849 '73.0.3654.1',
850 '73.0.3654.0',
851 '72.0.3626.38',
852 '71.0.3578.113',
853 '73.0.3653.1',
854 '73.0.3653.0',
855 '72.0.3626.37',
856 '71.0.3578.112',
857 '73.0.3652.1',
858 '73.0.3652.0',
859 '72.0.3626.36',
860 '71.0.3578.111',
861 '73.0.3651.1',
862 '73.0.3651.0',
863 '72.0.3626.35',
864 '71.0.3578.110',
865 '73.0.3650.1',
866 '73.0.3650.0',
867 '72.0.3626.34',
868 '71.0.3578.109',
869 '73.0.3649.1',
870 '73.0.3649.0',
871 '72.0.3626.33',
872 '71.0.3578.108',
873 '73.0.3648.2',
874 '73.0.3648.1',
875 '73.0.3648.0',
876 '72.0.3626.32',
877 '71.0.3578.107',
878 '73.0.3647.2',
879 '73.0.3647.1',
880 '73.0.3647.0',
881 '72.0.3626.31',
882 '71.0.3578.106',
883 '73.0.3635.3',
884 '73.0.3646.2',
885 '73.0.3646.1',
886 '73.0.3646.0',
887 '72.0.3626.30',
888 '71.0.3578.105',
889 '72.0.3626.29',
890 '73.0.3645.2',
891 '73.0.3645.1',
892 '73.0.3645.0',
893 '72.0.3626.28',
894 '71.0.3578.104',
895 '72.0.3626.27',
896 '72.0.3626.26',
897 '72.0.3626.25',
898 '72.0.3626.24',
899 '73.0.3644.0',
900 '73.0.3643.2',
901 '72.0.3626.23',
902 '71.0.3578.103',
903 '73.0.3643.1',
904 '73.0.3643.0',
905 '72.0.3626.22',
906 '71.0.3578.102',
907 '73.0.3642.1',
908 '73.0.3642.0',
909 '72.0.3626.21',
910 '71.0.3578.101',
911 '73.0.3641.1',
912 '73.0.3641.0',
913 '72.0.3626.20',
914 '71.0.3578.100',
915 '72.0.3626.19',
916 '73.0.3640.1',
917 '73.0.3640.0',
918 '72.0.3626.18',
919 '73.0.3639.1',
920 '71.0.3578.99',
921 '73.0.3639.0',
922 '72.0.3626.17',
923 '73.0.3638.2',
924 '72.0.3626.16',
925 '73.0.3638.1',
926 '73.0.3638.0',
927 '72.0.3626.15',
928 '71.0.3578.98',
929 '73.0.3635.2',
930 '71.0.3578.97',
931 '73.0.3637.1',
932 '73.0.3637.0',
933 '72.0.3626.14',
934 '71.0.3578.96',
935 '71.0.3578.95',
936 '72.0.3626.13',
937 '71.0.3578.94',
938 '73.0.3636.2',
939 '71.0.3578.93',
940 '73.0.3636.1',
941 '73.0.3636.0',
942 '72.0.3626.12',
943 '71.0.3578.92',
944 '73.0.3635.1',
945 '73.0.3635.0',
946 '72.0.3626.11',
947 '71.0.3578.91',
948 '73.0.3634.2',
949 '73.0.3634.1',
950 '73.0.3634.0',
951 '72.0.3626.10',
952 '71.0.3578.90',
953 '71.0.3578.89',
954 '73.0.3633.2',
955 '73.0.3633.1',
956 '73.0.3633.0',
957 '72.0.3610.4',
958 '72.0.3626.9',
959 '71.0.3578.88',
960 '73.0.3632.5',
961 '73.0.3632.4',
962 '73.0.3632.3',
963 '73.0.3632.2',
964 '73.0.3632.1',
965 '73.0.3632.0',
966 '72.0.3626.8',
967 '71.0.3578.87',
968 '73.0.3631.2',
969 '73.0.3631.1',
970 '73.0.3631.0',
971 '72.0.3626.7',
972 '71.0.3578.86',
973 '72.0.3626.6',
974 '73.0.3630.1',
975 '73.0.3630.0',
976 '72.0.3626.5',
977 '71.0.3578.85',
978 '72.0.3626.4',
979 '73.0.3628.3',
980 '73.0.3628.2',
981 '73.0.3629.1',
982 '73.0.3629.0',
983 '72.0.3626.3',
984 '71.0.3578.84',
985 '73.0.3628.1',
986 '73.0.3628.0',
987 '71.0.3578.83',
988 '73.0.3627.1',
989 '73.0.3627.0',
990 '72.0.3626.2',
991 '71.0.3578.82',
992 '71.0.3578.81',
993 '71.0.3578.80',
994 '72.0.3626.1',
995 '72.0.3626.0',
996 '71.0.3578.79',
997 '70.0.3538.124',
998 '71.0.3578.78',
999 '72.0.3623.4',
1000 '72.0.3625.2',
1001 '72.0.3625.1',
1002 '72.0.3625.0',
1003 '71.0.3578.77',
1004 '70.0.3538.123',
1005 '72.0.3624.4',
1006 '72.0.3624.3',
1007 '72.0.3624.2',
1008 '71.0.3578.76',
1009 '72.0.3624.1',
1010 '72.0.3624.0',
1011 '72.0.3623.3',
1012 '71.0.3578.75',
1013 '70.0.3538.122',
1014 '71.0.3578.74',
1015 '72.0.3623.2',
1016 '72.0.3610.3',
1017 '72.0.3623.1',
1018 '72.0.3623.0',
1019 '72.0.3622.3',
1020 '72.0.3622.2',
1021 '71.0.3578.73',
1022 '70.0.3538.121',
1023 '72.0.3622.1',
1024 '72.0.3622.0',
1025 '71.0.3578.72',
1026 '70.0.3538.120',
1027 '72.0.3621.1',
1028 '72.0.3621.0',
1029 '71.0.3578.71',
1030 '70.0.3538.119',
1031 '72.0.3620.1',
1032 '72.0.3620.0',
1033 '71.0.3578.70',
1034 '70.0.3538.118',
1035 '71.0.3578.69',
1036 '72.0.3619.1',
1037 '72.0.3619.0',
1038 '71.0.3578.68',
1039 '70.0.3538.117',
1040 '71.0.3578.67',
1041 '72.0.3618.1',
1042 '72.0.3618.0',
1043 '71.0.3578.66',
1044 '70.0.3538.116',
1045 '72.0.3617.1',
1046 '72.0.3617.0',
1047 '71.0.3578.65',
1048 '70.0.3538.115',
1049 '72.0.3602.3',
1050 '71.0.3578.64',
1051 '72.0.3616.1',
1052 '72.0.3616.0',
1053 '71.0.3578.63',
1054 '70.0.3538.114',
1055 '71.0.3578.62',
1056 '72.0.3615.1',
1057 '72.0.3615.0',
1058 '71.0.3578.61',
1059 '70.0.3538.113',
1060 '72.0.3614.1',
1061 '72.0.3614.0',
1062 '71.0.3578.60',
1063 '70.0.3538.112',
1064 '72.0.3613.1',
1065 '72.0.3613.0',
1066 '71.0.3578.59',
1067 '70.0.3538.111',
1068 '72.0.3612.2',
1069 '72.0.3612.1',
1070 '72.0.3612.0',
1071 '70.0.3538.110',
1072 '71.0.3578.58',
1073 '70.0.3538.109',
1074 '72.0.3611.2',
1075 '72.0.3611.1',
1076 '72.0.3611.0',
1077 '71.0.3578.57',
1078 '70.0.3538.108',
1079 '72.0.3610.2',
1080 '71.0.3578.56',
1081 '71.0.3578.55',
1082 '72.0.3610.1',
1083 '72.0.3610.0',
1084 '71.0.3578.54',
1085 '70.0.3538.107',
1086 '71.0.3578.53',
1087 '72.0.3609.3',
1088 '71.0.3578.52',
1089 '72.0.3609.2',
1090 '71.0.3578.51',
1091 '72.0.3608.5',
1092 '72.0.3609.1',
1093 '72.0.3609.0',
1094 '71.0.3578.50',
1095 '70.0.3538.106',
1096 '72.0.3608.4',
1097 '72.0.3608.3',
1098 '72.0.3608.2',
1099 '71.0.3578.49',
1100 '72.0.3608.1',
1101 '72.0.3608.0',
1102 '70.0.3538.105',
1103 '71.0.3578.48',
1104 '72.0.3607.1',
1105 '72.0.3607.0',
1106 '71.0.3578.47',
1107 '70.0.3538.104',
1108 '72.0.3606.2',
1109 '72.0.3606.1',
1110 '72.0.3606.0',
1111 '71.0.3578.46',
1112 '70.0.3538.103',
1113 '70.0.3538.102',
1114 '72.0.3605.3',
1115 '72.0.3605.2',
1116 '72.0.3605.1',
1117 '72.0.3605.0',
1118 '71.0.3578.45',
1119 '70.0.3538.101',
1120 '71.0.3578.44',
1121 '71.0.3578.43',
1122 '70.0.3538.100',
1123 '70.0.3538.99',
1124 '71.0.3578.42',
1125 '72.0.3604.1',
1126 '72.0.3604.0',
1127 '71.0.3578.41',
1128 '70.0.3538.98',
1129 '71.0.3578.40',
1130 '72.0.3603.2',
1131 '72.0.3603.1',
1132 '72.0.3603.0',
1133 '71.0.3578.39',
1134 '70.0.3538.97',
1135 '72.0.3602.2',
1136 '71.0.3578.38',
1137 '71.0.3578.37',
1138 '72.0.3602.1',
1139 '72.0.3602.0',
1140 '71.0.3578.36',
1141 '70.0.3538.96',
1142 '72.0.3601.1',
1143 '72.0.3601.0',
1144 '71.0.3578.35',
1145 '70.0.3538.95',
1146 '72.0.3600.1',
1147 '72.0.3600.0',
1148 '71.0.3578.34',
1149 '70.0.3538.94',
1150 '72.0.3599.3',
1151 '72.0.3599.2',
1152 '72.0.3599.1',
1153 '72.0.3599.0',
1154 '71.0.3578.33',
1155 '70.0.3538.93',
1156 '72.0.3598.1',
1157 '72.0.3598.0',
1158 '71.0.3578.32',
1159 '70.0.3538.87',
1160 '72.0.3597.1',
1161 '72.0.3597.0',
1162 '72.0.3596.2',
1163 '71.0.3578.31',
1164 '70.0.3538.86',
1165 '71.0.3578.30',
1166 '71.0.3578.29',
1167 '72.0.3596.1',
1168 '72.0.3596.0',
1169 '71.0.3578.28',
1170 '70.0.3538.85',
1171 '72.0.3595.2',
1172 '72.0.3591.3',
1173 '72.0.3595.1',
1174 '72.0.3595.0',
1175 '71.0.3578.27',
1176 '70.0.3538.84',
1177 '72.0.3594.1',
1178 '72.0.3594.0',
1179 '71.0.3578.26',
1180 '70.0.3538.83',
1181 '72.0.3593.2',
1182 '72.0.3593.1',
1183 '72.0.3593.0',
1184 '71.0.3578.25',
1185 '70.0.3538.82',
1186 '72.0.3589.3',
1187 '72.0.3592.2',
1188 '72.0.3592.1',
1189 '72.0.3592.0',
1190 '71.0.3578.24',
1191 '72.0.3589.2',
1192 '70.0.3538.81',
1193 '70.0.3538.80',
1194 '72.0.3591.2',
1195 '72.0.3591.1',
1196 '72.0.3591.0',
1197 '71.0.3578.23',
1198 '70.0.3538.79',
1199 '71.0.3578.22',
1200 '72.0.3590.1',
1201 '72.0.3590.0',
1202 '71.0.3578.21',
1203 '70.0.3538.78',
1204 '70.0.3538.77',
1205 '72.0.3589.1',
1206 '72.0.3589.0',
1207 '71.0.3578.20',
1208 '70.0.3538.76',
1209 '71.0.3578.19',
1210 '70.0.3538.75',
1211 '72.0.3588.1',
1212 '72.0.3588.0',
1213 '71.0.3578.18',
1214 '70.0.3538.74',
1215 '72.0.3586.2',
1216 '72.0.3587.0',
1217 '71.0.3578.17',
1218 '70.0.3538.73',
1219 '72.0.3586.1',
1220 '72.0.3586.0',
1221 '71.0.3578.16',
1222 '70.0.3538.72',
1223 '72.0.3585.1',
1224 '72.0.3585.0',
1225 '71.0.3578.15',
1226 '70.0.3538.71',
1227 '71.0.3578.14',
1228 '72.0.3584.1',
1229 '72.0.3584.0',
1230 '71.0.3578.13',
1231 '70.0.3538.70',
1232 '72.0.3583.2',
1233 '71.0.3578.12',
1234 '72.0.3583.1',
1235 '72.0.3583.0',
1236 '71.0.3578.11',
1237 '70.0.3538.69',
1238 '71.0.3578.10',
1239 '72.0.3582.0',
1240 '72.0.3581.4',
1241 '71.0.3578.9',
1242 '70.0.3538.67',
1243 '72.0.3581.3',
1244 '72.0.3581.2',
1245 '72.0.3581.1',
1246 '72.0.3581.0',
1247 '71.0.3578.8',
1248 '70.0.3538.66',
1249 '72.0.3580.1',
1250 '72.0.3580.0',
1251 '71.0.3578.7',
1252 '70.0.3538.65',
1253 '71.0.3578.6',
1254 '72.0.3579.1',
1255 '72.0.3579.0',
1256 '71.0.3578.5',
1257 '70.0.3538.64',
1258 '71.0.3578.4',
1259 '71.0.3578.3',
1260 '71.0.3578.2',
1261 '71.0.3578.1',
1262 '71.0.3578.0',
1263 '70.0.3538.63',
1264 '69.0.3497.128',
1265 '70.0.3538.62',
1266 '70.0.3538.61',
1267 '70.0.3538.60',
1268 '70.0.3538.59',
1269 '71.0.3577.1',
1270 '71.0.3577.0',
1271 '70.0.3538.58',
1272 '69.0.3497.127',
1273 '71.0.3576.2',
1274 '71.0.3576.1',
1275 '71.0.3576.0',
1276 '70.0.3538.57',
1277 '70.0.3538.56',
1278 '71.0.3575.2',
1279 '70.0.3538.55',
1280 '69.0.3497.126',
1281 '70.0.3538.54',
1282 '71.0.3575.1',
1283 '71.0.3575.0',
1284 '71.0.3574.1',
1285 '71.0.3574.0',
1286 '70.0.3538.53',
1287 '69.0.3497.125',
1288 '70.0.3538.52',
1289 '71.0.3573.1',
1290 '71.0.3573.0',
1291 '70.0.3538.51',
1292 '69.0.3497.124',
1293 '71.0.3572.1',
1294 '71.0.3572.0',
1295 '70.0.3538.50',
1296 '69.0.3497.123',
1297 '71.0.3571.2',
1298 '70.0.3538.49',
1299 '69.0.3497.122',
1300 '71.0.3571.1',
1301 '71.0.3571.0',
1302 '70.0.3538.48',
1303 '69.0.3497.121',
1304 '71.0.3570.1',
1305 '71.0.3570.0',
1306 '70.0.3538.47',
1307 '69.0.3497.120',
1308 '71.0.3568.2',
1309 '71.0.3569.1',
1310 '71.0.3569.0',
1311 '70.0.3538.46',
1312 '69.0.3497.119',
1313 '70.0.3538.45',
1314 '71.0.3568.1',
1315 '71.0.3568.0',
1316 '70.0.3538.44',
1317 '69.0.3497.118',
1318 '70.0.3538.43',
1319 '70.0.3538.42',
1320 '71.0.3567.1',
1321 '71.0.3567.0',
1322 '70.0.3538.41',
1323 '69.0.3497.117',
1324 '71.0.3566.1',
1325 '71.0.3566.0',
1326 '70.0.3538.40',
1327 '69.0.3497.116',
1328 '71.0.3565.1',
1329 '71.0.3565.0',
1330 '70.0.3538.39',
1331 '69.0.3497.115',
1332 '71.0.3564.1',
1333 '71.0.3564.0',
1334 '70.0.3538.38',
1335 '69.0.3497.114',
1336 '71.0.3563.0',
1337 '71.0.3562.2',
1338 '70.0.3538.37',
1339 '69.0.3497.113',
1340 '70.0.3538.36',
1341 '70.0.3538.35',
1342 '71.0.3562.1',
1343 '71.0.3562.0',
1344 '70.0.3538.34',
1345 '69.0.3497.112',
1346 '70.0.3538.33',
1347 '71.0.3561.1',
1348 '71.0.3561.0',
1349 '70.0.3538.32',
1350 '69.0.3497.111',
1351 '71.0.3559.6',
1352 '71.0.3560.1',
1353 '71.0.3560.0',
1354 '71.0.3559.5',
1355 '71.0.3559.4',
1356 '70.0.3538.31',
1357 '69.0.3497.110',
1358 '71.0.3559.3',
1359 '70.0.3538.30',
1360 '69.0.3497.109',
1361 '71.0.3559.2',
1362 '71.0.3559.1',
1363 '71.0.3559.0',
1364 '70.0.3538.29',
1365 '69.0.3497.108',
1366 '71.0.3558.2',
1367 '71.0.3558.1',
1368 '71.0.3558.0',
1369 '70.0.3538.28',
1370 '69.0.3497.107',
1371 '71.0.3557.2',
1372 '71.0.3557.1',
1373 '71.0.3557.0',
1374 '70.0.3538.27',
1375 '69.0.3497.106',
1376 '71.0.3554.4',
1377 '70.0.3538.26',
1378 '71.0.3556.1',
1379 '71.0.3556.0',
1380 '70.0.3538.25',
1381 '71.0.3554.3',
1382 '69.0.3497.105',
1383 '71.0.3554.2',
1384 '70.0.3538.24',
1385 '69.0.3497.104',
1386 '71.0.3555.2',
1387 '70.0.3538.23',
1388 '71.0.3555.1',
1389 '71.0.3555.0',
1390 '70.0.3538.22',
1391 '69.0.3497.103',
1392 '71.0.3554.1',
1393 '71.0.3554.0',
1394 '70.0.3538.21',
1395 '69.0.3497.102',
1396 '71.0.3553.3',
1397 '70.0.3538.20',
1398 '69.0.3497.101',
1399 '71.0.3553.2',
1400 '69.0.3497.100',
1401 '71.0.3553.1',
1402 '71.0.3553.0',
1403 '70.0.3538.19',
1404 '69.0.3497.99',
1405 '69.0.3497.98',
1406 '69.0.3497.97',
1407 '71.0.3552.6',
1408 '71.0.3552.5',
1409 '71.0.3552.4',
1410 '71.0.3552.3',
1411 '71.0.3552.2',
1412 '71.0.3552.1',
1413 '71.0.3552.0',
1414 '70.0.3538.18',
1415 '69.0.3497.96',
1416 '71.0.3551.3',
1417 '71.0.3551.2',
1418 '71.0.3551.1',
1419 '71.0.3551.0',
1420 '70.0.3538.17',
1421 '69.0.3497.95',
1422 '71.0.3550.3',
1423 '71.0.3550.2',
1424 '71.0.3550.1',
1425 '71.0.3550.0',
1426 '70.0.3538.16',
1427 '69.0.3497.94',
1428 '71.0.3549.1',
1429 '71.0.3549.0',
1430 '70.0.3538.15',
1431 '69.0.3497.93',
1432 '69.0.3497.92',
1433 '71.0.3548.1',
1434 '71.0.3548.0',
1435 '70.0.3538.14',
1436 '69.0.3497.91',
1437 '71.0.3547.1',
1438 '71.0.3547.0',
1439 '70.0.3538.13',
1440 '69.0.3497.90',
1441 '71.0.3546.2',
1442 '69.0.3497.89',
1443 '71.0.3546.1',
1444 '71.0.3546.0',
1445 '70.0.3538.12',
1446 '69.0.3497.88',
1447 '71.0.3545.4',
1448 '71.0.3545.3',
1449 '71.0.3545.2',
1450 '71.0.3545.1',
1451 '71.0.3545.0',
1452 '70.0.3538.11',
1453 '69.0.3497.87',
1454 '71.0.3544.5',
1455 '71.0.3544.4',
1456 '71.0.3544.3',
1457 '71.0.3544.2',
1458 '71.0.3544.1',
1459 '71.0.3544.0',
1460 '69.0.3497.86',
1461 '70.0.3538.10',
1462 '69.0.3497.85',
1463 '70.0.3538.9',
1464 '69.0.3497.84',
1465 '71.0.3543.4',
1466 '70.0.3538.8',
1467 '71.0.3543.3',
1468 '71.0.3543.2',
1469 '71.0.3543.1',
1470 '71.0.3543.0',
1471 '70.0.3538.7',
1472 '69.0.3497.83',
1473 '71.0.3542.2',
1474 '71.0.3542.1',
1475 '71.0.3542.0',
1476 '70.0.3538.6',
1477 '69.0.3497.82',
1478 '69.0.3497.81',
1479 '71.0.3541.1',
1480 '71.0.3541.0',
1481 '70.0.3538.5',
1482 '69.0.3497.80',
1483 '71.0.3540.1',
1484 '71.0.3540.0',
1485 '70.0.3538.4',
1486 '69.0.3497.79',
1487 '70.0.3538.3',
1488 '71.0.3539.1',
1489 '71.0.3539.0',
1490 '69.0.3497.78',
1491 '68.0.3440.134',
1492 '69.0.3497.77',
1493 '70.0.3538.2',
1494 '70.0.3538.1',
1495 '70.0.3538.0',
1496 '69.0.3497.76',
1497 '68.0.3440.133',
1498 '69.0.3497.75',
1499 '70.0.3537.2',
1500 '70.0.3537.1',
1501 '70.0.3537.0',
1502 '69.0.3497.74',
1503 '68.0.3440.132',
1504 '70.0.3536.0',
1505 '70.0.3535.5',
1506 '70.0.3535.4',
1507 '70.0.3535.3',
1508 '69.0.3497.73',
1509 '68.0.3440.131',
1510 '70.0.3532.8',
1511 '70.0.3532.7',
1512 '69.0.3497.72',
1513 '69.0.3497.71',
1514 '70.0.3535.2',
1515 '70.0.3535.1',
1516 '70.0.3535.0',
1517 '69.0.3497.70',
1518 '68.0.3440.130',
1519 '69.0.3497.69',
1520 '68.0.3440.129',
1521 '70.0.3534.4',
1522 '70.0.3534.3',
1523 '70.0.3534.2',
1524 '70.0.3534.1',
1525 '70.0.3534.0',
1526 '69.0.3497.68',
1527 '68.0.3440.128',
1528 '70.0.3533.2',
1529 '70.0.3533.1',
1530 '70.0.3533.0',
1531 '69.0.3497.67',
1532 '68.0.3440.127',
1533 '70.0.3532.6',
1534 '70.0.3532.5',
1535 '70.0.3532.4',
1536 '69.0.3497.66',
1537 '68.0.3440.126',
1538 '70.0.3532.3',
1539 '70.0.3532.2',
1540 '70.0.3532.1',
1541 '69.0.3497.60',
1542 '69.0.3497.65',
1543 '69.0.3497.64',
1544 '70.0.3532.0',
1545 '70.0.3531.0',
1546 '70.0.3530.4',
1547 '70.0.3530.3',
1548 '70.0.3530.2',
1549 '69.0.3497.58',
1550 '68.0.3440.125',
1551 '69.0.3497.57',
1552 '69.0.3497.56',
1553 '69.0.3497.55',
1554 '69.0.3497.54',
1555 '70.0.3530.1',
1556 '70.0.3530.0',
1557 '69.0.3497.53',
1558 '68.0.3440.124',
1559 '69.0.3497.52',
1560 '70.0.3529.3',
1561 '70.0.3529.2',
1562 '70.0.3529.1',
1563 '70.0.3529.0',
1564 '69.0.3497.51',
1565 '70.0.3528.4',
1566 '68.0.3440.123',
1567 '70.0.3528.3',
1568 '70.0.3528.2',
1569 '70.0.3528.1',
1570 '70.0.3528.0',
1571 '69.0.3497.50',
1572 '68.0.3440.122',
1573 '70.0.3527.1',
1574 '70.0.3527.0',
1575 '69.0.3497.49',
1576 '68.0.3440.121',
1577 '70.0.3526.1',
1578 '70.0.3526.0',
1579 '68.0.3440.120',
1580 '69.0.3497.48',
1581 '69.0.3497.47',
1582 '68.0.3440.119',
1583 '68.0.3440.118',
1584 '70.0.3525.5',
1585 '70.0.3525.4',
1586 '70.0.3525.3',
1587 '68.0.3440.117',
1588 '69.0.3497.46',
1589 '70.0.3525.2',
1590 '70.0.3525.1',
1591 '70.0.3525.0',
1592 '69.0.3497.45',
1593 '68.0.3440.116',
1594 '70.0.3524.4',
1595 '70.0.3524.3',
1596 '69.0.3497.44',
1597 '70.0.3524.2',
1598 '70.0.3524.1',
1599 '70.0.3524.0',
1600 '70.0.3523.2',
1601 '69.0.3497.43',
1602 '68.0.3440.115',
1603 '70.0.3505.9',
1604 '69.0.3497.42',
1605 '70.0.3505.8',
1606 '70.0.3523.1',
1607 '70.0.3523.0',
1608 '69.0.3497.41',
1609 '68.0.3440.114',
1610 '70.0.3505.7',
1611 '69.0.3497.40',
1612 '70.0.3522.1',
1613 '70.0.3522.0',
1614 '70.0.3521.2',
1615 '69.0.3497.39',
1616 '68.0.3440.113',
1617 '70.0.3505.6',
1618 '70.0.3521.1',
1619 '70.0.3521.0',
1620 '69.0.3497.38',
1621 '68.0.3440.112',
1622 '70.0.3520.1',
1623 '70.0.3520.0',
1624 '69.0.3497.37',
1625 '68.0.3440.111',
1626 '70.0.3519.3',
1627 '70.0.3519.2',
1628 '70.0.3519.1',
1629 '70.0.3519.0',
1630 '69.0.3497.36',
1631 '68.0.3440.110',
1632 '70.0.3518.1',
1633 '70.0.3518.0',
1634 '69.0.3497.35',
1635 '69.0.3497.34',
1636 '68.0.3440.109',
1637 '70.0.3517.1',
1638 '70.0.3517.0',
1639 '69.0.3497.33',
1640 '68.0.3440.108',
1641 '69.0.3497.32',
1642 '70.0.3516.3',
1643 '70.0.3516.2',
1644 '70.0.3516.1',
1645 '70.0.3516.0',
1646 '69.0.3497.31',
1647 '68.0.3440.107',
1648 '70.0.3515.4',
1649 '68.0.3440.106',
1650 '70.0.3515.3',
1651 '70.0.3515.2',
1652 '70.0.3515.1',
1653 '70.0.3515.0',
1654 '69.0.3497.30',
1655 '68.0.3440.105',
1656 '68.0.3440.104',
1657 '70.0.3514.2',
1658 '70.0.3514.1',
1659 '70.0.3514.0',
1660 '69.0.3497.29',
1661 '68.0.3440.103',
1662 '70.0.3513.1',
1663 '70.0.3513.0',
1664 '69.0.3497.28',
1665 )
1666 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1667
1668
# Standard HTTP headers; the User-Agent is chosen once, when this module is
# imported, by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1676
5f6a1245 1677
# Named User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel: lets functions tell "no default given" apart from an
# explicit default of None.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, January first.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; multi-letter
# replacements ('AE', 'OE', 'TH', 'ss', ...) are passed as one-element lists
# so that chain() yields them whole instead of letter by letter.
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
    itertools.chain(
        'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1715
# strptime() patterns that do not depend on day/month ordering.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# The shared patterns followed by the numeric day-before-month variants.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# The shared patterns followed by the numeric month-before-day variants.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1773
# Captures the four arguments in a "}('...',N,N,'a|b|c'.split('|')" call tail.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of a <script type="application/ld+json"> element.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1776
7105440c 1777
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() when that names a codec able to
    encode a plain ASCII string, and 'UTF-8' otherwise (including when the
    locale lookup itself fails).
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec; an unknown or unusable name raises here.
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
d77c3dfd 1791
f4bfd65f 1792
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn and
    then renamed over fn.  If anything fails, the temporary file is removed
    (best effort) and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    is_py2 = sys.version_info < (3, 0)

    if is_py2 and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use unicode objects
        tmp_prefix = os.path.basename(fn).decode(encoding)
        tmp_dir = os.path.dirname(fn).decode(encoding)
    else:
        tmp_prefix = os.path.basename(fn)
        tmp_dir = os.path.dirname(fn)

    args = {
        'suffix': '.tmp',
        'prefix': tmp_prefix + '.',
        'dir': tmp_dir,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if is_py2:
        args['mode'] = 'wb'
    else:
        args['mode'] = 'w'
        args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind, then propagate the error.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1845
1846
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        With val left as None, the first element matching xpath that merely
        carries the attribute key is returned.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1861
d7e66d39
JMF
1862# On python2.6 the xml.etree.ElementTree.Element methods don't support
1863# the namespace parameter
5f6a1245
JW
1864
1865
def xpath_with_ns(path, ns_map):
    """Rewrite every 'prefix:tag' step of path as '{uri}tag'.

    ns_map maps each prefix to its namespace URI; steps without a colon are
    kept as they are.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
1876
d77c3dfd 1877
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node that matches xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order until one matches.

    When nothing matches: default is returned if one was supplied; otherwise
    ExtractorError is raised if fatal is set (name, or xpath when name is
    None, is used in the message); otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable of expressions
        # falls through to the default/fatal handling below instead of
        # leaving n unbound.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1899
1900
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element located by xpath_element().

    A missing element, or an element without text, yields default when one
    was supplied, raises ExtractorError when fatal is set, and yields None
    otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # xpath_element already resolved the "not found" case for us.
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1914
1915
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default is returned if supplied, otherwise
    ExtractorError is raised if fatal is set, otherwise None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1927
1928
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is just an attribute lookup on 'id'.
    return get_element_by_attribute('id', id, html)
43e8fafd 1932
12ea2f30 1933
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1938
1939
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1943
1944
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class_name as a whole word anywhere inside the class attribute;
    # the pattern is already a regex, so it must not be escaped again.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1950
1951
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute value in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is inserted into the pattern as a regular expression.
    """
    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')

        # Content that starts with a quote loses its first and last character.
        if content.startswith(('"', "'")):
            content = content[1:-1]

        contents.append(unescapeHTML(content))

    return contents
a921f407 1975
c5229f39 1976
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag, as a dict.
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs arrives as (name, value) pairs; keep them as a mapping.
        self.attrs = dict(attrs)
1985
c5229f39 1986
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered before the error is still returned.
        pass
    return attr_parser.attrs
9e6dd238 2011
c5229f39 2012
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Literal newlines carry no meaning in HTML; only <br> and paragraph
    # boundaries become line breaks.
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop every remaining tag, then decode the entities.
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
9e6dd238
FV
2028
2029
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it retries once with
    the name returned by sanitize_path(); if that name is no different, or
    the failure was a permission error, the original exception is re-raised,
    like the standard open() function would.

    A filename of '-' selects standard output (its binary buffer when it has
    one).

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # Renaming cannot help with a permission problem.
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2060
2061
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2069
5f6a1245 2070
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped outright.
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace():
                return '_'
            if code > 127:
                return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        # IDs are only mapped character by character, never tidied up.
        return result

    # Collapse runs of underscores and trim them from both ends.
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2110
5f6a1245 2111
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform s is returned unchanged.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc = os.path.splitdrive(s)[0]
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc = os.path.splitunc(s)[0]

    components = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        components.pop(0)

    def clean(component):
        # '.' and '..' keep their meaning; elsewhere forbidden characters
        # and a trailing whitespace/dot are replaced with '#'.
        if component in ['.', '..']:
            return component
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', component)

    sanitized_path = [clean(component) for component in components]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2128
2129
def sanitize_url(url):
    """Repair a few well-known URL defects; any other URL is returned as is."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, replacement in COMMON_TYPOS:
        if re.match(typo_re, url) is None:
            continue
        # Only the first matching typo is corrected.
        return re.sub(typo_re, replacement, url)
    return url
17bcc626
S
2146
2147
def sanitized_Request(url, *args, **kwargs):
    """Build a Request for url after passing it through sanitize_url()."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2150
2151
def expand_path(s):
    """Expand shell variables and ~"""
    # The home directory is expanded first, then environment variables.
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2155
2156
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of each element, in order.
    Elements are compared with ==, so they do not have to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2164
912b38b4 2165
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without its leading '&' but with
    the trailing ';' (e.g. 'amp;' or '#x27;').  Entities that cannot be
    decoded are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() wants the '0x' spelling for base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # Out-of-range code points raise ValueError; values too large for
        # a C integer raise OverflowError.  Both mean "not a character".
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2195
2196
def unescapeHTML(s):
    """Replace the HTML entities in s with their characters; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def decode_entity(match):
        # group(1) is the entity name plus its trailing semicolon
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', decode_entity, s)
d77c3dfd 2204
8bf48f23 2205
def get_subprocess_encoding():
    """Return the name of the encoding used for filenames and subprocess arguments."""
    on_win2000_or_later = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    # For subprocess calls, encode with locale encoding
    # Refer to http://stackoverflow.com/a/9951851/35070
    encoding = preferredencoding() if on_win2000_or_later else sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2216
2217
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is meant for a subprocess call
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass unicode strings directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    uses_windows_unicode_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    if uses_windows_unicode_api:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    # Characters the target encoding cannot represent are dropped.
    return s.encode(get_subprocess_encoding(), 'ignore')
2240
2241
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string filename on Python 2; anything else is returned unchanged."""
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b

    # Undecodable bytes are dropped.
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2251
f07b74fc
PH
2252
def encodeArgument(s):
    """Encode a command line argument like encodeFilename(s, for_subprocess=True)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2260
2261
def decodeArgument(b):
    """Decode a command line argument like decodeFilename(b, for_subprocess=True)."""
    return decodeFilename(b, for_subprocess=True)
2264
2265
def decodeOption(optval):
    """Return optval as text: bytes are decoded with preferredencoding(), None passes through."""
    if optval is None:
        return optval

    value = optval
    if isinstance(value, bytes):
        value = value.decode(preferredencoding())

    assert isinstance(value, compat_str)
    return value
1c256f70 2274
5f6a1245 2275
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds.

    The shortest form that fits is used: from one hour up the hour field is
    shown, from one minute up the minute field is shown, and below that only
    the seconds are printed.
    """
    # The comparisons are inclusive so that exactly one hour is rendered as
    # '1:00:00' (not '60:00') and exactly one minute as '1:00' (not '60').
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2283
a0ddb8a2 2284
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler using the best SSL setup this Python offers.

    Certificate verification is switched off when params['nocheckcertificate']
    is true.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2308
732ea2f0 2309
def bug_reports_message():
    """Return the "please report this issue" text appended to unexpected errors."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
2319
2320
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2324
2325
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # An error raised while a network failure or an unavailable video is
        # being handled is never treated as a bug.
        current_exc_type = sys.exc_info()[0]
        if current_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
01951dda 2353
1c256f70 2354
class UnsupportedError(ExtractorError):
    """Expected error carrying the 'Unsupported URL' message for url."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2360
2361
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2365
2366
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Always reported as an expected error.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2377
2378
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that callers can inspect the underlying failure.
        self.exc_info = exc_info
d77c3dfd
FV
2391
2392
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2400
2401
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # The message is also exposed as an attribute.
        self.msg = msg
d77c3dfd 2412
5f6a1245 2413
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2417
2418
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2426
2427
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2443
5f6a1245 2444
class XAttrMetadataError(YoutubeDLError):
    """Error raised for a failed extended-attribute operation.

    Besides code and msg, the instance carries a reason attribute derived
    from them: 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # The message is checked as well as the code, so an error that only
        # comes with text can still be classified.  The quota text must be
        # spelled 'exceeded' to match the system's EDQUOT message.
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2459
2460
class XAttrUnavailableError(YoutubeDLError):
    """Marker exception for unavailable extended attributes (no extra behaviour)."""
2463
2464
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if requested, bind it to a source address.

    The source address is read from ydl_handler._params['source_address'].
    When it is set, the connection is patched so that only remote addresses
    of the same IP family as the source address are tried.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is None:
        return hc

    # This is to workaround _create_connection() from socket where it will try all
    # address data from getaddrinfo() including IPv6. This filters the result from
    # getaddrinfo() based on the source_address value.
    # This is based on the cpython socket.create_connection() function.
    # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
    def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        host, port = address
        last_error = None
        addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        # A dot in the source address means IPv4, anything else IPv6.
        wanted_family = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
        ip_addrs = [addr for addr in addrs if addr[0] == wanted_family]
        if addrs and not ip_addrs:
            ip_version = 'v4' if wanted_family == socket.AF_INET else 'v6'
            raise socket.error(
                "No remote IP%s addresses available for connect, can't use '%s' as source address"
                % (ip_version, source_address[0]))
        for family, socktype, proto, _canonname, remote in ip_addrs:
            sock = None
            try:
                sock = socket.socket(family, socktype, proto)
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                sock.bind(source_address)
                sock.connect(remote)
                last_error = None  # Explicitly break reference cycle
                return sock
            except socket.error as exc:
                # Remember the failure and move on to the next candidate.
                last_error = exc
                if sock is not None:
                    sock.close()
        if last_error is not None:
            raise last_error
        raise socket.error('getaddrinfo returns an empty list')

    if hasattr(hc, '_create_connection'):
        hc._create_connection = _create_connection
    sa = (source_address, 0)
    if hasattr(hc, 'source_address'):  # Python 2.7+
        hc.source_address = sa
    else:  # Python 2.6
        def _hc_connect(self, *args, **kwargs):
            sock = _create_connection(
                (self.host, self.port), self.timeout, sa)
            if is_https:
                self.sock = ssl.wrap_socket(
                    sock, self.key_file, self.cert_file,
                    ssl_version=ssl.PROTOCOL_TLSv1)
            else:
                self.sock = sock
        hc.connect = functools.partial(_hc_connect, hc)

    return hc
2528
2529
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to headers.

    Without the marker the very same headers object is returned.  With it,
    a new dict is returned that lacks both the marker and any
    Accept-Encoding header (matched case-insensitively); the input is left
    untouched.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    stripped = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    stripped.pop('Youtubedl-no-compression')
    return stripped
87f0e62d
YCH
2538
2539
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """HTTP handler that adds default headers and decodes compressed bodies.

    Installed in an OpenerDirector, it adds every entry of std_headers that
    the request does not already carry and transparently decompresses gzip
    and deflate responses.  A request can opt out of compression by carrying
    the HTTP header "Youtubedl-no-compression"; that marker is stripped
    before the real request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, going through a SOCKS proxy if 'Ytdl-socks-proxy' is set."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Inflate data, trying a raw deflate stream first, then zlib-wrapped."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Normalise the URL and headers of an outgoing request."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Only rebuild the request when escaping actually changed something
        if escaped_url != original_url:
            req = update_Request(req, url=escaped_url)

        for name, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() not in req.headers:
                req.add_header(name, value)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress the body and percent-encode a redirect Location header."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            try:
                uncompressed = io.BytesIO(
                    gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read())
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes chopped off.
                for trim in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        pass
                    else:
                        break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(
                uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(
                inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                is_py3 = sys.version_info >= (3, 0)
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if is_py3:
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if not is_py3:
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
bf50b038 2661
5de90176 2662
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of base_class that connects through a SOCKS proxy.

    socks_proxy is a proxy URL whose scheme selects the protocol
    ('socks5', 'socks'/'socks4' or 'socks4a').  The port defaults to 1080
    and username/password, when present, are URL-unquoted.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    parsed = compat_urlparse.urlparse(socks_proxy)
    scheme = parsed.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def _unquoted(credential):
        # Leave None / empty credentials exactly as they are
        return compat_urllib_parse_unquote_plus(credential) if credential else credential

    proxy_args = (
        socks_type,
        parsed.hostname, parsed.port or 1080,
        True,  # Remote DNS
        _unquoted(parsed.username),
        _unquoted(parsed.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if not isinstance(self, compat_http_client.HTTPSConnection):
                return
            if hasattr(self, '_context'):  # Python > 2.6
                self.sock = self._context.wrap_socket(
                    self.sock, server_hostname=self.host)
            else:
                self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2704
2705
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req, going through a SOCKS proxy if 'Ytdl-socks-proxy' is set."""
        conn_class = self._https_conn_class

        # Forward whichever TLS settings this Python version's handler has:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        open_kwargs = {}
        for attr, keyword in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[keyword] = getattr(self, attr)

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2729
2730
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """MozillaCookieJar that copes with '#HttpOnly_' lines and expires=0."""

    _HTTPONLY_PREFIX = '#HttpOnly_'

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies, writing session cookies with `expires` set to 0."""
        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for jar_cookie in self:
            if jar_cookie.expires is None:
                jar_cookie.expires = 0
        compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        # Strip the '#HttpOnly_' prefix so the base class parser does not
        # treat those lines as comments, then parse the cleaned-up copy.
        prefix = self._HTTPONLY_PREFIX
        cf = io.StringIO()
        with open(filename) as f:
            for line in f:
                if line.startswith(prefix):
                    line = line[len(prefix):]
                cf.write(compat_str(line))
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for jar_cookie in self:
            # Treat `expires=0` cookies as session cookies
            if jar_cookie.expires == 0:
                jar_cookie.expires = None
                jar_cookie.discard = True
2772
2773
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that is also applied to https requests/responses."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # The percent-encoding workaround below is kept for reference but is
        # currently disabled; the response goes to the base class unchanged.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2796
2797
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler; on Python 2 it coerces the redirect URL to unicode."""

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            unicode_url = compat_str(newurl)
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, headers, unicode_url)
2805
2806
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC designator off a date string.

    Recognises a final 'Z' or a final '+HH:MM' / '-HHMM' style offset
    (optionally preceded by one space) after at least 8 other characters.
    Returns (offset, remainder): offset is a datetime.timedelta (zero for
    'Z' or when nothing matched) and remainder is date_str without the
    designator.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Plain 'Z': UTC, no offset
        return datetime.timedelta(), remainder

    direction = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remainder
2823
2824
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    Fractional seconds are dropped.  When timezone (a datetime.timedelta)
    is not given it is extracted from date_str.  Returns None for a None
    input or when the string does not parse.
    """
    if date_str is None:
        return None

    # Drop fractional seconds
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_moment = datetime.datetime.strptime(without_fraction, layout) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
912b38b4
PH
2842
2843
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first or the month-first list of date formats."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2846
2847
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; the last one that parses determines the result
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 2874
5f6a1245 2875
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    Returns None when date_str is None or no known format matches.
    """
    if date_str is None:
        return None

    cleaned = re.sub(r'[,|]', '', date_str)

    # A 'PM' anywhere in the string shifts the result by 12 hours
    pm_delta = 12 if re.search(r'(?i)PM', cleaned) else 0
    timezone, cleaned = extract_timezone(cleaned)

    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', cleaned)
    if tz_match:
        cleaned = cleaned[:-len(tz_match.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nano_match = re.search(
        r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$',
        cleaned)
    if nano_match:
        cleaned = nano_match.group(1)

    for fmt in date_formats(day_first):
        try:
            moment = (
                datetime.datetime.strptime(cleaned, fmt)
                - timezone + datetime.timedelta(hours=pm_delta))
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    timetuple = email.utils.parsedate_tz(cleaned)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
    return None
46f59e89
S
2907
2908
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension of url, falling back to default_ext.

    The guess is whatever follows the last '.' before any '?'.  It is
    accepted if purely alphanumeric, or if it is a known extension followed
    only by slashes.
    """
    if url is None or '.' not in url:
        return default_ext

    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    without_slashes = candidate.rstrip('/')
    if without_slashes in KNOWN_EXTENSIONS:
        return without_slashes
    return default_ext
73e79f2a 2920
5f6a1245 2921
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 2924
5f6a1245 2925
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'now', 'today' and 'yesterday' are accepted on their own as well.
    Months count as 30 days and years as 365 days.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')
    # timedelta knows neither months nor years; approximate them in days
    approx_days = {'month': 30, 'year': 365}
    if unit in approx_days:
        amount *= approx_days[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
2953
2954
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
2963
5f6a1245 2964
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest / latest representable
        date.  Raises ValueError if start lies after end.
        """
        self.start = (
            date_from_str(start) if start is not None
            else datetime.datetime.min.date())
        self.end = (
            date_from_str(end) if end is not None
            else datetime.datetime.max.date())
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
2994
2995
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3004
3005
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    Only streams whose fileno() is 1 or 2 and whose Windows handle is a
    real console are written here (via WriteConsoleW).
    """
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # fileno -> argument passed to GetStdHandle
    std_handle_ids = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # No fileno: the stream is virtual (or some strange Windows pseudo file)
        return False
    if fileno not in std_handle_ids:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    console_handle = GetStdHandle(std_handle_ids[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(
        ('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
            return True
        return GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0

    if not_a_console(console_handle):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the BMP, or len(text) if none
        for idx, ch in enumerate(text):
            if ord(ch) > 0xffff:
                return idx
        return len(text)

    while s:
        # Write at most 1024 characters, stopping before a non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            console_handle, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if count:
            assert written.value > 0
            s = s[written.value:]
        else:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
    return True
3079
3080
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    Byte-oriented streams receive s encoded with encoding, falling back to
    the stream's own or the preferred encoding; unencodable characters are
    dropped.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    on_windows = sys.platform == 'win32'
    if on_windows and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3101
3102
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the items of bs as a list of ints ([] for empty input)."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    # Items are one-character strings: map each to its code point
    return [ord(ch) for ch in bs]
3110
c257baff 3111
def intlist_to_bytes(xs):
    """Pack a sequence of ints as unsigned bytes (b'' for empty input)."""
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
3116
3117
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs that raise IOError where fcntl is missing.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL

    # Low / high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            handle, flags, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            fcntl.flock(f, operation)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3191
3192
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    Mode 'r' takes a shared lock; 'a' and 'w' take an exclusive one.  The
    file is opened in the constructor and closed on exit.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leave the file open if the lock could not be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3222
3223
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3227
3228
def shell_quote(args):
    """Return args as one space-separated, shell-quoted command string."""
    encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(_as_text(arg)) for arg in args])
9d4660ca
PH
3238
3239
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into a '#__youtubedl_smuggle=...' fragment appended
    to url.  If url already carries smuggled data, the two are merged and the
    values already present in the URL take precedence.  The caller's data
    dict is not modified.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy: updating `data` in place would leak previously
    # smuggled values into the dict the caller passed in.
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3248
3249
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (plain_url, data).

    URLs without a '#__youtubedl_smuggle' marker are returned unchanged
    together with default.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    plain_url, _, fragment = smug_url.rpartition('#')
    encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return plain_url, json.loads(encoded)
02dbf93f
PH
3257
3258
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a human-readable string such as '1.50KiB'.

    bytes may be a number, a numeric string, or None (which gives 'N/A').
    The unit is the largest binary prefix not exceeding the value, limited
    to the range B..YiB.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Clamp to the available suffixes: values of 1024 ** 9 and above
        # would otherwise index past the list, and tiny fractions would pick
        # a suffix from its end through a negative index.
        exponent = min(max(exponent, 0), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3271
1c088fa8 3272
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse a '<number><unit>' prefix of s using unit_table.

    unit_table maps unit names to multipliers.  The number may use ',' or
    '.' as decimal separator.  Returns int(number * multiplier), or None
    if s does not start with a number followed by a known unit.
    """
    unit_alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % unit_alternatives
    found = re.match(pattern, s)
    if found is None:
        return None

    number = float(found.group('num').replace(',', '.'))
    multiplier = unit_table[found.group('unit')]
    return int(number * multiplier)
3282
3283
be64b5b0
PH
def parse_filesize(s):
    """Parse a file size such as '1.5 MiB' or '10 kilobytes' into bytes.

    Returns None for None or for strings that lookup_unit_table rejects.
    """
    if s is None:
        return None

    # (symbol letter, decimal word stem, binary word stem), ascending by power
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_stem, binary_stem) in enumerate(prefixes, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        small = letter.lower()
        unit_table[letter + 'iB'] = binary
        unit_table[letter + 'B'] = decimal
        unit_table[small + 'B'] = binary
        unit_table[letter + 'b'] = decimal
        unit_table[small + 'b'] = decimal
        unit_table[decimal_stem + 'bytes'] = decimal
        unit_table[binary_stem + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
3353
3354
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int (None if s is None)."""
    if s is None:
        return None

    text = s.strip()

    # Plain number, possibly with thousands separators
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand, million = 1000, 1000 ** 2
    multipliers = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(multipliers, text)
be64b5b0 3374
2f7ae819 3375
b871d7e9
S
def parse_resolution(s):
    """Extract video dimensions from a free-form string.

    Tried in order: 'WxH' (separator 'x', 'X' or '×'), '<height>p' or
    '<height>i', then '4k' / '8k'.  Returns a dict with 'width' and/or
    'height', or an empty dict when s is None or nothing matches.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = dimensions.group('w', 'h')
        return {
            'width': int(width),
            'height': int(height),
        }

    scan_lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    k_class = re.search(r'\b([48])[kK]\b', s)
    if k_class:
        # 4k -> 2160 lines, 8k -> 4320 lines
        return {'height': int(k_class.group(1)) * 540}

    return {}
3396
3397
0dc41787
S
def parse_bitrate(s):
    """Return the integer that precedes 'kbps' in s.

    Returns None when s is not a text string or contains no such number.
    """
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3404
3405
def month_by_name(name, lang='en'):
    """ Return the 1-based number of the month called name in language lang

    Languages missing from MONTH_NAMES fall back to the 'en' entry; None is
    returned when name is not in the selected list. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3415
3416
def month_by_abbreviation(abbrev):
    """ Return the 1-based number of the first English month whose first
    three letters equal abbrev, or None if there is none """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3425
3426
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity by '&amp;' in XML"""
    # An '&' is left alone when followed by one of the five named XML
    # entities or by a (hexa)decimal character reference of up to 4 digits
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
3433
3434
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl.

    title must be a text string.  The function returns None and silently
    does nothing when running on Jython, when 'libc.so.6' cannot be loaded
    or when the loaded libc has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initialise the buffer from the bytes themselves: ctypes then reserves
    # one extra byte for the terminating NUL.  Sizing the buffer to
    # len(title_bytes) and assigning .value fills it completely and leaves
    # the C string unterminated, so prctl could read past its end.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3459
3460
def remove_start(s, start):
    """Return s without its leading start; s itself (None included) if start is not a prefix"""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3463
3464
def remove_end(s, end):
    """Return s without its trailing end.

    s is returned unchanged (None included) when it does not end with end.
    """
    if s is None or not s.endswith(end):
        return s
    # Slice by absolute position: s[:-len(end)] would be s[:0] == '' for an
    # empty end, wiping the whole string instead of removing nothing
    return s[:len(s) - len(end)]
2b9faf55
PH
3467
3468
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching surrounding quotes (both '"' or both "'") from s

    None, strings shorter than two characters and strings without a matching
    pair are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3476
3477
def url_basename(url):
    """Return the last '/'-separated segment of the URL's path, ignoring leading/trailing slashes"""
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
3481
3482
02dc0a36
S
def base_url(url):
    """Return the http(s) URL up to and including the last '/' before any '?', '#' or '&'

    Raises AttributeError when url does not have that shape.
    """
    directory = re.match(r'https?://[^?#&]+/', url)
    return directory.group()
3485
3486
def urljoin(base, path):
    """Join path onto base.

    bytes arguments are decoded as UTF-8.  Returns None when path is empty
    or not a string, or when base is not a string starting with 'http://',
    'https://' or '//'.  A path that already starts with '//' (optionally
    preceded by a scheme) is returned as is, without looking at base.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # '-' goes last in the class: written as '+-.' it is the range '+'..'.'
    # which also admits ',', not a valid scheme character (RFC 3986, 3.1)
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3500
3501
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always reported as HEAD"""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3505
3506
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always reported as PUT"""

    def get_method(self):
        return 'PUT'
3510
3511
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default when that is not possible.

    v:        value to convert; None and '' yield default
    scale:    the converted value is floor-divided by this
    invscale: the converted value is multiplied by this before dividing
    get_attr: if set, the attribute of that name is read from v first
              (a missing attribute yields default)
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
9732d77e 3524
9572013d 3525
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted to a text string, or default when v is None"""
    if v is None:
        return default
    return compat_str(v)
3528
9732d77e
PH
3529
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned unchanged; text strings have ',', '.' and '+'
    removed before conversion; any other type yields None. """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
608d11f5
PH
3537
3538
def float_or_none(v, scale=1, invscale=1, default=None):
    """Return float(v) * invscale / scale, or default when v is None or not convertible"""
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3546
3547
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v if it is a real bool, default otherwise"""
    if isinstance(v, bool):
        return v
    return default
3550
3551
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a text string, default otherwise"""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3554
3555
af03000a
S
def url_or_none(url):
    """Return the stripped url if it starts with '//' or '<scheme>://', else None

    Empty values and non-text values also yield None.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
        return candidate
    return None
3561
3562
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in turn:
      * colon separated: [[[DD:]HH:]MM:]SS[.fraction]
      * unit suffixed, optionally ISO 8601-like: '1h 2m 3s', 'PT1H2M3S'
        (year/month/week parts are accepted but not counted)
      * a single, possibly fractional, hour or minute count: '1.5 hours'
    Returns None for non-string input or when no notation matches.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    parts = dict.fromkeys(('days', 'hours', 'mins', 'secs', 'ms'))

    matched = re.match(
        r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if not matched:
        matched = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
    if not matched:
        matched = re.match(
            r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
    if not matched:
        return None
    # Every capturing group in the three patterns is named, so groupdict()
    # carries exactly the components the pattern knows about
    parts.update(matched.groupdict())

    duration = 0
    if parts['secs']:
        duration += float(parts['secs'])
    if parts['mins']:
        duration += float(parts['mins']) * 60
    if parts['hours']:
        duration += float(parts['hours']) * 60 * 60
    if parts['days']:
        duration += float(parts['days']) * 24 * 60 * 60
    if parts['ms']:
        # 'ms' is captured with its leading dot, i.e. a fraction of a second
        duration += float(parts['ms'])
    return duration
91d7d0b3
JMF
3619
3620
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of filename's extension ('a.mp4' -> 'a.<ext>.mp4')

    When expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    stem, dotted_ext = os.path.splitext(filename)
    if expected_real_ext and dotted_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(stem, ext, dotted_ext)
d70ad093
PH
3627
3628
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's extension by ext ('a.mp4' -> 'a.<ext>')

    When expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    stem, dotted_ext = os.path.splitext(filename)
    if expected_real_ext and dotted_ext[1:] != expected_real_ext:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3634
3635
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks whether the given binary can be started, and returns its name
    (False when starting it fails with OSError).
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
3644
3645
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Runs the executable with args and returns the version found in its
    combined stdout/stderr (via detect_exe_version),
    or False if the executable could not be started """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = process.communicate()[0]
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3663
3664
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return group 1 of the first version_re match in output, or unrecognized

    version_re defaults to a pattern matching 'version <token>'.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3674
3675
class PagedList(object):
    """Base for paged lists; subclasses are expected to provide getslice()"""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3680
9c44d242
PH
3681
class OnDemandPagedList(PagedList):
    """Paged list that calls pagefunc(pagenum) only for the pages a slice needs

    Fetched pages are remembered per page number unless use_cache is false.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def _fetch_page(self, pagenum):
        """Return page pagenum as a list, consulting and filling the cache if enabled"""
        page = self._cache.get(pagenum) if self._use_cache else None
        if page is None:
            page = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page
        return page

    def getslice(self, start=0, end=None):
        collected = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = self._fetch_page(pagenum)

            # Offset of the first wanted entry inside this page
            if firstid <= start < nextfirstid:
                startv = start % self._pagesize
            else:
                startv = 0

            # Offset just past the last wanted entry, if the slice ends here
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % self._pagesize) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            collected.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return collected
81c2f20b
PH
3732
3733
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list whose number of pages (pagecount) is known up front"""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        collected = []
        first_page = start // self._pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start
        # Entries to drop from the front of the first fetched page
        to_skip = start - first_page * self._pagesize
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
3761
3762
def uppercase_escape(s):
    """Replace every literal '\\UXXXXXXXX' (8 hex digits) in s by the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
0fe2ff78
YCH
3769
3770
def lowercase_escape(s):
    """Replace every literal '\\uXXXX' (4 hex digits) in s by the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
b53466e1 3777
d05cfe06
S
3778
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # quote() is given UTF-8 bytes rather than a unicode string on Python 2
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
3784
3785
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # The host part is IDNA-encoded; every other escaped component goes
    # through escape_rfc3986
    replacements = {
        'netloc': parsed.netloc.encode('idna').decode('ascii'),
    }
    for component in ('path', 'params', 'query', 'fragment'):
        replacements[component] = escape_rfc3986(getattr(parsed, component))
    return parsed._replace(**replacements).geturl()
3796
62e609ab
PH
3797
def read_batch_urls(batch_fd):
    """Read URLs, one per line, from the file-like object batch_fd.

    batch_fd is closed afterwards.  Byte lines are decoded as UTF-8, a
    leading byte order mark and surrounding whitespace are removed, and
    empty lines as well as lines starting with '#', ';' or ']' are dropped.
    Returns the list of remaining URLs.
    """
    # A UTF-8 BOM appears as U+FEFF in text decoded as UTF-8 (including the
    # decode below), and as these three characters in text that was decoded
    # one byte per character
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
3812
3813
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes"""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3816
3817
def update_url_query(url, query):
    """Return url with the entries of the dict query merged into its query string

    Parameters already present in url are overridden by query; url is
    returned untouched when query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3826
8e60dc75 3827
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with some parts replaced

    headers are merged over req's headers, data and url replace req's when
    given, and query is merged into the URL.  HEAD and PUT requests keep
    their method; req's timeout attribute is carried over if it has one.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)
    request_class = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = request_class(
        new_url, data=new_data, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3846
3847
def _multipart_encode_impl(data, boundary):
    """Encode the dict data as multipart/form-data using the given boundary

    Returns (body bytes, Content-Type header value).  Raises ValueError if
    the boundary occurs inside any encoded field.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter + b'\r\n' + content)

    chunks.append(delimiter + b'--\r\n')

    return b''.join(chunks), content_type
3868
3869
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value). A ValueError
    is raised when a specified boundary occurs in the data; a randomly
    generated one is simply re-drawn in that case.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        out, content_type = _multipart_encode_impl(data, boundary)
        return out, content_type

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
        return out, content_type
3898
3899
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable of several keys, in d

    With a list/tuple of keys, return the value of the first key that is
    present and not None (and, if skip_false_values, not falsy); default if
    there is none.  With a single key this is plain d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
3908
3909
def try_get(src, getter, expected_type=None):
    """Apply getter (a callable, or a list/tuple of callables) to src

    Returns the first result obtained without an AttributeError, KeyError,
    TypeError or IndexError that also is an instance of expected_type (when
    given); None if no getter yields one.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
3921
3922
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts left to right, earlier dicts taking precedence

    None values are ignored.  A key that is already set is only overwritten
    when it holds an empty text string and the new value is a non-empty text
    string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_empty_string = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_empty_string:
                    continue
            merged[k] = v
    return merged
3935
3936
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as a text string, decoding it with encoding/errors if it is not one already"""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
3939
16392824 3940
a1a530b0
PH
# Age limit associated with each US film rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
3948
3949
# Age limit associated with each US TV Parental Guidelines label
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
3958
3959
def parse_age_limit(s):
    """Turn an age rating into a numeric age limit, or None if unrecognized

    Accepts a plain int in the range 0..21, a one- or two-digit string with
    an optional trailing '+', a US_RATINGS label, or a TV_PARENTAL_GUIDELINES
    label written as 'TV-xx', 'TV_xx' or 'TVxx'.
    """
    if type(s) is int:  # exact type on purpose: bool must not pass as int
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
3974
3975
def strip_jsonp(code):
    """Return the argument text of a JSONP call such as 'cb({...});'

    An optional 'window.' prefix, a 'name && name(...)' guard and trailing
    '//' comments are tolerated.  Input not matching that shape is returned
    unchanged.
    """
    jsonp_call = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_call, r'\g<callback_data>', code)
478c2c61
PH
3984
3985
def js_to_json(code):
    """Convert a JavaScript object literal into a JSON document (text to text)

    Handles single-quoted strings, unquoted identifiers used as keys or
    values, comments, trailing commas, and hexadecimal / octal integer
    literals (also when used as keys).
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Re-escape the string body for a double-quoted JSON string
            v = re.sub(r'(?s)\\.|"', lambda esc: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(esc.group(0), esc.group(0)), v[1:-1])
        else:
            # Only bare tokens can be integer literals; a quoted string that
            # merely looks like one (e.g. "0x40") must remain a string
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4025
4026
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its index in quality_ids,
    or to -1 when the id is not listed """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4035
acd69589
PH
4036
# %-style template with the named fields title, id and ext
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 4038
a020a0dc
PH
4039
def limit_length(s, length):
    """ Shorten s to at most length characters, ending in '...' if it was cut

    None is passed through unchanged """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4048
4049
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints

    Raises ValueError if any component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(int(component) for component in components)
48844745
PH
4052
4053
def is_outdated_version(version, limit, assume_new=True):
    """Tell whether version is older than limit

    When version is empty or either string cannot be parsed by
    version_tuple, the answer is the opposite of assume_new.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
732ea2f0
PH
4061
4062
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U

    True when this module was loaded by a zipimporter or when sys has a
    'frozen' attribute """
    from zipimport import zipimporter

    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
4068
4069
def args_to_str(args):
    """Return a short, shell-quoted string representation of a subprocess command"""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
4073
4074
def error_to_compat_str(err):
    """Return the message of the exception err as a text string"""
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
4082
4083
def mimetype2ext(mt):
    """Map a MIME type to a file extension

    Parameters (everything from the first ';') are ignored and matching is
    case-insensitive.  A few full types have dedicated extensions; otherwise
    the subtype is looked up in a table and, failing that, returned itself.
    Returns None for None.
    """
    if mt is None:
        return None

    # Normalize before any lookup, so that e.g. 'audio/mp4; codecs="..."'
    # or 'AUDIO/MPEG' hit the full-type table, and a '/' inside a parameter
    # cannot be mistaken for the type/subtype separator
    mimetype = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }.get(mimetype)
    if ext is not None:
        return ext

    _, _, res = mimetype.rpartition('/')

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }.get(res, res)
4119
4120
4f3c5e06 4121def parse_codecs(codecs_str):
4122 # http://tools.ietf.org/html/rfc6381
4123 if not codecs_str:
4124 return {}
4125 splited_codecs = list(filter(None, map(
4126 lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
4127 vcodec, acodec = None, None
4128 for full_codec in splited_codecs:
4129 codec = full_codec.split('.')[0]
28cc2241 4130 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
4f3c5e06 4131 if not vcodec:
4132 vcodec = full_codec
60f5c9fb 4133 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4134 if not acodec:
4135 acodec = full_codec
4136 else:
60f5c9fb 4137 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4138 if not vcodec and not acodec:
4139 if len(splited_codecs) == 2:
4140 return {
28cc2241
S
4141 'vcodec': splited_codecs[0],
4142 'acodec': splited_codecs[1],
4f3c5e06 4143 }
4144 else:
4145 return {
4146 'vcodec': vcodec or 'none',
4147 'acodec': acodec or 'none',
4148 }
4149 return {}
4150
4151
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the response headers of url_handle

    The filename of an 'attachment' Content-Disposition is preferred; the
    Content-Type is used otherwise.
    """
    getheader = url_handle.headers.get

    disposition = getheader('Content-Disposition')
    attachment = disposition and re.match(
        r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
    if attachment:
        ext = determine_ext(attachment.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
4164
4165
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI carrying the bytes data with the given MIME type"""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4168
4169
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    Nothing is blocked when no age_limit is set or when the content has no
    limit (is available for everyone) """
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4178
4179
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8 if
    there is none); the result is a match object when the text starts with
    optional whitespace followed by '<', and None otherwise. """

    BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    encoding, payload = 'utf-8', first_bytes
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = enc, first_bytes[len(bom):]
            break

    return re.match(r'^\s*<', payload.decode(encoding, 'replace'))
a055469f
PH
4198
4199
def determine_protocol(info_dict):
    """Return the protocol for info_dict

    An explicit 'protocol' entry wins.  Otherwise it is derived from
    info_dict['url']: a 'rtmp', 'mms' or 'rtsp' prefix, then an 'm3u8' or
    'f4m' extension, and finally the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    for manifest_ext in ('m3u8', 'f4m'):
        if ext == manifest_ext:
            return manifest_ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4220
4221
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values

    Every column but the last is left-aligned and padded to the width of
    its longest cell plus one, and columns are separated by a space; rows
    are joined by newlines, the header row coming first """
    table = [header_row] + data
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*table)]
    padded_specs = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_specs) + '%s'
    return '\n'.join(row_format % tuple(row) for row in table)
347de493
PH
4228
4229
4230def _match_one(filter_part, dct):
4231 COMPARISON_OPERATORS = {
4232 '<': operator.lt,
4233 '<=': operator.le,
4234 '>': operator.gt,
4235 '>=': operator.ge,
4236 '=': operator.eq,
4237 '!=': operator.ne,
4238 }
4239 operator_rex = re.compile(r'''(?x)\s*
4240 (?P<key>[a-z_]+)
4241 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4242 (?:
4243 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4244 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4245 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4246 )
4247 \s*$
4248 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4249 m = operator_rex.search(filter_part)
4250 if m:
4251 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4252 actual_value = dct.get(m.group('key'))
3089bc74
S
4253 if (m.group('quotedstrval') is not None
4254 or m.group('strval') is not None
e5a088dc
S
4255 # If the original field is a string and matching comparisonvalue is
4256 # a number we should respect the origin of the original field
4257 # and process comparison value as a string (see
067aa17e 4258 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4259 or actual_value is not None and m.group('intval') is not None
4260 and isinstance(actual_value, compat_str)):
347de493
PH
4261 if m.group('op') not in ('=', '!='):
4262 raise ValueError(
4263 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4264 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4265 quote = m.group('quote')
4266 if quote is not None:
4267 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4268 else:
4269 try:
4270 comparison_value = int(m.group('intval'))
4271 except ValueError:
4272 comparison_value = parse_filesize(m.group('intval'))
4273 if comparison_value is None:
4274 comparison_value = parse_filesize(m.group('intval') + 'B')
4275 if comparison_value is None:
4276 raise ValueError(
4277 'Invalid integer value %r in filter part %r' % (
4278 m.group('intval'), filter_part))
347de493
PH
4279 if actual_value is None:
4280 return m.group('none_inclusive')
4281 return op(actual_value, comparison_value)
4282
4283 UNARY_OPERATORS = {
1cc47c66
S
4284 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4285 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4286 }
4287 operator_rex = re.compile(r'''(?x)\s*
4288 (?P<op>%s)\s*(?P<key>[a-z_]+)
4289 \s*$
4290 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4291 m = operator_rex.search(filter_part)
4292 if m:
4293 op = UNARY_OPERATORS[m.group('op')]
4294 actual_value = dct.get(m.group('key'))
4295 return op(actual_value)
4296
4297 raise ValueError('Invalid filter part %r' % filter_part)
4298
4299
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    filter_str is a '&'-separated list of parts; every part has to match """

    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4305
4306
def match_filter_func(filter_str):
    """Build a function that checks an info dict against filter_str

    The returned function yields None when the dict passes the filter and a
    human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
4315
4316
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP time expression into seconds (float)

    Understands an offset in seconds with optional 's' suffix ('12.5s') and
    clock times 'H:MM:SS' with an optional fraction introduced by '.' or
    ':'.  Returns None for empty or unrecognized input.
    """
    if not time_expr:
        return

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4328
4329
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode 'HH:MM:SS,mmm'"""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # %d truncates each (possibly fractional) component
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
bf6427d2
YCH
4332
4333
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT, keeping the supported text styling
    as <font>/<b>/<i>/<u> markup.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Older namespace URIs are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved {property: value}; filled in further down
    styles = {}
    # Styling taken from <body>/<div>, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        """XMLParser target that renders a single <p> element as SRT text."""

        def __init__(self):
            # State is kept per instance: class-level lists would be shared
            # by every paragraph parsed during a conversion.
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
                return

            unclosed_elements = []
            style = {}
            element_style_id = attrib.get('style')
            if default_style:
                style.update(default_style)
            if element_style_id:
                style.update(styles.get(element_style_id, {}))
            for prop in SUPPORTED_STYLING:
                prop_val = attrib.get(_x('tts:' + prop))
                if prop_val:
                    style[prop] = prop_val

            # Styling already in effect from the enclosing element, if any
            inherited = self._applied_styles[-1] if self._applied_styles else {}
            font = ''
            for k, v in sorted(style.items()):
                if inherited.get(k) == v:
                    # Already active; do not open a redundant tag
                    continue
                if k == 'color':
                    font += ' color="%s"' % v
                elif k == 'fontSize':
                    font += ' size="%s"' % v
                elif k == 'fontFamily':
                    font += ' face="%s"' % v
                elif k == 'fontWeight' and v == 'bold':
                    self._out += '<b>'
                    unclosed_elements.append('b')
                elif k == 'fontStyle' and v == 'italic':
                    self._out += '<i>'
                    unclosed_elements.append('i')
                elif k == 'textDecoration' and v == 'underline':
                    self._out += '<u>'
                    unclosed_elements.append('u')
            if font:
                self._out += '<font' + font + '>'
                unclosed_elements.append('font')

            # Exactly one entry is pushed on each stack per element so that
            # end() can pop both unconditionally and the stacks stay balanced.
            applied_style = dict(inherited)
            applied_style.update(style)
            self._applied_styles.append(applied_style)
            self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialise the node and stream it through the target above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style can only be resolved once its parent
    # has been, so the list is scanned repeatedly until nothing is pending.
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # A pass that resolved nothing new means the pending styles refer to
        # a missing or cyclic parent; stop instead of looping forever.
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4496
4497
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argument list for an option that takes a value.

    Looks up param in params. Returns an empty list when it is missing or
    None, otherwise [command_option, value], where a truthy value is first
    converted with compat_str.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
4503
4504
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argument list for a boolean option.

    Returns an empty list when param is missing or None in params. Otherwise
    the value must be a bool and is rendered as true_value or false_value,
    either as a separate argument or, when separator is given, joined to
    command_option with it.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4513
4514
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value.

    Any other value, including a missing key, yields an empty list.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4518
4519
def cli_configuration_args(params, param, default=None):
    """Return the list of extra command-line arguments stored under param.

    When params has no value (or None) for param, default is returned; if no
    default was supplied a new empty list is returned on every call, so a
    caller that mutates the result cannot affect later calls.

    A value that is present must be a list.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return [] if default is None else default
    assert isinstance(ex_args, list)
    return ex_args
4526
4527
39672624
YCH
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    # Withdrawn two-letter codes kept in _lang_map as input aliases only;
    # long2short must never hand them out.
    _DEPRECATED_CODES = frozenset(('iw', 'in', 'ji'))

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up, so a value such
        as 'en-US' is treated as 'en'. Returns None for unknown codes.
        """
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Withdrawn aliases are skipped so the result does not depend on dict
        iteration order. Returns None for unknown codes.
        """
        for short_name, long_name in cls._lang_map.items():
            if long_name == code and short_name not in cls._DEPRECATED_CODES:
                return short_name
        return None
4731
4732
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for a two-letter country code.

        The codes in the table are ISO 3166-1 alpha-2 codes. The lookup is
        case-insensitive; None is returned for codes not in the table.
        """
        country_code = code.upper()
        return cls._country_map.get(country_code)
4991
4992
773f291d
S
class GeoUtils(object):
    """Helpers for picking IPv4 addresses inside per-country address blocks."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address from a country's block or a CIDR block.

        A two-character argument is treated as a country code and looked up
        (case-insensitively) in _country_ip_map; None is returned when it is
        not listed. Any other argument is used as an 'address/prefix' block
        directly. The result is a dotted-quad string.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None

        network, prefix_length = block.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting every host bit gives the top of the range
        host_mask = 0xffffffff >> int(prefix_length)
        highest = lowest | host_mask
        picked = random.randint(lowest, highest)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', picked)))
773f291d
S
5251
5252
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets each request select its own proxy.

    A request may carry a 'Ytdl-request-proxy' header naming the proxy to
    use; the header is consumed here. SOCKS proxies are not handled by this
    class: they are passed on through a 'Ytdl-socks-proxy' header instead.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            handler_name = '%s_open' % scheme
            setattr(
                self, handler_name,
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
        # NOTE(review): presumably the base initialiser installs its own
        # handlers for the schemes listed in proxies - confirm
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy overrides the configured one
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            proxy = per_request_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5276
5277
0a5445dd
YCH
5278# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5279# released into Public Domain
5280# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5281
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Zero (and any non-positive value) yields a single zero byte. If
    blocksize is greater than zero, the result is left-padded with zero
    bytes until its length is a multiple of blocksize.
    """
    n = int(n)
    # Collect 32-bit words, least significant first
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    s = b''.join(reversed(words))
    # Drop leading zero bytes but always keep at least one byte
    s = s.lstrip(b'\000') or b'\000'
    remainder = len(s) % blocksize if blocksize > 0 else 0
    if remainder:
        s = (blocksize - remainder) * b'\000' + s
    return s
5310
5311
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    padding = -len(s) % 4
    if padding:
        s = b'\000' * padding + s
    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
5327
5328
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The byte order of the input is reversed before it is read as a number
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
81bdc8fd
YCH
5344
5345
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is [0, 2] + padding + [0] + data. The padding bytes are drawn
    from 1..255: they must be non-zero, because the first zero byte after
    the [0, 2] header marks where the data starts.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError if data leaves fewer than 8 padding bytes (11 bytes
                       of overhead in total)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5359
5360
def encode_base_n(num, n, table=None):
    """Encode the integer num as a string in base n.

    Digits are taken from table; when no table is given, the first n
    characters of 0-9, a-z, A-Z are used (so n may be at most 62).
    Negative numbers are encoded as '-' followed by the encoding of their
    absolute value.

    Raises ValueError when n exceeds the number of available digits.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never brings a negative number to 0, so encode the
    # magnitude and put the sign back afterwards
    sign = ''
    if num < 0:
        sign, num = '-', -num

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return sign + ''.join(reversed(digits))
f52354a8
YCH
5377
5378
def decode_packed_codes(code):
    """Expand JavaScript that was packed with a symbol table.

    The packed payload, base, symbol count and '|'-separated symbol list are
    extracted with PACKED_CODES_RE; every word in the payload is then
    replaced by the symbol it stands for.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-n index to its symbol; an empty symbol stands for itself
    symbol_table = {}
    for index in reversed(range(count)):
        key = encode_base_n(index, base)
        symbol_table[key] = symbols[index] or key

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
e154c651 5395
5396
1ced2221
S
def caesar(s, alphabet, shift):
    """Apply a Caesar shift to s over the given alphabet.

    Each character found in alphabet is replaced by the one shift positions
    further on, wrapping around; other characters are left as they are.
    A shift of 0 returns s unchanged.
    """
    if shift == 0:
        return s

    size = len(alphabet)

    def _shift_char(c):
        if c not in alphabet:
            return c
        return alphabet[(alphabet.index(c) + shift) % size]

    return ''.join(_shift_char(c) for c in s)
5404
5405
def rot47(s):
    """Apply ROT47 to s: a Caesar shift of 47 over the 94 characters '!' to '~'."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5408
5409
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list into a dict.

    attrib is a comma-separated list of KEY=value pairs where a value is
    either unquoted or enclosed in double quotes (and may then contain
    commas). Surrounding quotes are removed from the returned values.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    info = {}
    for key, val in re.findall(attribute_re, attrib):
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
1143535d
YCH
5417
5418
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as unsigned 32-bit."""
    if val < 0:
        # Reinterpret the negative value as its 32-bit unsigned counterpart
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5421
5422
5423# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5424# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into rows of byte values.

    Returns a tuple (width, height, pixels) where pixels is a list with one
    list per row, each holding width * 3 integers in 0..255.

    Raises IOError when the signature or leading IHDR chunk is missing, or
    when there is no IDAT data.

    NOTE(review): only width and height are read from IHDR and every pixel is
    taken to be 3 bytes; presumably this expects 8-bit, non-interlaced RGB
    images - confirm against callers.
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8-byte signature is a sequence of chunks
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # struct formats for big-endian unsigned integers, keyed by byte length
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: 4-byte length, 4-byte type, data, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk was checked above to be IHDR
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image data is the concatenation of all IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of bytes per row, at 3 bytes per pixel
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # idx is a flat index into the rows decoded so far
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every row in the decompressed data starts with one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so _get_pixel can read bytes already
        # decoded in this row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Same byte position in the previous pixel / previous row;
            # 0 when there is none
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the row's filter; filter type 0 leaves the byte as is
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of left, up, upper-left is closest to p
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5528
5529
def write_xattr(path, key, value):
    """Set the extended attribute ``key`` of the file ``path`` to ``value``.

    Backends are tried in this order:
      1. the importable ``xattr`` module (either pyxattr, which exposes
         ``set``, or the "xattr" package, which exposes ``setxattr``);
      2. on Windows ('nt'), an NTFS Alternate Data Stream ``path:key``;
      3. the ``setfattr`` or ``xattr`` command line tools.

    ``value`` is expected to be a byte string: it is written in binary mode
    for the ADS backend and decoded as UTF-8 for the command line tools.

    Raises XAttrUnavailableError if pyxattr is older than 0.5.0 or if no
    usable backend is found, and XAttrMetadataError if the chosen backend
    fails to write the attribute.
    """
    try:
        # Preferred backend: an importable xattr module
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            installed_too_old = (
                version_tuple(xattr.__version__)
                < version_tuple(pyxattr_required_version))
            if installed_too_old:
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))
            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        user_has_setfattr = check_executable('setfattr', ['--version'])
        user_has_xattr = check_executable('xattr', ['-h'])

        if not (user_has_setfattr or user_has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        # The command line tools take the value as a text argument
        value = value.decode('utf-8')
        if user_has_setfattr:
            executable = 'setfattr'
            opts = ['-n', key, '-v', value]
        else:
            executable = 'xattr'
            opts = ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd += [encodeArgument(o) for o in opts]
        cmd.append(encodeFilename(path, True))

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

        _, stderr = p.communicate()
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
0c265486
YCH
5612
5613
def random_birthday(year_field, month_field, day_field):
    """Return a random birth date as a dict of form-field values.

    A date is drawn uniformly from 1950-01-01 to 1995-12-31 (both included).
    The result maps ``year_field``, ``month_field`` and ``day_field`` to the
    year, month and day of that date, each converted with ``str()``.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_in_days = (latest - earliest).days

    # randint is inclusive on both ends, so ``latest`` itself can be drawn
    birthday = earliest + datetime.timedelta(random.randint(0, span_in_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(
        (name, str(part)) for name, part in zip(field_names, date_parts))