]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
pull changes from remote master (#190)
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
b827ee92 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
03f9daab 19import io
79a2e94e 20import itertools
f4bfd65f 21import json
d77c3dfd 22import locale
02dbf93f 23import math
347de493 24import operator
d77c3dfd 25import os
c496ca96 26import platform
773f291d 27import random
d77c3dfd 28import re
c496ca96 29import socket
79a2e94e 30import ssl
1c088fa8 31import subprocess
d77c3dfd 32import sys
181c8655 33import tempfile
b827ee92 34import time
01951dda 35import traceback
bcf89ce6 36import xml.etree.ElementTree
d77c3dfd 37import zlib
d77c3dfd 38
8c25f81b 39from .compat import (
b4a3d461 40 compat_HTMLParseError,
8bb56eee 41 compat_HTMLParser,
8f9312c3 42 compat_basestring,
8c25f81b 43 compat_chr,
1bab3437 44 compat_cookiejar,
d7cd9a9e 45 compat_ctypes_WINFUNCTYPE,
36e6f62c 46 compat_etree_fromstring,
51098426 47 compat_expanduser,
8c25f81b 48 compat_html_entities,
55b2f099 49 compat_html_entities_html5,
be4a824d 50 compat_http_client,
42db58ec 51 compat_integer_types,
c86b6142 52 compat_kwargs,
efa97bdc 53 compat_os_name,
8c25f81b 54 compat_parse_qs,
702ccf2d 55 compat_shlex_quote,
8c25f81b 56 compat_str,
edaa23f8 57 compat_struct_pack,
d3f8e038 58 compat_struct_unpack,
8c25f81b
PH
59 compat_urllib_error,
60 compat_urllib_parse,
15707c7e 61 compat_urllib_parse_urlencode,
8c25f81b 62 compat_urllib_parse_urlparse,
7581bfc9 63 compat_urllib_parse_unquote_plus,
8c25f81b
PH
64 compat_urllib_request,
65 compat_urlparse,
810c10ba 66 compat_xpath,
8c25f81b 67)
4644ac55 68
71aff188
YCH
69from .socks import (
70 ProxyType,
71 sockssocket,
72)
73
4644ac55 74
51fb4995
YCH
def register_socks_protocols():
    """Register the SOCKS proxy schemes with the URL parser.

    Appends each of 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc`` unless it is already listed, so
    calling this function repeatedly does not create duplicate entries.
    """
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    netloc_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in netloc_schemes:
            # Already registered; nothing to do for this scheme.
            continue
        netloc_schemes.append(socks_scheme)
82
83
468e2e92
FV
# The class of compiled regular-expression objects; it is not clearly
# defined otherwise, so it is taken from an actual compiled pattern.
compiled_regex_type = re.compile('').__class__
86
f7a147e3
S
87
88def random_user_agent():
89 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
90 _CHROME_VERSIONS = (
91 '74.0.3729.129',
92 '76.0.3780.3',
93 '76.0.3780.2',
94 '74.0.3729.128',
95 '76.0.3780.1',
96 '76.0.3780.0',
97 '75.0.3770.15',
98 '74.0.3729.127',
99 '74.0.3729.126',
100 '76.0.3779.1',
101 '76.0.3779.0',
102 '75.0.3770.14',
103 '74.0.3729.125',
104 '76.0.3778.1',
105 '76.0.3778.0',
106 '75.0.3770.13',
107 '74.0.3729.124',
108 '74.0.3729.123',
109 '73.0.3683.121',
110 '76.0.3777.1',
111 '76.0.3777.0',
112 '75.0.3770.12',
113 '74.0.3729.122',
114 '76.0.3776.4',
115 '75.0.3770.11',
116 '74.0.3729.121',
117 '76.0.3776.3',
118 '76.0.3776.2',
119 '73.0.3683.120',
120 '74.0.3729.120',
121 '74.0.3729.119',
122 '74.0.3729.118',
123 '76.0.3776.1',
124 '76.0.3776.0',
125 '76.0.3775.5',
126 '75.0.3770.10',
127 '74.0.3729.117',
128 '76.0.3775.4',
129 '76.0.3775.3',
130 '74.0.3729.116',
131 '75.0.3770.9',
132 '76.0.3775.2',
133 '76.0.3775.1',
134 '76.0.3775.0',
135 '75.0.3770.8',
136 '74.0.3729.115',
137 '74.0.3729.114',
138 '76.0.3774.1',
139 '76.0.3774.0',
140 '75.0.3770.7',
141 '74.0.3729.113',
142 '74.0.3729.112',
143 '74.0.3729.111',
144 '76.0.3773.1',
145 '76.0.3773.0',
146 '75.0.3770.6',
147 '74.0.3729.110',
148 '74.0.3729.109',
149 '76.0.3772.1',
150 '76.0.3772.0',
151 '75.0.3770.5',
152 '74.0.3729.108',
153 '74.0.3729.107',
154 '76.0.3771.1',
155 '76.0.3771.0',
156 '75.0.3770.4',
157 '74.0.3729.106',
158 '74.0.3729.105',
159 '75.0.3770.3',
160 '74.0.3729.104',
161 '74.0.3729.103',
162 '74.0.3729.102',
163 '75.0.3770.2',
164 '74.0.3729.101',
165 '75.0.3770.1',
166 '75.0.3770.0',
167 '74.0.3729.100',
168 '75.0.3769.5',
169 '75.0.3769.4',
170 '74.0.3729.99',
171 '75.0.3769.3',
172 '75.0.3769.2',
173 '75.0.3768.6',
174 '74.0.3729.98',
175 '75.0.3769.1',
176 '75.0.3769.0',
177 '74.0.3729.97',
178 '73.0.3683.119',
179 '73.0.3683.118',
180 '74.0.3729.96',
181 '75.0.3768.5',
182 '75.0.3768.4',
183 '75.0.3768.3',
184 '75.0.3768.2',
185 '74.0.3729.95',
186 '74.0.3729.94',
187 '75.0.3768.1',
188 '75.0.3768.0',
189 '74.0.3729.93',
190 '74.0.3729.92',
191 '73.0.3683.117',
192 '74.0.3729.91',
193 '75.0.3766.3',
194 '74.0.3729.90',
195 '75.0.3767.2',
196 '75.0.3767.1',
197 '75.0.3767.0',
198 '74.0.3729.89',
199 '73.0.3683.116',
200 '75.0.3766.2',
201 '74.0.3729.88',
202 '75.0.3766.1',
203 '75.0.3766.0',
204 '74.0.3729.87',
205 '73.0.3683.115',
206 '74.0.3729.86',
207 '75.0.3765.1',
208 '75.0.3765.0',
209 '74.0.3729.85',
210 '73.0.3683.114',
211 '74.0.3729.84',
212 '75.0.3764.1',
213 '75.0.3764.0',
214 '74.0.3729.83',
215 '73.0.3683.113',
216 '75.0.3763.2',
217 '75.0.3761.4',
218 '74.0.3729.82',
219 '75.0.3763.1',
220 '75.0.3763.0',
221 '74.0.3729.81',
222 '73.0.3683.112',
223 '75.0.3762.1',
224 '75.0.3762.0',
225 '74.0.3729.80',
226 '75.0.3761.3',
227 '74.0.3729.79',
228 '73.0.3683.111',
229 '75.0.3761.2',
230 '74.0.3729.78',
231 '74.0.3729.77',
232 '75.0.3761.1',
233 '75.0.3761.0',
234 '73.0.3683.110',
235 '74.0.3729.76',
236 '74.0.3729.75',
237 '75.0.3760.0',
238 '74.0.3729.74',
239 '75.0.3759.8',
240 '75.0.3759.7',
241 '75.0.3759.6',
242 '74.0.3729.73',
243 '75.0.3759.5',
244 '74.0.3729.72',
245 '73.0.3683.109',
246 '75.0.3759.4',
247 '75.0.3759.3',
248 '74.0.3729.71',
249 '75.0.3759.2',
250 '74.0.3729.70',
251 '73.0.3683.108',
252 '74.0.3729.69',
253 '75.0.3759.1',
254 '75.0.3759.0',
255 '74.0.3729.68',
256 '73.0.3683.107',
257 '74.0.3729.67',
258 '75.0.3758.1',
259 '75.0.3758.0',
260 '74.0.3729.66',
261 '73.0.3683.106',
262 '74.0.3729.65',
263 '75.0.3757.1',
264 '75.0.3757.0',
265 '74.0.3729.64',
266 '73.0.3683.105',
267 '74.0.3729.63',
268 '75.0.3756.1',
269 '75.0.3756.0',
270 '74.0.3729.62',
271 '73.0.3683.104',
272 '75.0.3755.3',
273 '75.0.3755.2',
274 '73.0.3683.103',
275 '75.0.3755.1',
276 '75.0.3755.0',
277 '74.0.3729.61',
278 '73.0.3683.102',
279 '74.0.3729.60',
280 '75.0.3754.2',
281 '74.0.3729.59',
282 '75.0.3753.4',
283 '74.0.3729.58',
284 '75.0.3754.1',
285 '75.0.3754.0',
286 '74.0.3729.57',
287 '73.0.3683.101',
288 '75.0.3753.3',
289 '75.0.3752.2',
290 '75.0.3753.2',
291 '74.0.3729.56',
292 '75.0.3753.1',
293 '75.0.3753.0',
294 '74.0.3729.55',
295 '73.0.3683.100',
296 '74.0.3729.54',
297 '75.0.3752.1',
298 '75.0.3752.0',
299 '74.0.3729.53',
300 '73.0.3683.99',
301 '74.0.3729.52',
302 '75.0.3751.1',
303 '75.0.3751.0',
304 '74.0.3729.51',
305 '73.0.3683.98',
306 '74.0.3729.50',
307 '75.0.3750.0',
308 '74.0.3729.49',
309 '74.0.3729.48',
310 '74.0.3729.47',
311 '75.0.3749.3',
312 '74.0.3729.46',
313 '73.0.3683.97',
314 '75.0.3749.2',
315 '74.0.3729.45',
316 '75.0.3749.1',
317 '75.0.3749.0',
318 '74.0.3729.44',
319 '73.0.3683.96',
320 '74.0.3729.43',
321 '74.0.3729.42',
322 '75.0.3748.1',
323 '75.0.3748.0',
324 '74.0.3729.41',
325 '75.0.3747.1',
326 '73.0.3683.95',
327 '75.0.3746.4',
328 '74.0.3729.40',
329 '74.0.3729.39',
330 '75.0.3747.0',
331 '75.0.3746.3',
332 '75.0.3746.2',
333 '74.0.3729.38',
334 '75.0.3746.1',
335 '75.0.3746.0',
336 '74.0.3729.37',
337 '73.0.3683.94',
338 '75.0.3745.5',
339 '75.0.3745.4',
340 '75.0.3745.3',
341 '75.0.3745.2',
342 '74.0.3729.36',
343 '75.0.3745.1',
344 '75.0.3745.0',
345 '75.0.3744.2',
346 '74.0.3729.35',
347 '73.0.3683.93',
348 '74.0.3729.34',
349 '75.0.3744.1',
350 '75.0.3744.0',
351 '74.0.3729.33',
352 '73.0.3683.92',
353 '74.0.3729.32',
354 '74.0.3729.31',
355 '73.0.3683.91',
356 '75.0.3741.2',
357 '75.0.3740.5',
358 '74.0.3729.30',
359 '75.0.3741.1',
360 '75.0.3741.0',
361 '74.0.3729.29',
362 '75.0.3740.4',
363 '73.0.3683.90',
364 '74.0.3729.28',
365 '75.0.3740.3',
366 '73.0.3683.89',
367 '75.0.3740.2',
368 '74.0.3729.27',
369 '75.0.3740.1',
370 '75.0.3740.0',
371 '74.0.3729.26',
372 '73.0.3683.88',
373 '73.0.3683.87',
374 '74.0.3729.25',
375 '75.0.3739.1',
376 '75.0.3739.0',
377 '73.0.3683.86',
378 '74.0.3729.24',
379 '73.0.3683.85',
380 '75.0.3738.4',
381 '75.0.3738.3',
382 '75.0.3738.2',
383 '75.0.3738.1',
384 '75.0.3738.0',
385 '74.0.3729.23',
386 '73.0.3683.84',
387 '74.0.3729.22',
388 '74.0.3729.21',
389 '75.0.3737.1',
390 '75.0.3737.0',
391 '74.0.3729.20',
392 '73.0.3683.83',
393 '74.0.3729.19',
394 '75.0.3736.1',
395 '75.0.3736.0',
396 '74.0.3729.18',
397 '73.0.3683.82',
398 '74.0.3729.17',
399 '75.0.3735.1',
400 '75.0.3735.0',
401 '74.0.3729.16',
402 '73.0.3683.81',
403 '75.0.3734.1',
404 '75.0.3734.0',
405 '74.0.3729.15',
406 '73.0.3683.80',
407 '74.0.3729.14',
408 '75.0.3733.1',
409 '75.0.3733.0',
410 '75.0.3732.1',
411 '74.0.3729.13',
412 '74.0.3729.12',
413 '73.0.3683.79',
414 '74.0.3729.11',
415 '75.0.3732.0',
416 '74.0.3729.10',
417 '73.0.3683.78',
418 '74.0.3729.9',
419 '74.0.3729.8',
420 '74.0.3729.7',
421 '75.0.3731.3',
422 '75.0.3731.2',
423 '75.0.3731.0',
424 '74.0.3729.6',
425 '73.0.3683.77',
426 '73.0.3683.76',
427 '75.0.3730.5',
428 '75.0.3730.4',
429 '73.0.3683.75',
430 '74.0.3729.5',
431 '73.0.3683.74',
432 '75.0.3730.3',
433 '75.0.3730.2',
434 '74.0.3729.4',
435 '73.0.3683.73',
436 '73.0.3683.72',
437 '75.0.3730.1',
438 '75.0.3730.0',
439 '74.0.3729.3',
440 '73.0.3683.71',
441 '74.0.3729.2',
442 '73.0.3683.70',
443 '74.0.3729.1',
444 '74.0.3729.0',
445 '74.0.3726.4',
446 '73.0.3683.69',
447 '74.0.3726.3',
448 '74.0.3728.0',
449 '74.0.3726.2',
450 '73.0.3683.68',
451 '74.0.3726.1',
452 '74.0.3726.0',
453 '74.0.3725.4',
454 '73.0.3683.67',
455 '73.0.3683.66',
456 '74.0.3725.3',
457 '74.0.3725.2',
458 '74.0.3725.1',
459 '74.0.3724.8',
460 '74.0.3725.0',
461 '73.0.3683.65',
462 '74.0.3724.7',
463 '74.0.3724.6',
464 '74.0.3724.5',
465 '74.0.3724.4',
466 '74.0.3724.3',
467 '74.0.3724.2',
468 '74.0.3724.1',
469 '74.0.3724.0',
470 '73.0.3683.64',
471 '74.0.3723.1',
472 '74.0.3723.0',
473 '73.0.3683.63',
474 '74.0.3722.1',
475 '74.0.3722.0',
476 '73.0.3683.62',
477 '74.0.3718.9',
478 '74.0.3702.3',
479 '74.0.3721.3',
480 '74.0.3721.2',
481 '74.0.3721.1',
482 '74.0.3721.0',
483 '74.0.3720.6',
484 '73.0.3683.61',
485 '72.0.3626.122',
486 '73.0.3683.60',
487 '74.0.3720.5',
488 '72.0.3626.121',
489 '74.0.3718.8',
490 '74.0.3720.4',
491 '74.0.3720.3',
492 '74.0.3718.7',
493 '74.0.3720.2',
494 '74.0.3720.1',
495 '74.0.3720.0',
496 '74.0.3718.6',
497 '74.0.3719.5',
498 '73.0.3683.59',
499 '74.0.3718.5',
500 '74.0.3718.4',
501 '74.0.3719.4',
502 '74.0.3719.3',
503 '74.0.3719.2',
504 '74.0.3719.1',
505 '73.0.3683.58',
506 '74.0.3719.0',
507 '73.0.3683.57',
508 '73.0.3683.56',
509 '74.0.3718.3',
510 '73.0.3683.55',
511 '74.0.3718.2',
512 '74.0.3718.1',
513 '74.0.3718.0',
514 '73.0.3683.54',
515 '74.0.3717.2',
516 '73.0.3683.53',
517 '74.0.3717.1',
518 '74.0.3717.0',
519 '73.0.3683.52',
520 '74.0.3716.1',
521 '74.0.3716.0',
522 '73.0.3683.51',
523 '74.0.3715.1',
524 '74.0.3715.0',
525 '73.0.3683.50',
526 '74.0.3711.2',
527 '74.0.3714.2',
528 '74.0.3713.3',
529 '74.0.3714.1',
530 '74.0.3714.0',
531 '73.0.3683.49',
532 '74.0.3713.1',
533 '74.0.3713.0',
534 '72.0.3626.120',
535 '73.0.3683.48',
536 '74.0.3712.2',
537 '74.0.3712.1',
538 '74.0.3712.0',
539 '73.0.3683.47',
540 '72.0.3626.119',
541 '73.0.3683.46',
542 '74.0.3710.2',
543 '72.0.3626.118',
544 '74.0.3711.1',
545 '74.0.3711.0',
546 '73.0.3683.45',
547 '72.0.3626.117',
548 '74.0.3710.1',
549 '74.0.3710.0',
550 '73.0.3683.44',
551 '72.0.3626.116',
552 '74.0.3709.1',
553 '74.0.3709.0',
554 '74.0.3704.9',
555 '73.0.3683.43',
556 '72.0.3626.115',
557 '74.0.3704.8',
558 '74.0.3704.7',
559 '74.0.3708.0',
560 '74.0.3706.7',
561 '74.0.3704.6',
562 '73.0.3683.42',
563 '72.0.3626.114',
564 '74.0.3706.6',
565 '72.0.3626.113',
566 '74.0.3704.5',
567 '74.0.3706.5',
568 '74.0.3706.4',
569 '74.0.3706.3',
570 '74.0.3706.2',
571 '74.0.3706.1',
572 '74.0.3706.0',
573 '73.0.3683.41',
574 '72.0.3626.112',
575 '74.0.3705.1',
576 '74.0.3705.0',
577 '73.0.3683.40',
578 '72.0.3626.111',
579 '73.0.3683.39',
580 '74.0.3704.4',
581 '73.0.3683.38',
582 '74.0.3704.3',
583 '74.0.3704.2',
584 '74.0.3704.1',
585 '74.0.3704.0',
586 '73.0.3683.37',
587 '72.0.3626.110',
588 '72.0.3626.109',
589 '74.0.3703.3',
590 '74.0.3703.2',
591 '73.0.3683.36',
592 '74.0.3703.1',
593 '74.0.3703.0',
594 '73.0.3683.35',
595 '72.0.3626.108',
596 '74.0.3702.2',
597 '74.0.3699.3',
598 '74.0.3702.1',
599 '74.0.3702.0',
600 '73.0.3683.34',
601 '72.0.3626.107',
602 '73.0.3683.33',
603 '74.0.3701.1',
604 '74.0.3701.0',
605 '73.0.3683.32',
606 '73.0.3683.31',
607 '72.0.3626.105',
608 '74.0.3700.1',
609 '74.0.3700.0',
610 '73.0.3683.29',
611 '72.0.3626.103',
612 '74.0.3699.2',
613 '74.0.3699.1',
614 '74.0.3699.0',
615 '73.0.3683.28',
616 '72.0.3626.102',
617 '73.0.3683.27',
618 '73.0.3683.26',
619 '74.0.3698.0',
620 '74.0.3696.2',
621 '72.0.3626.101',
622 '73.0.3683.25',
623 '74.0.3696.1',
624 '74.0.3696.0',
625 '74.0.3694.8',
626 '72.0.3626.100',
627 '74.0.3694.7',
628 '74.0.3694.6',
629 '74.0.3694.5',
630 '74.0.3694.4',
631 '72.0.3626.99',
632 '72.0.3626.98',
633 '74.0.3694.3',
634 '73.0.3683.24',
635 '72.0.3626.97',
636 '72.0.3626.96',
637 '72.0.3626.95',
638 '73.0.3683.23',
639 '72.0.3626.94',
640 '73.0.3683.22',
641 '73.0.3683.21',
642 '72.0.3626.93',
643 '74.0.3694.2',
644 '72.0.3626.92',
645 '74.0.3694.1',
646 '74.0.3694.0',
647 '74.0.3693.6',
648 '73.0.3683.20',
649 '72.0.3626.91',
650 '74.0.3693.5',
651 '74.0.3693.4',
652 '74.0.3693.3',
653 '74.0.3693.2',
654 '73.0.3683.19',
655 '74.0.3693.1',
656 '74.0.3693.0',
657 '73.0.3683.18',
658 '72.0.3626.90',
659 '74.0.3692.1',
660 '74.0.3692.0',
661 '73.0.3683.17',
662 '72.0.3626.89',
663 '74.0.3687.3',
664 '74.0.3691.1',
665 '74.0.3691.0',
666 '73.0.3683.16',
667 '72.0.3626.88',
668 '72.0.3626.87',
669 '73.0.3683.15',
670 '74.0.3690.1',
671 '74.0.3690.0',
672 '73.0.3683.14',
673 '72.0.3626.86',
674 '73.0.3683.13',
675 '73.0.3683.12',
676 '74.0.3689.1',
677 '74.0.3689.0',
678 '73.0.3683.11',
679 '72.0.3626.85',
680 '73.0.3683.10',
681 '72.0.3626.84',
682 '73.0.3683.9',
683 '74.0.3688.1',
684 '74.0.3688.0',
685 '73.0.3683.8',
686 '72.0.3626.83',
687 '74.0.3687.2',
688 '74.0.3687.1',
689 '74.0.3687.0',
690 '73.0.3683.7',
691 '72.0.3626.82',
692 '74.0.3686.4',
693 '72.0.3626.81',
694 '74.0.3686.3',
695 '74.0.3686.2',
696 '74.0.3686.1',
697 '74.0.3686.0',
698 '73.0.3683.6',
699 '72.0.3626.80',
700 '74.0.3685.1',
701 '74.0.3685.0',
702 '73.0.3683.5',
703 '72.0.3626.79',
704 '74.0.3684.1',
705 '74.0.3684.0',
706 '73.0.3683.4',
707 '72.0.3626.78',
708 '72.0.3626.77',
709 '73.0.3683.3',
710 '73.0.3683.2',
711 '72.0.3626.76',
712 '73.0.3683.1',
713 '73.0.3683.0',
714 '72.0.3626.75',
715 '71.0.3578.141',
716 '73.0.3682.1',
717 '73.0.3682.0',
718 '72.0.3626.74',
719 '71.0.3578.140',
720 '73.0.3681.4',
721 '73.0.3681.3',
722 '73.0.3681.2',
723 '73.0.3681.1',
724 '73.0.3681.0',
725 '72.0.3626.73',
726 '71.0.3578.139',
727 '72.0.3626.72',
728 '72.0.3626.71',
729 '73.0.3680.1',
730 '73.0.3680.0',
731 '72.0.3626.70',
732 '71.0.3578.138',
733 '73.0.3678.2',
734 '73.0.3679.1',
735 '73.0.3679.0',
736 '72.0.3626.69',
737 '71.0.3578.137',
738 '73.0.3678.1',
739 '73.0.3678.0',
740 '71.0.3578.136',
741 '73.0.3677.1',
742 '73.0.3677.0',
743 '72.0.3626.68',
744 '72.0.3626.67',
745 '71.0.3578.135',
746 '73.0.3676.1',
747 '73.0.3676.0',
748 '73.0.3674.2',
749 '72.0.3626.66',
750 '71.0.3578.134',
751 '73.0.3674.1',
752 '73.0.3674.0',
753 '72.0.3626.65',
754 '71.0.3578.133',
755 '73.0.3673.2',
756 '73.0.3673.1',
757 '73.0.3673.0',
758 '72.0.3626.64',
759 '71.0.3578.132',
760 '72.0.3626.63',
761 '72.0.3626.62',
762 '72.0.3626.61',
763 '72.0.3626.60',
764 '73.0.3672.1',
765 '73.0.3672.0',
766 '72.0.3626.59',
767 '71.0.3578.131',
768 '73.0.3671.3',
769 '73.0.3671.2',
770 '73.0.3671.1',
771 '73.0.3671.0',
772 '72.0.3626.58',
773 '71.0.3578.130',
774 '73.0.3670.1',
775 '73.0.3670.0',
776 '72.0.3626.57',
777 '71.0.3578.129',
778 '73.0.3669.1',
779 '73.0.3669.0',
780 '72.0.3626.56',
781 '71.0.3578.128',
782 '73.0.3668.2',
783 '73.0.3668.1',
784 '73.0.3668.0',
785 '72.0.3626.55',
786 '71.0.3578.127',
787 '73.0.3667.2',
788 '73.0.3667.1',
789 '73.0.3667.0',
790 '72.0.3626.54',
791 '71.0.3578.126',
792 '73.0.3666.1',
793 '73.0.3666.0',
794 '72.0.3626.53',
795 '71.0.3578.125',
796 '73.0.3665.4',
797 '73.0.3665.3',
798 '72.0.3626.52',
799 '73.0.3665.2',
800 '73.0.3664.4',
801 '73.0.3665.1',
802 '73.0.3665.0',
803 '72.0.3626.51',
804 '71.0.3578.124',
805 '72.0.3626.50',
806 '73.0.3664.3',
807 '73.0.3664.2',
808 '73.0.3664.1',
809 '73.0.3664.0',
810 '73.0.3663.2',
811 '72.0.3626.49',
812 '71.0.3578.123',
813 '73.0.3663.1',
814 '73.0.3663.0',
815 '72.0.3626.48',
816 '71.0.3578.122',
817 '73.0.3662.1',
818 '73.0.3662.0',
819 '72.0.3626.47',
820 '71.0.3578.121',
821 '73.0.3661.1',
822 '72.0.3626.46',
823 '73.0.3661.0',
824 '72.0.3626.45',
825 '71.0.3578.120',
826 '73.0.3660.2',
827 '73.0.3660.1',
828 '73.0.3660.0',
829 '72.0.3626.44',
830 '71.0.3578.119',
831 '73.0.3659.1',
832 '73.0.3659.0',
833 '72.0.3626.43',
834 '71.0.3578.118',
835 '73.0.3658.1',
836 '73.0.3658.0',
837 '72.0.3626.42',
838 '71.0.3578.117',
839 '73.0.3657.1',
840 '73.0.3657.0',
841 '72.0.3626.41',
842 '71.0.3578.116',
843 '73.0.3656.1',
844 '73.0.3656.0',
845 '72.0.3626.40',
846 '71.0.3578.115',
847 '73.0.3655.1',
848 '73.0.3655.0',
849 '72.0.3626.39',
850 '71.0.3578.114',
851 '73.0.3654.1',
852 '73.0.3654.0',
853 '72.0.3626.38',
854 '71.0.3578.113',
855 '73.0.3653.1',
856 '73.0.3653.0',
857 '72.0.3626.37',
858 '71.0.3578.112',
859 '73.0.3652.1',
860 '73.0.3652.0',
861 '72.0.3626.36',
862 '71.0.3578.111',
863 '73.0.3651.1',
864 '73.0.3651.0',
865 '72.0.3626.35',
866 '71.0.3578.110',
867 '73.0.3650.1',
868 '73.0.3650.0',
869 '72.0.3626.34',
870 '71.0.3578.109',
871 '73.0.3649.1',
872 '73.0.3649.0',
873 '72.0.3626.33',
874 '71.0.3578.108',
875 '73.0.3648.2',
876 '73.0.3648.1',
877 '73.0.3648.0',
878 '72.0.3626.32',
879 '71.0.3578.107',
880 '73.0.3647.2',
881 '73.0.3647.1',
882 '73.0.3647.0',
883 '72.0.3626.31',
884 '71.0.3578.106',
885 '73.0.3635.3',
886 '73.0.3646.2',
887 '73.0.3646.1',
888 '73.0.3646.0',
889 '72.0.3626.30',
890 '71.0.3578.105',
891 '72.0.3626.29',
892 '73.0.3645.2',
893 '73.0.3645.1',
894 '73.0.3645.0',
895 '72.0.3626.28',
896 '71.0.3578.104',
897 '72.0.3626.27',
898 '72.0.3626.26',
899 '72.0.3626.25',
900 '72.0.3626.24',
901 '73.0.3644.0',
902 '73.0.3643.2',
903 '72.0.3626.23',
904 '71.0.3578.103',
905 '73.0.3643.1',
906 '73.0.3643.0',
907 '72.0.3626.22',
908 '71.0.3578.102',
909 '73.0.3642.1',
910 '73.0.3642.0',
911 '72.0.3626.21',
912 '71.0.3578.101',
913 '73.0.3641.1',
914 '73.0.3641.0',
915 '72.0.3626.20',
916 '71.0.3578.100',
917 '72.0.3626.19',
918 '73.0.3640.1',
919 '73.0.3640.0',
920 '72.0.3626.18',
921 '73.0.3639.1',
922 '71.0.3578.99',
923 '73.0.3639.0',
924 '72.0.3626.17',
925 '73.0.3638.2',
926 '72.0.3626.16',
927 '73.0.3638.1',
928 '73.0.3638.0',
929 '72.0.3626.15',
930 '71.0.3578.98',
931 '73.0.3635.2',
932 '71.0.3578.97',
933 '73.0.3637.1',
934 '73.0.3637.0',
935 '72.0.3626.14',
936 '71.0.3578.96',
937 '71.0.3578.95',
938 '72.0.3626.13',
939 '71.0.3578.94',
940 '73.0.3636.2',
941 '71.0.3578.93',
942 '73.0.3636.1',
943 '73.0.3636.0',
944 '72.0.3626.12',
945 '71.0.3578.92',
946 '73.0.3635.1',
947 '73.0.3635.0',
948 '72.0.3626.11',
949 '71.0.3578.91',
950 '73.0.3634.2',
951 '73.0.3634.1',
952 '73.0.3634.0',
953 '72.0.3626.10',
954 '71.0.3578.90',
955 '71.0.3578.89',
956 '73.0.3633.2',
957 '73.0.3633.1',
958 '73.0.3633.0',
959 '72.0.3610.4',
960 '72.0.3626.9',
961 '71.0.3578.88',
962 '73.0.3632.5',
963 '73.0.3632.4',
964 '73.0.3632.3',
965 '73.0.3632.2',
966 '73.0.3632.1',
967 '73.0.3632.0',
968 '72.0.3626.8',
969 '71.0.3578.87',
970 '73.0.3631.2',
971 '73.0.3631.1',
972 '73.0.3631.0',
973 '72.0.3626.7',
974 '71.0.3578.86',
975 '72.0.3626.6',
976 '73.0.3630.1',
977 '73.0.3630.0',
978 '72.0.3626.5',
979 '71.0.3578.85',
980 '72.0.3626.4',
981 '73.0.3628.3',
982 '73.0.3628.2',
983 '73.0.3629.1',
984 '73.0.3629.0',
985 '72.0.3626.3',
986 '71.0.3578.84',
987 '73.0.3628.1',
988 '73.0.3628.0',
989 '71.0.3578.83',
990 '73.0.3627.1',
991 '73.0.3627.0',
992 '72.0.3626.2',
993 '71.0.3578.82',
994 '71.0.3578.81',
995 '71.0.3578.80',
996 '72.0.3626.1',
997 '72.0.3626.0',
998 '71.0.3578.79',
999 '70.0.3538.124',
1000 '71.0.3578.78',
1001 '72.0.3623.4',
1002 '72.0.3625.2',
1003 '72.0.3625.1',
1004 '72.0.3625.0',
1005 '71.0.3578.77',
1006 '70.0.3538.123',
1007 '72.0.3624.4',
1008 '72.0.3624.3',
1009 '72.0.3624.2',
1010 '71.0.3578.76',
1011 '72.0.3624.1',
1012 '72.0.3624.0',
1013 '72.0.3623.3',
1014 '71.0.3578.75',
1015 '70.0.3538.122',
1016 '71.0.3578.74',
1017 '72.0.3623.2',
1018 '72.0.3610.3',
1019 '72.0.3623.1',
1020 '72.0.3623.0',
1021 '72.0.3622.3',
1022 '72.0.3622.2',
1023 '71.0.3578.73',
1024 '70.0.3538.121',
1025 '72.0.3622.1',
1026 '72.0.3622.0',
1027 '71.0.3578.72',
1028 '70.0.3538.120',
1029 '72.0.3621.1',
1030 '72.0.3621.0',
1031 '71.0.3578.71',
1032 '70.0.3538.119',
1033 '72.0.3620.1',
1034 '72.0.3620.0',
1035 '71.0.3578.70',
1036 '70.0.3538.118',
1037 '71.0.3578.69',
1038 '72.0.3619.1',
1039 '72.0.3619.0',
1040 '71.0.3578.68',
1041 '70.0.3538.117',
1042 '71.0.3578.67',
1043 '72.0.3618.1',
1044 '72.0.3618.0',
1045 '71.0.3578.66',
1046 '70.0.3538.116',
1047 '72.0.3617.1',
1048 '72.0.3617.0',
1049 '71.0.3578.65',
1050 '70.0.3538.115',
1051 '72.0.3602.3',
1052 '71.0.3578.64',
1053 '72.0.3616.1',
1054 '72.0.3616.0',
1055 '71.0.3578.63',
1056 '70.0.3538.114',
1057 '71.0.3578.62',
1058 '72.0.3615.1',
1059 '72.0.3615.0',
1060 '71.0.3578.61',
1061 '70.0.3538.113',
1062 '72.0.3614.1',
1063 '72.0.3614.0',
1064 '71.0.3578.60',
1065 '70.0.3538.112',
1066 '72.0.3613.1',
1067 '72.0.3613.0',
1068 '71.0.3578.59',
1069 '70.0.3538.111',
1070 '72.0.3612.2',
1071 '72.0.3612.1',
1072 '72.0.3612.0',
1073 '70.0.3538.110',
1074 '71.0.3578.58',
1075 '70.0.3538.109',
1076 '72.0.3611.2',
1077 '72.0.3611.1',
1078 '72.0.3611.0',
1079 '71.0.3578.57',
1080 '70.0.3538.108',
1081 '72.0.3610.2',
1082 '71.0.3578.56',
1083 '71.0.3578.55',
1084 '72.0.3610.1',
1085 '72.0.3610.0',
1086 '71.0.3578.54',
1087 '70.0.3538.107',
1088 '71.0.3578.53',
1089 '72.0.3609.3',
1090 '71.0.3578.52',
1091 '72.0.3609.2',
1092 '71.0.3578.51',
1093 '72.0.3608.5',
1094 '72.0.3609.1',
1095 '72.0.3609.0',
1096 '71.0.3578.50',
1097 '70.0.3538.106',
1098 '72.0.3608.4',
1099 '72.0.3608.3',
1100 '72.0.3608.2',
1101 '71.0.3578.49',
1102 '72.0.3608.1',
1103 '72.0.3608.0',
1104 '70.0.3538.105',
1105 '71.0.3578.48',
1106 '72.0.3607.1',
1107 '72.0.3607.0',
1108 '71.0.3578.47',
1109 '70.0.3538.104',
1110 '72.0.3606.2',
1111 '72.0.3606.1',
1112 '72.0.3606.0',
1113 '71.0.3578.46',
1114 '70.0.3538.103',
1115 '70.0.3538.102',
1116 '72.0.3605.3',
1117 '72.0.3605.2',
1118 '72.0.3605.1',
1119 '72.0.3605.0',
1120 '71.0.3578.45',
1121 '70.0.3538.101',
1122 '71.0.3578.44',
1123 '71.0.3578.43',
1124 '70.0.3538.100',
1125 '70.0.3538.99',
1126 '71.0.3578.42',
1127 '72.0.3604.1',
1128 '72.0.3604.0',
1129 '71.0.3578.41',
1130 '70.0.3538.98',
1131 '71.0.3578.40',
1132 '72.0.3603.2',
1133 '72.0.3603.1',
1134 '72.0.3603.0',
1135 '71.0.3578.39',
1136 '70.0.3538.97',
1137 '72.0.3602.2',
1138 '71.0.3578.38',
1139 '71.0.3578.37',
1140 '72.0.3602.1',
1141 '72.0.3602.0',
1142 '71.0.3578.36',
1143 '70.0.3538.96',
1144 '72.0.3601.1',
1145 '72.0.3601.0',
1146 '71.0.3578.35',
1147 '70.0.3538.95',
1148 '72.0.3600.1',
1149 '72.0.3600.0',
1150 '71.0.3578.34',
1151 '70.0.3538.94',
1152 '72.0.3599.3',
1153 '72.0.3599.2',
1154 '72.0.3599.1',
1155 '72.0.3599.0',
1156 '71.0.3578.33',
1157 '70.0.3538.93',
1158 '72.0.3598.1',
1159 '72.0.3598.0',
1160 '71.0.3578.32',
1161 '70.0.3538.87',
1162 '72.0.3597.1',
1163 '72.0.3597.0',
1164 '72.0.3596.2',
1165 '71.0.3578.31',
1166 '70.0.3538.86',
1167 '71.0.3578.30',
1168 '71.0.3578.29',
1169 '72.0.3596.1',
1170 '72.0.3596.0',
1171 '71.0.3578.28',
1172 '70.0.3538.85',
1173 '72.0.3595.2',
1174 '72.0.3591.3',
1175 '72.0.3595.1',
1176 '72.0.3595.0',
1177 '71.0.3578.27',
1178 '70.0.3538.84',
1179 '72.0.3594.1',
1180 '72.0.3594.0',
1181 '71.0.3578.26',
1182 '70.0.3538.83',
1183 '72.0.3593.2',
1184 '72.0.3593.1',
1185 '72.0.3593.0',
1186 '71.0.3578.25',
1187 '70.0.3538.82',
1188 '72.0.3589.3',
1189 '72.0.3592.2',
1190 '72.0.3592.1',
1191 '72.0.3592.0',
1192 '71.0.3578.24',
1193 '72.0.3589.2',
1194 '70.0.3538.81',
1195 '70.0.3538.80',
1196 '72.0.3591.2',
1197 '72.0.3591.1',
1198 '72.0.3591.0',
1199 '71.0.3578.23',
1200 '70.0.3538.79',
1201 '71.0.3578.22',
1202 '72.0.3590.1',
1203 '72.0.3590.0',
1204 '71.0.3578.21',
1205 '70.0.3538.78',
1206 '70.0.3538.77',
1207 '72.0.3589.1',
1208 '72.0.3589.0',
1209 '71.0.3578.20',
1210 '70.0.3538.76',
1211 '71.0.3578.19',
1212 '70.0.3538.75',
1213 '72.0.3588.1',
1214 '72.0.3588.0',
1215 '71.0.3578.18',
1216 '70.0.3538.74',
1217 '72.0.3586.2',
1218 '72.0.3587.0',
1219 '71.0.3578.17',
1220 '70.0.3538.73',
1221 '72.0.3586.1',
1222 '72.0.3586.0',
1223 '71.0.3578.16',
1224 '70.0.3538.72',
1225 '72.0.3585.1',
1226 '72.0.3585.0',
1227 '71.0.3578.15',
1228 '70.0.3538.71',
1229 '71.0.3578.14',
1230 '72.0.3584.1',
1231 '72.0.3584.0',
1232 '71.0.3578.13',
1233 '70.0.3538.70',
1234 '72.0.3583.2',
1235 '71.0.3578.12',
1236 '72.0.3583.1',
1237 '72.0.3583.0',
1238 '71.0.3578.11',
1239 '70.0.3538.69',
1240 '71.0.3578.10',
1241 '72.0.3582.0',
1242 '72.0.3581.4',
1243 '71.0.3578.9',
1244 '70.0.3538.67',
1245 '72.0.3581.3',
1246 '72.0.3581.2',
1247 '72.0.3581.1',
1248 '72.0.3581.0',
1249 '71.0.3578.8',
1250 '70.0.3538.66',
1251 '72.0.3580.1',
1252 '72.0.3580.0',
1253 '71.0.3578.7',
1254 '70.0.3538.65',
1255 '71.0.3578.6',
1256 '72.0.3579.1',
1257 '72.0.3579.0',
1258 '71.0.3578.5',
1259 '70.0.3538.64',
1260 '71.0.3578.4',
1261 '71.0.3578.3',
1262 '71.0.3578.2',
1263 '71.0.3578.1',
1264 '71.0.3578.0',
1265 '70.0.3538.63',
1266 '69.0.3497.128',
1267 '70.0.3538.62',
1268 '70.0.3538.61',
1269 '70.0.3538.60',
1270 '70.0.3538.59',
1271 '71.0.3577.1',
1272 '71.0.3577.0',
1273 '70.0.3538.58',
1274 '69.0.3497.127',
1275 '71.0.3576.2',
1276 '71.0.3576.1',
1277 '71.0.3576.0',
1278 '70.0.3538.57',
1279 '70.0.3538.56',
1280 '71.0.3575.2',
1281 '70.0.3538.55',
1282 '69.0.3497.126',
1283 '70.0.3538.54',
1284 '71.0.3575.1',
1285 '71.0.3575.0',
1286 '71.0.3574.1',
1287 '71.0.3574.0',
1288 '70.0.3538.53',
1289 '69.0.3497.125',
1290 '70.0.3538.52',
1291 '71.0.3573.1',
1292 '71.0.3573.0',
1293 '70.0.3538.51',
1294 '69.0.3497.124',
1295 '71.0.3572.1',
1296 '71.0.3572.0',
1297 '70.0.3538.50',
1298 '69.0.3497.123',
1299 '71.0.3571.2',
1300 '70.0.3538.49',
1301 '69.0.3497.122',
1302 '71.0.3571.1',
1303 '71.0.3571.0',
1304 '70.0.3538.48',
1305 '69.0.3497.121',
1306 '71.0.3570.1',
1307 '71.0.3570.0',
1308 '70.0.3538.47',
1309 '69.0.3497.120',
1310 '71.0.3568.2',
1311 '71.0.3569.1',
1312 '71.0.3569.0',
1313 '70.0.3538.46',
1314 '69.0.3497.119',
1315 '70.0.3538.45',
1316 '71.0.3568.1',
1317 '71.0.3568.0',
1318 '70.0.3538.44',
1319 '69.0.3497.118',
1320 '70.0.3538.43',
1321 '70.0.3538.42',
1322 '71.0.3567.1',
1323 '71.0.3567.0',
1324 '70.0.3538.41',
1325 '69.0.3497.117',
1326 '71.0.3566.1',
1327 '71.0.3566.0',
1328 '70.0.3538.40',
1329 '69.0.3497.116',
1330 '71.0.3565.1',
1331 '71.0.3565.0',
1332 '70.0.3538.39',
1333 '69.0.3497.115',
1334 '71.0.3564.1',
1335 '71.0.3564.0',
1336 '70.0.3538.38',
1337 '69.0.3497.114',
1338 '71.0.3563.0',
1339 '71.0.3562.2',
1340 '70.0.3538.37',
1341 '69.0.3497.113',
1342 '70.0.3538.36',
1343 '70.0.3538.35',
1344 '71.0.3562.1',
1345 '71.0.3562.0',
1346 '70.0.3538.34',
1347 '69.0.3497.112',
1348 '70.0.3538.33',
1349 '71.0.3561.1',
1350 '71.0.3561.0',
1351 '70.0.3538.32',
1352 '69.0.3497.111',
1353 '71.0.3559.6',
1354 '71.0.3560.1',
1355 '71.0.3560.0',
1356 '71.0.3559.5',
1357 '71.0.3559.4',
1358 '70.0.3538.31',
1359 '69.0.3497.110',
1360 '71.0.3559.3',
1361 '70.0.3538.30',
1362 '69.0.3497.109',
1363 '71.0.3559.2',
1364 '71.0.3559.1',
1365 '71.0.3559.0',
1366 '70.0.3538.29',
1367 '69.0.3497.108',
1368 '71.0.3558.2',
1369 '71.0.3558.1',
1370 '71.0.3558.0',
1371 '70.0.3538.28',
1372 '69.0.3497.107',
1373 '71.0.3557.2',
1374 '71.0.3557.1',
1375 '71.0.3557.0',
1376 '70.0.3538.27',
1377 '69.0.3497.106',
1378 '71.0.3554.4',
1379 '70.0.3538.26',
1380 '71.0.3556.1',
1381 '71.0.3556.0',
1382 '70.0.3538.25',
1383 '71.0.3554.3',
1384 '69.0.3497.105',
1385 '71.0.3554.2',
1386 '70.0.3538.24',
1387 '69.0.3497.104',
1388 '71.0.3555.2',
1389 '70.0.3538.23',
1390 '71.0.3555.1',
1391 '71.0.3555.0',
1392 '70.0.3538.22',
1393 '69.0.3497.103',
1394 '71.0.3554.1',
1395 '71.0.3554.0',
1396 '70.0.3538.21',
1397 '69.0.3497.102',
1398 '71.0.3553.3',
1399 '70.0.3538.20',
1400 '69.0.3497.101',
1401 '71.0.3553.2',
1402 '69.0.3497.100',
1403 '71.0.3553.1',
1404 '71.0.3553.0',
1405 '70.0.3538.19',
1406 '69.0.3497.99',
1407 '69.0.3497.98',
1408 '69.0.3497.97',
1409 '71.0.3552.6',
1410 '71.0.3552.5',
1411 '71.0.3552.4',
1412 '71.0.3552.3',
1413 '71.0.3552.2',
1414 '71.0.3552.1',
1415 '71.0.3552.0',
1416 '70.0.3538.18',
1417 '69.0.3497.96',
1418 '71.0.3551.3',
1419 '71.0.3551.2',
1420 '71.0.3551.1',
1421 '71.0.3551.0',
1422 '70.0.3538.17',
1423 '69.0.3497.95',
1424 '71.0.3550.3',
1425 '71.0.3550.2',
1426 '71.0.3550.1',
1427 '71.0.3550.0',
1428 '70.0.3538.16',
1429 '69.0.3497.94',
1430 '71.0.3549.1',
1431 '71.0.3549.0',
1432 '70.0.3538.15',
1433 '69.0.3497.93',
1434 '69.0.3497.92',
1435 '71.0.3548.1',
1436 '71.0.3548.0',
1437 '70.0.3538.14',
1438 '69.0.3497.91',
1439 '71.0.3547.1',
1440 '71.0.3547.0',
1441 '70.0.3538.13',
1442 '69.0.3497.90',
1443 '71.0.3546.2',
1444 '69.0.3497.89',
1445 '71.0.3546.1',
1446 '71.0.3546.0',
1447 '70.0.3538.12',
1448 '69.0.3497.88',
1449 '71.0.3545.4',
1450 '71.0.3545.3',
1451 '71.0.3545.2',
1452 '71.0.3545.1',
1453 '71.0.3545.0',
1454 '70.0.3538.11',
1455 '69.0.3497.87',
1456 '71.0.3544.5',
1457 '71.0.3544.4',
1458 '71.0.3544.3',
1459 '71.0.3544.2',
1460 '71.0.3544.1',
1461 '71.0.3544.0',
1462 '69.0.3497.86',
1463 '70.0.3538.10',
1464 '69.0.3497.85',
1465 '70.0.3538.9',
1466 '69.0.3497.84',
1467 '71.0.3543.4',
1468 '70.0.3538.8',
1469 '71.0.3543.3',
1470 '71.0.3543.2',
1471 '71.0.3543.1',
1472 '71.0.3543.0',
1473 '70.0.3538.7',
1474 '69.0.3497.83',
1475 '71.0.3542.2',
1476 '71.0.3542.1',
1477 '71.0.3542.0',
1478 '70.0.3538.6',
1479 '69.0.3497.82',
1480 '69.0.3497.81',
1481 '71.0.3541.1',
1482 '71.0.3541.0',
1483 '70.0.3538.5',
1484 '69.0.3497.80',
1485 '71.0.3540.1',
1486 '71.0.3540.0',
1487 '70.0.3538.4',
1488 '69.0.3497.79',
1489 '70.0.3538.3',
1490 '71.0.3539.1',
1491 '71.0.3539.0',
1492 '69.0.3497.78',
1493 '68.0.3440.134',
1494 '69.0.3497.77',
1495 '70.0.3538.2',
1496 '70.0.3538.1',
1497 '70.0.3538.0',
1498 '69.0.3497.76',
1499 '68.0.3440.133',
1500 '69.0.3497.75',
1501 '70.0.3537.2',
1502 '70.0.3537.1',
1503 '70.0.3537.0',
1504 '69.0.3497.74',
1505 '68.0.3440.132',
1506 '70.0.3536.0',
1507 '70.0.3535.5',
1508 '70.0.3535.4',
1509 '70.0.3535.3',
1510 '69.0.3497.73',
1511 '68.0.3440.131',
1512 '70.0.3532.8',
1513 '70.0.3532.7',
1514 '69.0.3497.72',
1515 '69.0.3497.71',
1516 '70.0.3535.2',
1517 '70.0.3535.1',
1518 '70.0.3535.0',
1519 '69.0.3497.70',
1520 '68.0.3440.130',
1521 '69.0.3497.69',
1522 '68.0.3440.129',
1523 '70.0.3534.4',
1524 '70.0.3534.3',
1525 '70.0.3534.2',
1526 '70.0.3534.1',
1527 '70.0.3534.0',
1528 '69.0.3497.68',
1529 '68.0.3440.128',
1530 '70.0.3533.2',
1531 '70.0.3533.1',
1532 '70.0.3533.0',
1533 '69.0.3497.67',
1534 '68.0.3440.127',
1535 '70.0.3532.6',
1536 '70.0.3532.5',
1537 '70.0.3532.4',
1538 '69.0.3497.66',
1539 '68.0.3440.126',
1540 '70.0.3532.3',
1541 '70.0.3532.2',
1542 '70.0.3532.1',
1543 '69.0.3497.60',
1544 '69.0.3497.65',
1545 '69.0.3497.64',
1546 '70.0.3532.0',
1547 '70.0.3531.0',
1548 '70.0.3530.4',
1549 '70.0.3530.3',
1550 '70.0.3530.2',
1551 '69.0.3497.58',
1552 '68.0.3440.125',
1553 '69.0.3497.57',
1554 '69.0.3497.56',
1555 '69.0.3497.55',
1556 '69.0.3497.54',
1557 '70.0.3530.1',
1558 '70.0.3530.0',
1559 '69.0.3497.53',
1560 '68.0.3440.124',
1561 '69.0.3497.52',
1562 '70.0.3529.3',
1563 '70.0.3529.2',
1564 '70.0.3529.1',
1565 '70.0.3529.0',
1566 '69.0.3497.51',
1567 '70.0.3528.4',
1568 '68.0.3440.123',
1569 '70.0.3528.3',
1570 '70.0.3528.2',
1571 '70.0.3528.1',
1572 '70.0.3528.0',
1573 '69.0.3497.50',
1574 '68.0.3440.122',
1575 '70.0.3527.1',
1576 '70.0.3527.0',
1577 '69.0.3497.49',
1578 '68.0.3440.121',
1579 '70.0.3526.1',
1580 '70.0.3526.0',
1581 '68.0.3440.120',
1582 '69.0.3497.48',
1583 '69.0.3497.47',
1584 '68.0.3440.119',
1585 '68.0.3440.118',
1586 '70.0.3525.5',
1587 '70.0.3525.4',
1588 '70.0.3525.3',
1589 '68.0.3440.117',
1590 '69.0.3497.46',
1591 '70.0.3525.2',
1592 '70.0.3525.1',
1593 '70.0.3525.0',
1594 '69.0.3497.45',
1595 '68.0.3440.116',
1596 '70.0.3524.4',
1597 '70.0.3524.3',
1598 '69.0.3497.44',
1599 '70.0.3524.2',
1600 '70.0.3524.1',
1601 '70.0.3524.0',
1602 '70.0.3523.2',
1603 '69.0.3497.43',
1604 '68.0.3440.115',
1605 '70.0.3505.9',
1606 '69.0.3497.42',
1607 '70.0.3505.8',
1608 '70.0.3523.1',
1609 '70.0.3523.0',
1610 '69.0.3497.41',
1611 '68.0.3440.114',
1612 '70.0.3505.7',
1613 '69.0.3497.40',
1614 '70.0.3522.1',
1615 '70.0.3522.0',
1616 '70.0.3521.2',
1617 '69.0.3497.39',
1618 '68.0.3440.113',
1619 '70.0.3505.6',
1620 '70.0.3521.1',
1621 '70.0.3521.0',
1622 '69.0.3497.38',
1623 '68.0.3440.112',
1624 '70.0.3520.1',
1625 '70.0.3520.0',
1626 '69.0.3497.37',
1627 '68.0.3440.111',
1628 '70.0.3519.3',
1629 '70.0.3519.2',
1630 '70.0.3519.1',
1631 '70.0.3519.0',
1632 '69.0.3497.36',
1633 '68.0.3440.110',
1634 '70.0.3518.1',
1635 '70.0.3518.0',
1636 '69.0.3497.35',
1637 '69.0.3497.34',
1638 '68.0.3440.109',
1639 '70.0.3517.1',
1640 '70.0.3517.0',
1641 '69.0.3497.33',
1642 '68.0.3440.108',
1643 '69.0.3497.32',
1644 '70.0.3516.3',
1645 '70.0.3516.2',
1646 '70.0.3516.1',
1647 '70.0.3516.0',
1648 '69.0.3497.31',
1649 '68.0.3440.107',
1650 '70.0.3515.4',
1651 '68.0.3440.106',
1652 '70.0.3515.3',
1653 '70.0.3515.2',
1654 '70.0.3515.1',
1655 '70.0.3515.0',
1656 '69.0.3497.30',
1657 '68.0.3440.105',
1658 '68.0.3440.104',
1659 '70.0.3514.2',
1660 '70.0.3514.1',
1661 '70.0.3514.0',
1662 '69.0.3497.29',
1663 '68.0.3440.103',
1664 '70.0.3513.1',
1665 '70.0.3513.0',
1666 '69.0.3497.28',
1667 )
1668 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1669
1670
# Default HTTP request headers. The User-Agent value is produced by a single
# random_user_agent() call made when this module is imported.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1678
5f6a1245 1679
# Fixed User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as the default of `default=` parameters, so that an
# explicitly passed None can be told apart from "no default given".
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]

# Month names per language code, January first.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril',
        'mai', 'juin', 'juillet', 'août',
        'septembre', 'octobre', 'novembre', 'décembre',
    ],
}

# File extensions (without the leading dot) recognised by this module.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil',
)
1712
# needed for sanitizing filenames in restricted mode
# Maps every accented character of the first string to its ASCII replacement.
# Replacements are zipped positionally; the bracketed items are the
# multi-character ones (for instance 'AE' and 'ss').
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
    itertools.chain(
        'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1717
# strptime() formats that are unambiguous regarding day/month order.
DATE_FORMATS = (
    # Textual month names, with and without ordinal suffixes
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    # Year-first numeric dates
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    # "T"-separated timestamps
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    # "<date> at <time>"
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# The common formats followed by the numeric ones that read the day first.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# The common formats followed by the numeric ones that read the month first.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1775
# Matches the trailing argument list `}('<payload>',<int>,<int>,'<a|b|c>'.split('|')`
# and captures the payload, both integers and the '|'-separated word list.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive,
# dot matches newlines) and captures its body as the `json_ld` group.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1778
7105440c 1779
def preferredencoding():
    """Get preferred encoding.

    Asks locale.getpreferredencoding() for the system encoding and checks
    that a trial encode with it succeeds. Returns 'UTF-8' whenever the lookup
    or the trial encode raises.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Reject names for which no usable codec exists
        'TEST'.encode(encoding)
    except Exception:
        encoding = 'UTF-8'

    return encoding
d77c3dfd 1793
f4bfd65f 1794
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is written to a temporary file created in the directory of fn
    (named after fn, with a '.tmp' suffix) which is then renamed onto fn.
    If any step fails, the temporary file is removed and the error re-raised.
    """

    fn = encodeFilename(fn)
    tmp_prefix = os.path.basename(fn)
    tmp_dir = os.path.dirname(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use a unicode object
        fs_encoding = get_filesystem_encoding()
        tmp_prefix = tmp_prefix.decode(fs_encoding)
        tmp_dir = tmp_dir.decode(fs_encoding)

    tmp_args = {
        'suffix': '.tmp',
        'prefix': tmp_prefix + '.',
        'dir': tmp_dir,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_args['mode'] = 'wb'
    else:
        tmp_args['mode'] = 'w'
        tmp_args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_args))

    try:
        with tf:
            json.dump(obj, tf)

        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass

        try:
            # os.umask() can only be read by setting it, so set and restore
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass

        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file, then propagate
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1853
1854
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first element below node matching xpath that carries the
        attribute key (with value val, unless val is None), or None.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1869
d7e66d39
JMF
1870# On python2.6 the xml.etree.ElementTree.Element methods don't support
1871# the namespace parameter
5f6a1245
JW
1872
1873
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' path components into '{namespace}tag' form.

    path is split on '/'; each component of the form 'prefix:tag' has its
    prefix looked up in ns_map, components without ':' are kept unchanged.
    """
    def _expand(component):
        pieces = component.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(component) for component in path.split('/'))
1884
d77c3dfd 1885
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node matching xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order until one matches.

    When nothing matches: default is returned if it was given; otherwise an
    ExtractorError is raised if fatal is true, else None is returned. name
    (defaulting to xpath) is used in the error message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    # Pre-initialised so that an empty iterable of expressions is handled as
    # "not found" instead of failing on an unbound local below
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1907
1908
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element is handled by xpath_element(). If the element exists
    but has no text: default is returned if it was given; otherwise an
    ExtractorError is raised if fatal is true, else None is returned.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Nothing found, or xpath_element() handed back the caller's default
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1922
1923
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default is returned if it was given;
    otherwise an ExtractorError is raised if fatal is true, else None is
    returned.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1935
1936
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is an attribute lookup on "id"
    return get_element_by_attribute('id', id, html)
43e8fafd 1940
12ea2f30 1941
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1946
1947
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1951
1952
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class may be one of several in the attribute value, so match it
    # as a whole word anywhere inside the value (passed as a regex, unescaped)
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1958
1959
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is inserted into the pattern as a regular expression. Each content is
    passed through unescapeHTML().
    """
    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')
        # Content starting with a quote loses its first and last character
        if content.startswith(('"', "'")):
            content = content[1:-1]
        contents.append(unescapeHTML(content))

    return contents
a921f407 1983
c5229f39 1984
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; empty until then
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs arrives as (name, value) pairs; keep them as a dict
        self.attrs = dict(attrs)
1993
c5229f39 1994
def extract_attributes(html_element):
    """Decode the attributes of an HTML element string into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered before the error is still returned
        pass
    return attr_parser.attrs
9e6dd238 2019
c5229f39 2020
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines carry no meaning; <br> and </p><p> become newlines
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2036
2037
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The filename '-' stands for standard output (its binary buffer when
    available). Otherwise the file is opened; if that fails with anything but
    EACCES, the open is retried once with the name produced by
    sanitize_path(), provided it differs from the original name.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2068
2069
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2077
5f6a1245 2078
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        # '?' and control characters are dropped
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or codepoint > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        # IDs only get the per-character replacement
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2118
5f6a1245 2119
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform s is returned unchanged. On Windows every path
    component except '.' and '..' has its forbidden characters, as well as a
    trailing whitespace or dot, replaced with '#'; a leading drive or UNC
    prefix is kept as is.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc = os.path.splitdrive(s)[0]
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc = os.path.splitunc(s)[0]

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)

    def _sanitize_part(part):
        if part in ('.', '..'):
            return part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)

    sanitized_parts = [_sanitize_part(part) for part in parts]
    if drive_or_unc:
        sanitized_parts.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_parts)
2136
2137
def sanitize_url(url):
    """Return url with a missing scheme or a known scheme typo repaired."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # Only the first typo that applies is corrected
        fixed_url, replacements = re.subn(mistake, fixup, url)
        if replacements:
            return fixed_url
    return url
17bcc626
S
2154
2155
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for url after sanitize_url()."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2158
2159
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, then environment variables
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2163
2164
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element, in the
    original order. Elements are compared by equality, so they do not need
    to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2172
912b38b4 2173
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with the
    trailing ';'. Unknown entities are returned in their literal '&...;'
    form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    codepoints = compat_html_entities.name2codepoint
    if entity in codepoints:
        return compat_chr(codepoints[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    # Numeric character reference, decimal or hexadecimal
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        base = 10
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2203
2204
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for.

    None is passed through; any other input must be a compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode_entity(match):
        # Group 1 is the entity without '&' but including the ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode_entity, s)
d77c3dfd 2212
8bf48f23 2213
def get_subprocess_encoding():
    """Return the encoding used for filenames and arguments of subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()

    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2224
2225
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form expected by the file APIs of the running platform.

    @param s The name of the file
    """

    assert type(s) == compat_str

    keep_unicode = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32'
            and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as
        # Python 2.x compatible
        or sys.platform.startswith('java'))
    if keep_unicode:
        return s

    # Characters the target encoding cannot represent are dropped
    return s.encode(get_subprocess_encoding(), 'ignore')
2248
2249
def decodeFilename(b, for_subprocess=False):
    """Decode a bytes filename with the subprocess encoding.

    On Python 3, and for any value that is not a bytes object, b is returned
    unchanged. for_subprocess is accepted but not used.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    # Undecodable bytes are dropped
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2259
f07b74fc
PH
2260
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(s, True)."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2268
2269
def decodeArgument(b):
    """Decode a command line argument via decodeFilename(b, True)."""
    return decodeFilename(b, for_subprocess=True)
2272
2273
def decodeOption(optval):
    """Return an option value as text.

    None is passed through; bytes are decoded with preferredencoding(). The
    result must be a compat_str.
    """
    if optval is None:
        return optval

    text = optval
    if isinstance(text, bytes):
        text = text.decode(preferredencoding())

    assert isinstance(text, compat_str)
    return text
1c256f70 2282
5f6a1245 2283
def formatSeconds(secs):
    """Format a duration given in seconds.

    Returns 'H:MM:SS' from one hour up, 'M:SS' from one minute up, and the
    plain number of seconds below that.
    """
    # Inclusive bounds: exactly one minute/hour must already use the longer
    # form (60 -> '1:00', 3600 -> '1:00:00'), not '60' or '60:00'
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2291
a0ddb8a2 2292
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    Certificate verification is turned off when params['nocheckcertificate']
    is true.
    """
    skip_cert_check = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_cert_check:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_cert_check:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2316
732ea2f0 2317
def bug_reports_message():
    """Return the "please report this issue" suffix for error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
2327
2328
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    # Adds nothing to Exception; exists as the common ancestor of the
    # exception classes defined below.
2332
2333
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.

        The final message is msg prefixed with video_id (if given), followed
        by the cause (if any) and, for unexpected errors, by
        bug_reports_message().
        """

        # Errors raised while one of these exceptions is being handled are
        # always treated as expected
        handled_type = sys.exc_info()[0]
        if handled_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2361
1c256f70 2362
class UnsupportedError(ExtractorError):
    """Expected ExtractorError carrying the URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2368
2369
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    # No behaviour of its own; only the type differs from ExtractorError.
2373
2374
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Always an expected error, so no bug report text is appended
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # The message as passed in, and the caller-supplied countries value
        self.msg = msg
        self.countries = countries
2385
2386
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that handlers can inspect the original exception
        self.exc_info = exc_info
d77c3dfd
FV
2399
2400
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    # No behaviour of its own; only the type differs from YoutubeDLError.
d77c3dfd
FV
2408
2409
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Also exposed as an attribute, besides being the exception text
        self.msg = msg
d77c3dfd 2420
5f6a1245 2421
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    # No behaviour of its own; only the type differs from YoutubeDLError.
d77c3dfd
FV
2425
2426
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    # No behaviour of its own; only the type differs from YoutubeDLError.
d77c3dfd
FV
2434
2435
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2451
5f6a1245 2452
class XAttrMetadataError(YoutubeDLError):
    """Error with a code and message, classified into a coarse `reason`.

    reason is one of:
      'NO_SPACE'        -- code is ENOSPC/EDQUOT, or msg reports a full disk
                           or an exceeded quota
      'VALUE_TOO_LONG'  -- code is E2BIG, or msg reports a too long
                           argument list
      'NOT_SUPPORTED'   -- anything else
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        # NOTE: the quota text was previously misspelled ('excedded') and
        # therefore could never match a real "Disk quota exceeded" message
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2467
2468
class XAttrUnavailableError(YoutubeDLError):
    # No behaviour of its own; only the type differs from YoutubeDLError.
    pass
2471
2472
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class, optionally bound to a configured source address.

    The connection is built from args/kwargs. If the handler's params hold a
    'source_address', the connection is set up so that its socket is bound to
    that address and only remote addresses of the same IP family are tried.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is None:
        return hc

    # This is to workaround _create_connection() from socket where it will try all
    # address data from getaddrinfo() including IPv6. This filters the result from
    # getaddrinfo() based on the source_address value.
    # This is based on the cpython socket.create_connection() function.
    # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
    def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        host, port = address
        last_error = None
        addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        # A dot in the source address means IPv4, anything else IPv6
        af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
        ip_addrs = [addr for addr in addrs if addr[0] == af]
        if addrs and not ip_addrs:
            ip_version = 'v4' if af == socket.AF_INET else 'v6'
            raise socket.error(
                "No remote IP%s addresses available for connect, can't use '%s' as source address"
                % (ip_version, source_address[0]))
        for res in ip_addrs:
            af, socktype, proto, canonname, sa = res
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                sock.bind(source_address)
                sock.connect(sa)
                last_error = None  # Explicitly break reference cycle
                return sock
            except socket.error as exc:
                # Remember the failure and move on to the next address
                last_error = exc
                if sock is not None:
                    sock.close()
        if last_error is not None:
            raise last_error
        raise socket.error('getaddrinfo returns an empty list')

    if hasattr(hc, '_create_connection'):
        hc._create_connection = _create_connection
    sa = (source_address, 0)
    if hasattr(hc, 'source_address'):  # Python 2.7+
        hc.source_address = sa
    else:  # Python 2.6
        def _hc_connect(self, *args, **kwargs):
            sock = _create_connection(
                (self.host, self.port), self.timeout, sa)
            if is_https:
                self.sock = ssl.wrap_socket(
                    sock, self.key_file, self.cert_file,
                    ssl_version=ssl.PROTOCOL_TLSv1)
            else:
                self.sock = sock
        hc.connect = functools.partial(_hc_connect, hc)

    return hc
2536
2537
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to a header dict.

    Without the marker the very same dict object is returned. With it, a new
    dict is returned that lacks both the marker and any Accept-Encoding header
    (matched case-insensitively); the input dict is left untouched.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    return dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding' and name != 'Youtubedl-no-compression')
87f0e62d
YCH
2546
2547
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store params (read later for 'source_address'); the remaining
        arguments go to the base HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, routing through a SOCKS proxy when the internal
        'Ytdl-socks-proxy' header is present (the header is then removed)."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress a deflate-encoded body.

        Raw deflate is tried first, then zlib-wrapped deflate. Empty input is
        returned as is: zlib rejects an empty stream in both modes, so an
        empty response body would otherwise raise zlib.error.
        """
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Percent-encode the request URL and add the standard headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Transparently decode gzip/deflate bodies and escape redirect URLs."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets the same pre- and post-processing
    https_request = http_request
    https_response = http_response
bf50b038 2669
5de90176 2670
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Build a subclass of base_class that connects through a SOCKS proxy.

    base_class must be an HTTPConnection or HTTPSConnection class.
    socks_proxy is a proxy URL whose scheme is one of socks, socks4, socks4a
    or socks5 (case-insensitive); the port defaults to 1080 and username and
    password, if present, are URL-unquoted.

    Raises ValueError for any other scheme. Previously this case surfaced as
    an UnboundLocalError because no proxy type had been selected.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % scheme)

    def unquote_if_non_empty(s):
        # Leave None / '' alone so missing credentials stay missing
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2712
2713
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections through _create_http_connection.

    Supports a custom connection class and the internal 'Ytdl-socks-proxy'
    request header.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class

        # Forward whichever TLS settings the running Python's HTTPSHandler keeps:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        open_kwargs = {}
        for attr, key in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[key] = getattr(self, attr)

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        opener = functools.partial(_create_http_connection, self, conn_class, True)
        return self.do_open(opener, req, **open_kwargs)
be4a824d
PH
2737
2738
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in one cookie line
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        Note that session cookies held by the jar get `expires` set to 0 as
        a side effect. Raises ValueError if no filename is known.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = compat_str(cookie.expires) if cookie.expires is not None else ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Malformed entries are reported on stderr and skipped rather than
        aborting the load. Raises ValueError if no filename is known.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            # Drop the #HttpOnly_ marker so the entry is parsed like any other
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(fields))
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # Sanitised copy of the file that is handed to the stock parser
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2855
2856
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same handling to HTTP and HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # A workaround that percent-encoded the Set-Cookie / Set-Cookie2 headers
        # before HTTPCookieProcessor saw them used to live here; it is disabled,
        # so the response is currently handed to the base class unchanged.
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2879
2880
b827ee92
AG
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler; only overrides anything when running on Python 2."""

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. Converting newurl here
            # forces it to always be unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, headers, compat_str(newurl))
2888
2889
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC designator off a date string.

    Recognises a final 'Z' or a final '+HH:MM' / '-HHMM' style offset
    (optionally preceded by one space) after at least 8 other characters.

    Returns a (timedelta, date_str) tuple: the offset from UTC (zero when
    nothing or 'Z' was found) and the string with the designator removed.
    """
    match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not match:
        return datetime.timedelta(), date_str

    stripped = date_str[:-len(match.group('tz'))]
    if not match.group('sign'):
        # Plain 'Z': UTC
        return datetime.timedelta(), stripped

    direction = 1 if match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(match.group('hours')),
        minutes=direction * int(match.group('minutes')))
    return offset, stripped
2906
2907
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    Fractional seconds are discarded. delimiter is the separator between the
    date and the time part. timezone, if given, is a timedelta offset from
    UTC; otherwise the offset is taken from the end of date_str. Returns None
    for None input or when the string does not match the expected layout.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_moment = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
912b38b4
PH
2925
2926
46f59e89
S
def date_formats(day_first=True):
    """Return DATE_FORMATS_DAY_FIRST if day_first is truthy, otherwise
    DATE_FORMATS_MONTH_FIRST."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2929
2930
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None for None input or when the date cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; when several match, the last one wins
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to the e-mail date parser
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 2957
5f6a1245 2958
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    The formats from date_formats(day_first) are tried first, then the e-mail
    date parser. A trailing numeric UTC offset is honoured on both paths.
    AM/PM markers follow the 12-hour clock: PM adds twelve hours except for
    12:xx PM, and 12:xx AM maps to 00:xx.

    Returns None for None input or when nothing could be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    has_pm = re.search(r'(?i)PM', date_str) is not None
    # AM must directly follow the time so that ordinary words containing
    # "am" are not mistaken for a meridiem marker
    has_am = not has_pm and re.search(r'(?i)\d\s*AM\b', date_str) is not None

    def meridiem_hours(hour):
        # Hours to add to a time that was parsed as if on a 24-hour clock
        if has_pm and hour < 12:
            return 12
        if has_am and hour == 12:
            return -12
        return 0

    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(date_str, expression)
            dt = parsed - timezone + datetime.timedelta(hours=meridiem_hours(parsed.hour))
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The numeric offset was already stripped from date_str by
        # extract_timezone(), so it has to be applied here explicitly
        tz_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + meridiem_hours(timetuple[3]) * 3600 - tz_seconds
46f59e89
S
2990
2991
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from a URL.

    Takes whatever follows the last dot before the query string. It is
    accepted if purely alphanumeric, or if it is a known extension followed
    only by slashes; otherwise default_ext is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    without_slashes = candidate.rstrip('/')
    if without_slashes in KNOWN_EXTENSIONS:
        return without_slashes
    return default_ext
73e79f2a 3003
5f6a1245 3004
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'.

    expected_real_ext is passed through to replace_extension().
    """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3007
5f6a1245 3008
def date_from_str(date_str):
    """
    Return a datetime.date from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Raises ValueError when the string matches none of these."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # timedelta has no month/year arguments, so those are approximated
    # in days (a bad approximation?)
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
3036
3037
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
3046
5f6a1245 3047
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by
        date_from_str; a missing bound defaults to the earliest / latest
        representable date. Raises ValueError if start is after end."""
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range (both bounds inclusive)"""
        if not isinstance(date, datetime.date):
            # Anything that is not a date object is parsed as a date string
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3077
3078
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte string result: decode with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3087
3088
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    Only stdout/stderr (file descriptors 1 and 2) attached to a real console
    are handled; the text is then written with WriteConsoleW. Raises OSError
    if WriteConsoleW reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> value passed to GetStdHandle
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # No fileno attribute: the stream is virtual.
        # UnsupportedOperation: some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    console = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(console):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character above U+FFFF, or len(text) if none
        try:
            return next(i for i, c in enumerate(text) if ord(c) > 0xffff)
        except StopIteration:
            return len(text)

    while s:
        # Write at most 1024 characters per call, stopping before any
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            console, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3162
3163
def write_string(s, out=None, encoding=None):
    """Write the text string s to out (sys.stderr by default) and flush.

    s must be a compat_str. Binary streams and streams exposing a `buffer`
    receive encoded bytes, with unencodable characters dropped; other
    streams receive the text itself. On Windows with no explicit encoding a
    console-specific writer is tried first.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_writer = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_writer and _windows_write_string(s, out):
        return

    wants_bytes = (
        'b' in getattr(out, 'mode', '')
        or sys.version_info[0] < 3)  # Python 2 lies about mode of sys.stderr
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3184
3185
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the items of a byte sequence as a list of integers.

    Sequences whose items are already ints (Python 3 bytes) are copied into
    a list; otherwise each item is converted with ord(). Empty input gives [].
    """
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(char) for char in bs]
3193
c257baff 3194
def intlist_to_bytes(xs):
    """Pack a sequence of integers into a byte string, one unsigned byte
    ('B' struct format) per item. Empty input gives b''."""
    if not xs:
        return b''
    layout = '%dB' % len(xs)
    return compat_struct_pack(layout, *xs)
c38b1e77
PH
3199
3200
c1c9a79c
PH
# Cross-platform file locking: defines _lock_file(f, exclusive) and
# _unlock_file(f) for the current platform
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count to lock, i.e. the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object because _unlock_file() needs it again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            handle, flags, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release a lock taken by _lock_file(); raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock() lock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock held on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always fails: locking is unavailable without fcntl."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always fails: locking is unavailable without fcntl."""
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3274
3275
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened in the constructor. Entering takes a shared lock for
    mode 'r' and an exclusive lock for 'a' / 'w'; leaving releases the lock
    and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            # Do not leak the file handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            # Close even if unlocking failed
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3305
3306
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3310
3311
def shell_quote(args):
    """Return the arguments shell-quoted and joined with single spaces.

    Byte-string arguments are first decoded with the filesystem encoding.
    """
    encoding = get_filesystem_encoding()
    # Byte strings may be filenames encoded with 'encodeFilename'
    quoted_args = [
        compat_shlex_quote(a.decode(encoding) if isinstance(a, bytes) else a)
        for a in args]
    return ' '.join(quoted_args)
9d4660ca
PH
3321
3322
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into a '__youtubedl_smuggle' fragment appended to
    url. If url already carries smuggled data, the two are merged and the
    values already in the URL win on conflicting keys.

    The caller's dict is not modified; the merge is done on a copy, so
    previously smuggled values cannot leak into a dict the caller reuses.
    """
    url, idata = unsmuggle_url(url, {})
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
9d4660ca
PH
3331
3332
def unsmuggle_url(smug_url, default=None):
    """Split a URL into (url, smuggled_data).

    URLs without a '#__youtubedl_smuggle' fragment are returned unchanged
    together with default. Otherwise everything after the last '#' is parsed
    as a query string and its '__youtubedl_smuggle' value is JSON-decoded.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    url, _, fragment = smug_url.rpartition('#')
    encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(encoded)
02dbf93f
PH
3340
3341
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count with two decimals and a binary unit suffix
    (B, KiB, MiB, ...).

    None gives 'N/A'; a str argument is converted with float() first.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)

    # Zero has no logarithm, so it is pinned to the plain-bytes unit
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    scaled = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (scaled, suffixes[exponent])
f53c966a 3354
1c088fa8 3355
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse a '<number> <unit>' prefix of s using a unit -> multiplier table.

    The number may use ',' or '.' as decimal separator. Returns the product
    of number and multiplier truncated to int, or None if s does not start
    with a number followed by one of the table's units.
    """
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if match is None:
        return None

    number = float(match.group('num').replace(',', '.'))
    multiplier = unit_table[match.group('unit')]
    return int(number * multiplier)
3365
3366
be64b5b0
PH
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into a number of bytes.

    Returns None when s is None or does not start with a number followed by
    one of the supported units.
    """
    if s is None:
        return None

    # (unit letter, decimal long-name prefix, binary long-name prefix),
    # ordered by increasing power
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        for unit, multiplier in (
                (letter + 'iB', binary),
                (letter + 'B', decimal),
                (lower + 'B', binary),
                (letter + 'b', decimal),
                (lower + 'b', decimal),
                (decimal_name + 'bytes', decimal),
                (binary_name + 'bytes', binary)):
            unit_table[unit] = multiplier

    return lookup_unit_table(unit_table, s)
3436
3437
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an integer.

    Plain digit/separator strings are handed to str_to_int; otherwise a
    k/K, m/M or kk/KK suffix is applied through lookup_unit_table. Returns
    None for None input.
    """
    if s is None:
        return None

    s = s.strip()

    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    thousand, million = 1000, 1000 ** 2
    multipliers = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }

    return lookup_unit_table(multipliers, s)
be64b5b0 3457
2f7ae819 3458
b871d7e9
S
def parse_resolution(s):
    """Extract video dimensions from a free-form string.

    Recognises 'WxH' (with x, X or ×), '<height>p' / '<height>i', and
    '4k' / '8k'. Returns a dict with 'width' and/or 'height', or an empty
    dict when nothing is recognised or s is None.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions is not None:
        width, height = (int(dimensions.group(g)) for g in ('w', 'h'))
        return {
            'width': width,
            'height': height,
        }

    # (pattern, factor applied to the captured number to obtain the height)
    height_patterns = (
        (r'\b(\d+)[pPiI]\b', 1),
        (r'\b([48])[kK]\b', 540),
    )
    for pattern, factor in height_patterns:
        mobj = re.search(pattern, s)
        if mobj:
            return {'height': int(mobj.group(1)) * factor}

    return {}
3479
3480
0dc41787
S
def parse_bitrate(s):
    """Return the integer preceding 'kbps' in s, or None if s is not text or has no such number."""
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3487
3488
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of the month called name in language lang.

    Languages missing from MONTH_NAMES fall back to the English names.
    Returns None for an unknown name. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    for number, month in enumerate(names, 1):
        if month == name:
            return number
    return None
3498
3499
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of the month whose English name starts
    with the three-letter abbreviation abbrev, or None if there is none """

    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month[:3] == abbrev:
            return number
    return None
18258362
JMF
3508
3509
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not begin a known entity or numeric reference by '&amp;'."""
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)
e3946f98
PH
3516
3517
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl().

    Silently does nothing on Jython, when 'libc.so.6' cannot be loaded, or
    when the loaded libc exposes no prctl symbol.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initialising the buffer from the bytes makes ctypes allocate one extra
    # byte, so the string handed to prctl() is always NUL-terminated. Sizing
    # the buffer to exactly len(title_bytes) left no room for the terminator.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        # 15 is PR_SET_NAME in <linux/prctl.h>
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3542
3543
def remove_start(s, start):
    """Return s without the prefix start; s is returned unchanged if it is None or lacks the prefix."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3546
3547
def remove_end(s, end):
    """Return s without the suffix end.

    s is returned unchanged if it is None, if end is empty, or if s does not
    end with end.
    """
    # An empty suffix must be rejected explicitly: every string "ends with" '',
    # and s[:-0] is s[:0], which would wrongly yield an empty string
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
2b9faf55
PH
3550
3551
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s, if present."""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3559
3560
def url_basename(url):
    """Return the last '/'-separated segment of the URL's path, ignoring leading and trailing slashes."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3
PH
3564
3565
02dc0a36
S
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that precedes any '?', '#' or '&'."""
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3568
3569
def urljoin(base, path):
    """Join path onto base, tolerating bytes and invalid input.

    Returns path itself when it already is an absolute or protocol-relative
    URL, and None when path is empty or not a string, or when base is not an
    http(s) or protocol-relative URL string.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # '-' is placed last in the character class so it is a literal; written
    # as '+-.' it formed a range that also accepted ',' in the scheme
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3583
3584
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always reports 'HEAD'."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3588
3589
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose get_method() always reports 'PUT'."""

    def get_method(self):
        return 'PUT'
3593
3594
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int, returning default when that is not possible.

    If get_attr is given, the attribute of that name is read from v first
    (a missing attribute counts as None). None and the empty string yield
    default. The converted value is multiplied by invscale and then
    floor-divided by scale.
    """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised by int() for infinite floats
        return default
9732d77e 3607
9572013d 3608
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3611
9732d77e
PH
3612
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned as-is; text has ',', '.' and '+' removed before
    conversion. Any other type yields None. """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
608d11f5
PH
3620
3621
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale / scale; return default for None or unconvertible input."""
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3629
3630
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v if it is a bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3633
3634
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is text, otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3637
3638
af03000a
S
def url_or_none(url):
    """Return the stripped url if it is non-empty text that starts with '<scheme>://' or '//', else None."""
    if url and isinstance(url, compat_str):
        candidate = url.strip()
        if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
            return candidate
    return None
3644
3645
def parse_duration(s):
    """Parse a textual duration into a number of seconds.

    Three formats are tried in order:
      1. colon-separated '[[[days:]hours:]mins:]secs[.fraction]'
      2. unit-suffixed components, ISO 8601 style ('PT1H2M3S') or verbose
         ('1 hour 2 min 3 s')
      3. a single, possibly fractional, number of hours or minutes

    Each format may be followed by an optional 'Z'. Returns None when s is
    not a string or matches none of the formats.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Format 1: colon-separated fields; only the seconds field is mandatory
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Format 2: unit-suffixed components. Years, months and weeks are
        # matched but not captured, so they do not contribute to the result.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Format 3: a lone hours or minutes figure, which may be fractional
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms is captured with its leading '.', so float() yields the
        # fractional part of a second
        duration += float(ms)
    return duration
91d7d0b3
JMF
3702
3703
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the filename's existing extension.

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3710
3711
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the filename's extension by ext.

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3717
3718
d70ad093
PH
def check_executable(exe, args=[]):
    """ Try to run the given binary and return its name, or False if it
    cannot be started.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
3727
3728
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Run the executable with args and return the version found in its
    output, or False if the executable could not be started """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process.communicate()
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3746
3747
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output, or unrecognized if there is no match.

    When version_re is None a generic 'version <token>' pattern is used.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3757
3758
class PagedList(object):
    """Base class for paged lists.

    __len__ relies on a getslice() method that this class does not define
    itself.
    """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3763
9c44d242
PH
3764
class OnDemandPagedList(PagedList):
    """Paged list that calls pagefunc(pagenum) only for the pages a slice needs.

    pagefunc is called with a zero-based page number and its result is
    turned into a list; pagesize is the number of entries on a full page.
    With use_cache, fetched pages are kept in a dict keyed by page number.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list.

        With end=None, pages are fetched until one comes back shorter than
        pagesize.
        """
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # Indices of the first entry on this page and on the next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of the first wanted entry within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry, if the slice ends on this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
3815
3816
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list for which the total number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices in [start, end) as a list."""
        collected = []
        first_page = start // self._pagesize
        if end is None:
            last_page = self._pagecount
            remaining = None
        else:
            last_page = end // self._pagesize + 1
            remaining = end - start
        # Entries to drop from the front of the first fetched page
        to_skip = start - first_page * self._pagesize
        for pagenum in range(first_page, last_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
3844
3845
def uppercase_escape(s):
    r"""Replace each '\UXXXXXXXX' escape sequence in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
0fe2ff78
YCH
3852
3853
def lowercase_escape(s):
    r"""Replace each '\uXXXX' escape sequence in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
b53466e1 3860
d05cfe06
S
3861
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    needs_bytes = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_bytes:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
3867
3868
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA-encoded; all other components are percent-escaped
    netloc = parts.netloc.encode('idna').decode('ascii')
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        netloc=netloc, path=path, params=params,
        query=query, fragment=fragment)
    return escaped.geturl()
3879
62e609ab
PH
3880
def read_batch_urls(batch_fd):
    """Read URLs from a batch file object, one per line, and close it.

    A leading byte order mark and surrounding whitespace are removed from
    each line. Empty lines and lines starting with '#', ';' or ']' are
    skipped. Byte lines are decoded as UTF-8 with replacement of invalid
    sequences.
    """
    # A UTF-8 BOM shows up as U+FEFF when the line was decoded as UTF-8, and
    # as the three characters below when it was decoded byte-for-byte
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
3895
3896
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments with compat_urllib_parse_urlencode and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3899
3900
def update_url_query(url, query):
    """Return url with its query string updated by the mapping query.

    Keys in query replace existing parameters of the same name. The URL is
    returned unchanged when query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3909
8e60dc75 3910
ed0291d1
S
3911def update_Request(req, url=None, data=None, headers={}, query={}):
3912 req_headers = req.headers.copy()
3913 req_headers.update(headers)
3914 req_data = data or req.data
3915 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
3916 req_get_method = req.get_method()
3917 if req_get_method == 'HEAD':
3918 req_type = HEADRequest
3919 elif req_get_method == 'PUT':
3920 req_type = PUTRequest
3921 else:
3922 req_type = compat_urllib_request.Request
ed0291d1
S
3923 new_req = req_type(
3924 req_url, data=req_data, headers=req_headers,
3925 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
3926 if hasattr(req, 'timeout'):
3927 new_req.timeout = req.timeout
3928 return new_req
3929
3930
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns (body_bytes, content_type). Raises ValueError if the boundary
    occurs inside any encoded field.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter)
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
3951
3952
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If given as a Unicode object, it is used as the boundary and a
        ValueError is raised when it clashes with the data. Otherwise random
        boundaries are generated until one does not clash.

    Returns a (body_bytes, content_type) tuple.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is never replaced; errors propagate
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # Boundary clashed with the data, try another one
            continue
3981
3982
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key out of several, in d.

    With a list or tuple of keys, the value of the first key that is present
    and not None (and, if skip_false_values, not falsy) is returned, else
    default. With a single key this is plain d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
3991
3992
def try_get(src, getter, expected_type=None):
    """Apply getter (or each getter of a list/tuple in turn) to src.

    Returns the first result that is obtained without an AttributeError,
    KeyError, TypeError or IndexError and that is an instance of
    expected_type (if given); None if no getter succeeds.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4004
4005
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts so that earlier dicts take precedence.

    None values are ignored. A value already merged is only replaced when it
    is an empty string and the new value is a non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_empty_string = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_empty_string:
                    continue
            merged[key] = value
    return merged
4018
4019
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding and errors if it is not one already."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4022
16392824 4023
a1a530b0
PH
# Rating label -> age limit; parse_age_limit() returns these values for an
# exact label match
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4031
4032
# 'TV-...' rating label -> age limit; parse_age_limit() builds its TV rating
# pattern from the part of each key after the 'TV-' prefix
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4041
4042
def parse_age_limit(s):
    """Convert an age rating into an integer age limit.

    Accepts ints in the range 0-21, strings such as '18' or '18+', keys of
    US_RATINGS, and TV ratings written 'TV-14', 'TV_14' or 'TV14'. Returns
    None for anything else.
    """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None
    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4057
4058
def strip_jsonp(code):
    """Strip a JSONP wrapper such as 'callback(...);' from code and return what is inside the parentheses.

    Code that does not look like a JSONP call is returned unchanged.
    """
    jsonp_wrapper = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_wrapper.sub(r'\g<callback_data>', code)
478c2c61
PH
4067
4068
def js_to_json(code):
    """Rewrite JavaScript literal code so that it becomes valid JSON text.

    Comments and trailing commas before ']' or '}' are removed, quoted
    strings are converted to double-quoted JSON strings, bare identifiers
    are wrapped in double quotes, and hexadecimal / octal integers are
    converted to decimal.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for integer literals, optionally followed by a
    # ':' when the literal is used as an object key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Called for every token matched by the big regex below
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v == ',':
            # Comments and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # Strip the surrounding quotes and normalise the escapes: escape
            # bare '"', unescape "\'", drop line continuations, and turn
            # '\x' into '\u00'
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # Integer keys must be quoted in JSON, integer values must not
                return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4108
4109
478c2c61
PH
def qualities(quality_ids):
    """ Return a function mapping a quality id to its index in quality_ids (-1 if absent) """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4118
acd69589
PH
4119
# %-style template with named 'title', 'id' and 'ext' fields; presumably the
# default output filename template - confirm against its users
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 4121
a020a0dc
PH
4122
def limit_length(s, length):
    """ Shorten s to at most length characters, ending in '...' when it had to be cut """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4131
4132
def version_tuple(v):
    """Split a version string on '.' and '-' and return its components as a tuple of ints."""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
4135
4136
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    A missing or unparsable version counts as outdated only if assume_new
    is false.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
732ea2f0
PH
4144
4145
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when this module was loaded from a zip archive or runs in a frozen build
    loader = globals().get('__loader__')
    if isinstance(loader, zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
4151
4152
def args_to_str(args):
    """Return a short, shell-quoted string representation of a subprocess command."""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
4156
4157
def error_to_compat_str(err):
    """Return the message of err as text on both Python 2 and Python 3."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4165
4166
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    A few full types have a dedicated mapping; otherwise the subtype
    (lower-cased, without parameters) is translated through a table and
    returned as-is when the table has no entry. Returns None for None.
    """
    if mt is None:
        return None

    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    if mt in full_type_map:
        return full_type_map[mt]

    subtype_map = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
    }
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return subtype_map.get(subtype, subtype)
4202
4203
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video and audio codec.

    Returns a dict with 'vcodec' and 'acodec' ('none' for a missing kind),
    or an empty dict if nothing could be determined. Unknown codecs trigger
    a warning on stderr; if no codec is recognised but exactly two are
    listed, they are taken as video and audio codec in that order.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    video_codecs = (
        'avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
        'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    audio_codecs = (
        'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
        'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    splited_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in splited_codecs:
        family = full_codec.split('.')[0]
        if family in video_codecs:
            # Only the first codec of each kind is kept
            vcodec = vcodec or full_codec
        elif family in audio_codecs:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(splited_codecs) == 2:
        return {
            'vcodec': splited_codecs[0],
            'acodec': splited_codecs[1],
        }
    return {}
4233
4234
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the headers of a URL handle.

    The filename of an 'attachment' Content-Disposition header takes
    precedence; otherwise the extension is derived from Content-Type.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4247
4248
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI carrying the bytes data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4251
4252
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4261
4262
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Returns a match object (truthy) if the decoded text starts with optional
    whitespace followed by '<', else None. """

    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a recognised BOM the whole input is decoded as UTF-8
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    text = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4281
4282
def determine_protocol(info_dict):
    """Return the download protocol for info_dict.

    An explicit 'protocol' entry wins; otherwise the protocol is derived
    from the 'url' entry: its rtmp/mms/rtsp prefix, an m3u8/f4m extension,
    or finally the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for scheme_prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(scheme_prefix):
            return scheme_prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4303
4304
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values, as left-aligned text columns """
    table = [header_row] + data
    columns = zip(*table)
    max_lens = [max(len(compat_str(v)) for v in col) for col in columns]
    # Every column but the last is padded to its widest cell plus one space
    padded_cells = ['%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]]
    format_str = ' '.join(padded_cells) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4311
4312
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against dct.

    filter_part is either a comparison ('key OP value', where a '?' after
    the operator makes a missing key pass) or a presence test ('key' or
    '!key'). Raises ValueError for a malformed expression, an ordering
    operator applied to a string value, or an unparsable numeric value.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: plain integer first, then a file size with
            # unit, then a file size with an implied trailing 'B'
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: the matched '?' text (truthy) if given, else None
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4381
4382
def match_str(filter_str, dct):
    """Check a dictionary against a filter string of '&'-separated parts.

    Every part is evaluated with _match_one(); evaluation stops at the first
    part that does not match. Returns True if all parts match, False otherwise.
    """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4388
4389
def match_filter_func(filter_str):
    """Build a predicate that applies filter_str to an info dict.

    The returned function yields None when the info dict passes the filter and
    a human readable skip message otherwise.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Prefer the title for the message, then the id, then a generic word
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
4398
4399
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression into seconds.

    Two notations are understood: a plain offset in seconds with an optional
    "s" suffix ("12", "1.5", "1.5s") and a clock value "H:MM:SS" whose
    fraction may be separated by "." or ":". Returns a float, or None when the
    expression is empty or matches neither notation.
    """
    if not time_expr:
        return

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes, seconds = clock_match.groups()
        # A ":" before the fraction is normalised to "." so float() accepts it
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4411
4412
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time offset in seconds as an SRT timecode "HH:MM:SS,mmm".

    The offset is rounded to the nearest millisecond before being split into
    fields. Truncating each field separately lost a millisecond whenever the
    float was stored slightly below its decimal value (5.84 was rendered as
    "00:00:05,839").
    """
    total_msec = int(round(seconds * 1000))
    secs, msec = divmod(total_msec, 1000)
    mins, secs = divmod(secs, 60)
    hours, mins = divmod(mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
bf6427d2
YCH
4415
4416
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    Paragraphs without a usable begin time, or without either an end time or
    a duration, are skipped.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Pre-standard namespace URIs that are rewritten to the current ones
    # before parsing, so a single set of XPath expressions is enough.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Styling attributes that are read from the document; everything else is
    # ignored.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled in by the resolution loop below
    styles = {}
    # style inherited by every paragraph, taken from <body>/<div>
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders one <p> element as SRT text with
        # <b>/<i>/<u>/<font> markup.
        _out = ''
        # NOTE(review): the two lists below are class attributes and are only
        # mutated in place, so every parser instance created by parse_node()
        # shares them. Entries pushed in start() under "if style" are popped in
        # end() under "if unclosed_elements", so an entry can stay on the stack
        # and be seen by the next paragraph. Confirm whether that is intended
        # before changing it, since it affects the generated markup.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence, lowest to highest: default style, referenced
                # style, inline tts:* attributes.
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from an enclosing
                        # element.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                # Close tags in the reverse order they were opened
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialise the element again and feed it through the target parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> definitions. A style may reference a parent that is
    # declared later in the document, so the pass is repeated until every
    # reference is resolved.
    while True:
        unresolved = False
        resolved_count = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    unresolved = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Resolution only depends on which ids are present in "styles". A pass
        # that added none cannot be improved by another pass, so stop instead
        # of looping forever on a parent that never gets defined.
        if not unresolved or len(styles) == resolved_count:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The cue number follows the position in the document, so skipped
    # paragraphs leave gaps in the numbering.
    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4579
4580
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argument list for a command line option that takes a value.

    Looks up param in params and returns [command_option, value] with the
    value converted to a string, or an empty list when the parameter is
    missing or None.
    """
    value = params.get(param)
    if value is None:
        return []
    # Convert every non-None value, including falsy ones such as 0, so the
    # returned list only ever contains strings.
    return [command_option, compat_str(value)]
4586
4587
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argument list for a command line option with a boolean value.

    Returns an empty list when the parameter is missing or None. Otherwise the
    boolean is rendered as true_value or false_value, either as a separate
    argument or, when separator is given, joined to the option name.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4596
4597
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals expected_value.

    An empty list is returned in every other case, including a missing
    parameter.
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4601
4602
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra command line arguments stored under param.

    When the parameter is missing or None, default is returned instead. A list
    default is copied first, so that a caller extending the result cannot
    modify the shared default list (or the list object it passed in).
    Raises AssertionError if the stored value is not a list.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
4609
4610
39672624
YCH
4611class ISO639Utils(object):
4612 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4613 _lang_map = {
4614 'aa': 'aar',
4615 'ab': 'abk',
4616 'ae': 'ave',
4617 'af': 'afr',
4618 'ak': 'aka',
4619 'am': 'amh',
4620 'an': 'arg',
4621 'ar': 'ara',
4622 'as': 'asm',
4623 'av': 'ava',
4624 'ay': 'aym',
4625 'az': 'aze',
4626 'ba': 'bak',
4627 'be': 'bel',
4628 'bg': 'bul',
4629 'bh': 'bih',
4630 'bi': 'bis',
4631 'bm': 'bam',
4632 'bn': 'ben',
4633 'bo': 'bod',
4634 'br': 'bre',
4635 'bs': 'bos',
4636 'ca': 'cat',
4637 'ce': 'che',
4638 'ch': 'cha',
4639 'co': 'cos',
4640 'cr': 'cre',
4641 'cs': 'ces',
4642 'cu': 'chu',
4643 'cv': 'chv',
4644 'cy': 'cym',
4645 'da': 'dan',
4646 'de': 'deu',
4647 'dv': 'div',
4648 'dz': 'dzo',
4649 'ee': 'ewe',
4650 'el': 'ell',
4651 'en': 'eng',
4652 'eo': 'epo',
4653 'es': 'spa',
4654 'et': 'est',
4655 'eu': 'eus',
4656 'fa': 'fas',
4657 'ff': 'ful',
4658 'fi': 'fin',
4659 'fj': 'fij',
4660 'fo': 'fao',
4661 'fr': 'fra',
4662 'fy': 'fry',
4663 'ga': 'gle',
4664 'gd': 'gla',
4665 'gl': 'glg',
4666 'gn': 'grn',
4667 'gu': 'guj',
4668 'gv': 'glv',
4669 'ha': 'hau',
4670 'he': 'heb',
b7acc835 4671 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4672 'hi': 'hin',
4673 'ho': 'hmo',
4674 'hr': 'hrv',
4675 'ht': 'hat',
4676 'hu': 'hun',
4677 'hy': 'hye',
4678 'hz': 'her',
4679 'ia': 'ina',
4680 'id': 'ind',
b7acc835 4681 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4682 'ie': 'ile',
4683 'ig': 'ibo',
4684 'ii': 'iii',
4685 'ik': 'ipk',
4686 'io': 'ido',
4687 'is': 'isl',
4688 'it': 'ita',
4689 'iu': 'iku',
4690 'ja': 'jpn',
4691 'jv': 'jav',
4692 'ka': 'kat',
4693 'kg': 'kon',
4694 'ki': 'kik',
4695 'kj': 'kua',
4696 'kk': 'kaz',
4697 'kl': 'kal',
4698 'km': 'khm',
4699 'kn': 'kan',
4700 'ko': 'kor',
4701 'kr': 'kau',
4702 'ks': 'kas',
4703 'ku': 'kur',
4704 'kv': 'kom',
4705 'kw': 'cor',
4706 'ky': 'kir',
4707 'la': 'lat',
4708 'lb': 'ltz',
4709 'lg': 'lug',
4710 'li': 'lim',
4711 'ln': 'lin',
4712 'lo': 'lao',
4713 'lt': 'lit',
4714 'lu': 'lub',
4715 'lv': 'lav',
4716 'mg': 'mlg',
4717 'mh': 'mah',
4718 'mi': 'mri',
4719 'mk': 'mkd',
4720 'ml': 'mal',
4721 'mn': 'mon',
4722 'mr': 'mar',
4723 'ms': 'msa',
4724 'mt': 'mlt',
4725 'my': 'mya',
4726 'na': 'nau',
4727 'nb': 'nob',
4728 'nd': 'nde',
4729 'ne': 'nep',
4730 'ng': 'ndo',
4731 'nl': 'nld',
4732 'nn': 'nno',
4733 'no': 'nor',
4734 'nr': 'nbl',
4735 'nv': 'nav',
4736 'ny': 'nya',
4737 'oc': 'oci',
4738 'oj': 'oji',
4739 'om': 'orm',
4740 'or': 'ori',
4741 'os': 'oss',
4742 'pa': 'pan',
4743 'pi': 'pli',
4744 'pl': 'pol',
4745 'ps': 'pus',
4746 'pt': 'por',
4747 'qu': 'que',
4748 'rm': 'roh',
4749 'rn': 'run',
4750 'ro': 'ron',
4751 'ru': 'rus',
4752 'rw': 'kin',
4753 'sa': 'san',
4754 'sc': 'srd',
4755 'sd': 'snd',
4756 'se': 'sme',
4757 'sg': 'sag',
4758 'si': 'sin',
4759 'sk': 'slk',
4760 'sl': 'slv',
4761 'sm': 'smo',
4762 'sn': 'sna',
4763 'so': 'som',
4764 'sq': 'sqi',
4765 'sr': 'srp',
4766 'ss': 'ssw',
4767 'st': 'sot',
4768 'su': 'sun',
4769 'sv': 'swe',
4770 'sw': 'swa',
4771 'ta': 'tam',
4772 'te': 'tel',
4773 'tg': 'tgk',
4774 'th': 'tha',
4775 'ti': 'tir',
4776 'tk': 'tuk',
4777 'tl': 'tgl',
4778 'tn': 'tsn',
4779 'to': 'ton',
4780 'tr': 'tur',
4781 'ts': 'tso',
4782 'tt': 'tat',
4783 'tw': 'twi',
4784 'ty': 'tah',
4785 'ug': 'uig',
4786 'uk': 'ukr',
4787 'ur': 'urd',
4788 'uz': 'uzb',
4789 've': 'ven',
4790 'vi': 'vie',
4791 'vo': 'vol',
4792 'wa': 'wln',
4793 'wo': 'wol',
4794 'xh': 'xho',
4795 'yi': 'yid',
e9a50fba 4796 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
4797 'yo': 'yor',
4798 'za': 'zha',
4799 'zh': 'zho',
4800 'zu': 'zul',
4801 }
4802
4803 @classmethod
4804 def short2long(cls, code):
4805 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4806 return cls._lang_map.get(code[:2])
4807
4808 @classmethod
4809 def long2short(cls, code):
4810 """Convert language code from ISO 639-2/T to ISO 639-1"""
4811 for short_name, long_name in cls._lang_map.items():
4812 if long_name == code:
4813 return short_name
4814
4815
4eb10f66
YCH
4816class ISO3166Utils(object):
4817 # From http://data.okfn.org/data/core/country-list
4818 _country_map = {
4819 'AF': 'Afghanistan',
4820 'AX': 'Åland Islands',
4821 'AL': 'Albania',
4822 'DZ': 'Algeria',
4823 'AS': 'American Samoa',
4824 'AD': 'Andorra',
4825 'AO': 'Angola',
4826 'AI': 'Anguilla',
4827 'AQ': 'Antarctica',
4828 'AG': 'Antigua and Barbuda',
4829 'AR': 'Argentina',
4830 'AM': 'Armenia',
4831 'AW': 'Aruba',
4832 'AU': 'Australia',
4833 'AT': 'Austria',
4834 'AZ': 'Azerbaijan',
4835 'BS': 'Bahamas',
4836 'BH': 'Bahrain',
4837 'BD': 'Bangladesh',
4838 'BB': 'Barbados',
4839 'BY': 'Belarus',
4840 'BE': 'Belgium',
4841 'BZ': 'Belize',
4842 'BJ': 'Benin',
4843 'BM': 'Bermuda',
4844 'BT': 'Bhutan',
4845 'BO': 'Bolivia, Plurinational State of',
4846 'BQ': 'Bonaire, Sint Eustatius and Saba',
4847 'BA': 'Bosnia and Herzegovina',
4848 'BW': 'Botswana',
4849 'BV': 'Bouvet Island',
4850 'BR': 'Brazil',
4851 'IO': 'British Indian Ocean Territory',
4852 'BN': 'Brunei Darussalam',
4853 'BG': 'Bulgaria',
4854 'BF': 'Burkina Faso',
4855 'BI': 'Burundi',
4856 'KH': 'Cambodia',
4857 'CM': 'Cameroon',
4858 'CA': 'Canada',
4859 'CV': 'Cape Verde',
4860 'KY': 'Cayman Islands',
4861 'CF': 'Central African Republic',
4862 'TD': 'Chad',
4863 'CL': 'Chile',
4864 'CN': 'China',
4865 'CX': 'Christmas Island',
4866 'CC': 'Cocos (Keeling) Islands',
4867 'CO': 'Colombia',
4868 'KM': 'Comoros',
4869 'CG': 'Congo',
4870 'CD': 'Congo, the Democratic Republic of the',
4871 'CK': 'Cook Islands',
4872 'CR': 'Costa Rica',
4873 'CI': 'Côte d\'Ivoire',
4874 'HR': 'Croatia',
4875 'CU': 'Cuba',
4876 'CW': 'Curaçao',
4877 'CY': 'Cyprus',
4878 'CZ': 'Czech Republic',
4879 'DK': 'Denmark',
4880 'DJ': 'Djibouti',
4881 'DM': 'Dominica',
4882 'DO': 'Dominican Republic',
4883 'EC': 'Ecuador',
4884 'EG': 'Egypt',
4885 'SV': 'El Salvador',
4886 'GQ': 'Equatorial Guinea',
4887 'ER': 'Eritrea',
4888 'EE': 'Estonia',
4889 'ET': 'Ethiopia',
4890 'FK': 'Falkland Islands (Malvinas)',
4891 'FO': 'Faroe Islands',
4892 'FJ': 'Fiji',
4893 'FI': 'Finland',
4894 'FR': 'France',
4895 'GF': 'French Guiana',
4896 'PF': 'French Polynesia',
4897 'TF': 'French Southern Territories',
4898 'GA': 'Gabon',
4899 'GM': 'Gambia',
4900 'GE': 'Georgia',
4901 'DE': 'Germany',
4902 'GH': 'Ghana',
4903 'GI': 'Gibraltar',
4904 'GR': 'Greece',
4905 'GL': 'Greenland',
4906 'GD': 'Grenada',
4907 'GP': 'Guadeloupe',
4908 'GU': 'Guam',
4909 'GT': 'Guatemala',
4910 'GG': 'Guernsey',
4911 'GN': 'Guinea',
4912 'GW': 'Guinea-Bissau',
4913 'GY': 'Guyana',
4914 'HT': 'Haiti',
4915 'HM': 'Heard Island and McDonald Islands',
4916 'VA': 'Holy See (Vatican City State)',
4917 'HN': 'Honduras',
4918 'HK': 'Hong Kong',
4919 'HU': 'Hungary',
4920 'IS': 'Iceland',
4921 'IN': 'India',
4922 'ID': 'Indonesia',
4923 'IR': 'Iran, Islamic Republic of',
4924 'IQ': 'Iraq',
4925 'IE': 'Ireland',
4926 'IM': 'Isle of Man',
4927 'IL': 'Israel',
4928 'IT': 'Italy',
4929 'JM': 'Jamaica',
4930 'JP': 'Japan',
4931 'JE': 'Jersey',
4932 'JO': 'Jordan',
4933 'KZ': 'Kazakhstan',
4934 'KE': 'Kenya',
4935 'KI': 'Kiribati',
4936 'KP': 'Korea, Democratic People\'s Republic of',
4937 'KR': 'Korea, Republic of',
4938 'KW': 'Kuwait',
4939 'KG': 'Kyrgyzstan',
4940 'LA': 'Lao People\'s Democratic Republic',
4941 'LV': 'Latvia',
4942 'LB': 'Lebanon',
4943 'LS': 'Lesotho',
4944 'LR': 'Liberia',
4945 'LY': 'Libya',
4946 'LI': 'Liechtenstein',
4947 'LT': 'Lithuania',
4948 'LU': 'Luxembourg',
4949 'MO': 'Macao',
4950 'MK': 'Macedonia, the Former Yugoslav Republic of',
4951 'MG': 'Madagascar',
4952 'MW': 'Malawi',
4953 'MY': 'Malaysia',
4954 'MV': 'Maldives',
4955 'ML': 'Mali',
4956 'MT': 'Malta',
4957 'MH': 'Marshall Islands',
4958 'MQ': 'Martinique',
4959 'MR': 'Mauritania',
4960 'MU': 'Mauritius',
4961 'YT': 'Mayotte',
4962 'MX': 'Mexico',
4963 'FM': 'Micronesia, Federated States of',
4964 'MD': 'Moldova, Republic of',
4965 'MC': 'Monaco',
4966 'MN': 'Mongolia',
4967 'ME': 'Montenegro',
4968 'MS': 'Montserrat',
4969 'MA': 'Morocco',
4970 'MZ': 'Mozambique',
4971 'MM': 'Myanmar',
4972 'NA': 'Namibia',
4973 'NR': 'Nauru',
4974 'NP': 'Nepal',
4975 'NL': 'Netherlands',
4976 'NC': 'New Caledonia',
4977 'NZ': 'New Zealand',
4978 'NI': 'Nicaragua',
4979 'NE': 'Niger',
4980 'NG': 'Nigeria',
4981 'NU': 'Niue',
4982 'NF': 'Norfolk Island',
4983 'MP': 'Northern Mariana Islands',
4984 'NO': 'Norway',
4985 'OM': 'Oman',
4986 'PK': 'Pakistan',
4987 'PW': 'Palau',
4988 'PS': 'Palestine, State of',
4989 'PA': 'Panama',
4990 'PG': 'Papua New Guinea',
4991 'PY': 'Paraguay',
4992 'PE': 'Peru',
4993 'PH': 'Philippines',
4994 'PN': 'Pitcairn',
4995 'PL': 'Poland',
4996 'PT': 'Portugal',
4997 'PR': 'Puerto Rico',
4998 'QA': 'Qatar',
4999 'RE': 'Réunion',
5000 'RO': 'Romania',
5001 'RU': 'Russian Federation',
5002 'RW': 'Rwanda',
5003 'BL': 'Saint Barthélemy',
5004 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5005 'KN': 'Saint Kitts and Nevis',
5006 'LC': 'Saint Lucia',
5007 'MF': 'Saint Martin (French part)',
5008 'PM': 'Saint Pierre and Miquelon',
5009 'VC': 'Saint Vincent and the Grenadines',
5010 'WS': 'Samoa',
5011 'SM': 'San Marino',
5012 'ST': 'Sao Tome and Principe',
5013 'SA': 'Saudi Arabia',
5014 'SN': 'Senegal',
5015 'RS': 'Serbia',
5016 'SC': 'Seychelles',
5017 'SL': 'Sierra Leone',
5018 'SG': 'Singapore',
5019 'SX': 'Sint Maarten (Dutch part)',
5020 'SK': 'Slovakia',
5021 'SI': 'Slovenia',
5022 'SB': 'Solomon Islands',
5023 'SO': 'Somalia',
5024 'ZA': 'South Africa',
5025 'GS': 'South Georgia and the South Sandwich Islands',
5026 'SS': 'South Sudan',
5027 'ES': 'Spain',
5028 'LK': 'Sri Lanka',
5029 'SD': 'Sudan',
5030 'SR': 'Suriname',
5031 'SJ': 'Svalbard and Jan Mayen',
5032 'SZ': 'Swaziland',
5033 'SE': 'Sweden',
5034 'CH': 'Switzerland',
5035 'SY': 'Syrian Arab Republic',
5036 'TW': 'Taiwan, Province of China',
5037 'TJ': 'Tajikistan',
5038 'TZ': 'Tanzania, United Republic of',
5039 'TH': 'Thailand',
5040 'TL': 'Timor-Leste',
5041 'TG': 'Togo',
5042 'TK': 'Tokelau',
5043 'TO': 'Tonga',
5044 'TT': 'Trinidad and Tobago',
5045 'TN': 'Tunisia',
5046 'TR': 'Turkey',
5047 'TM': 'Turkmenistan',
5048 'TC': 'Turks and Caicos Islands',
5049 'TV': 'Tuvalu',
5050 'UG': 'Uganda',
5051 'UA': 'Ukraine',
5052 'AE': 'United Arab Emirates',
5053 'GB': 'United Kingdom',
5054 'US': 'United States',
5055 'UM': 'United States Minor Outlying Islands',
5056 'UY': 'Uruguay',
5057 'UZ': 'Uzbekistan',
5058 'VU': 'Vanuatu',
5059 'VE': 'Venezuela, Bolivarian Republic of',
5060 'VN': 'Viet Nam',
5061 'VG': 'Virgin Islands, British',
5062 'VI': 'Virgin Islands, U.S.',
5063 'WF': 'Wallis and Futuna',
5064 'EH': 'Western Sahara',
5065 'YE': 'Yemen',
5066 'ZM': 'Zambia',
5067 'ZW': 'Zimbabwe',
5068 }
5069
5070 @classmethod
5071 def short2full(cls, code):
5072 """Convert an ISO 3166-2 country code to the corresponding full name"""
5073 return cls._country_map.get(code.upper())
5074
5075
773f291d
S
5076class GeoUtils(object):
5077 # Major IPv4 address blocks per country
5078 _country_ip_map = {
53896ca5 5079 'AD': '46.172.224.0/19',
773f291d
S
5080 'AE': '94.200.0.0/13',
5081 'AF': '149.54.0.0/17',
5082 'AG': '209.59.64.0/18',
5083 'AI': '204.14.248.0/21',
5084 'AL': '46.99.0.0/16',
5085 'AM': '46.70.0.0/15',
5086 'AO': '105.168.0.0/13',
53896ca5
S
5087 'AP': '182.50.184.0/21',
5088 'AQ': '23.154.160.0/24',
773f291d
S
5089 'AR': '181.0.0.0/12',
5090 'AS': '202.70.112.0/20',
53896ca5 5091 'AT': '77.116.0.0/14',
773f291d
S
5092 'AU': '1.128.0.0/11',
5093 'AW': '181.41.0.0/18',
53896ca5
S
5094 'AX': '185.217.4.0/22',
5095 'AZ': '5.197.0.0/16',
773f291d
S
5096 'BA': '31.176.128.0/17',
5097 'BB': '65.48.128.0/17',
5098 'BD': '114.130.0.0/16',
5099 'BE': '57.0.0.0/8',
53896ca5 5100 'BF': '102.178.0.0/15',
773f291d
S
5101 'BG': '95.42.0.0/15',
5102 'BH': '37.131.0.0/17',
5103 'BI': '154.117.192.0/18',
5104 'BJ': '137.255.0.0/16',
53896ca5 5105 'BL': '185.212.72.0/23',
773f291d
S
5106 'BM': '196.12.64.0/18',
5107 'BN': '156.31.0.0/16',
5108 'BO': '161.56.0.0/16',
5109 'BQ': '161.0.80.0/20',
53896ca5 5110 'BR': '191.128.0.0/12',
773f291d
S
5111 'BS': '24.51.64.0/18',
5112 'BT': '119.2.96.0/19',
5113 'BW': '168.167.0.0/16',
5114 'BY': '178.120.0.0/13',
5115 'BZ': '179.42.192.0/18',
5116 'CA': '99.224.0.0/11',
5117 'CD': '41.243.0.0/16',
53896ca5
S
5118 'CF': '197.242.176.0/21',
5119 'CG': '160.113.0.0/16',
773f291d 5120 'CH': '85.0.0.0/13',
53896ca5 5121 'CI': '102.136.0.0/14',
773f291d
S
5122 'CK': '202.65.32.0/19',
5123 'CL': '152.172.0.0/14',
53896ca5 5124 'CM': '102.244.0.0/14',
773f291d
S
5125 'CN': '36.128.0.0/10',
5126 'CO': '181.240.0.0/12',
5127 'CR': '201.192.0.0/12',
5128 'CU': '152.206.0.0/15',
5129 'CV': '165.90.96.0/19',
5130 'CW': '190.88.128.0/17',
53896ca5 5131 'CY': '31.153.0.0/16',
773f291d
S
5132 'CZ': '88.100.0.0/14',
5133 'DE': '53.0.0.0/8',
5134 'DJ': '197.241.0.0/17',
5135 'DK': '87.48.0.0/12',
5136 'DM': '192.243.48.0/20',
5137 'DO': '152.166.0.0/15',
5138 'DZ': '41.96.0.0/12',
5139 'EC': '186.68.0.0/15',
5140 'EE': '90.190.0.0/15',
5141 'EG': '156.160.0.0/11',
5142 'ER': '196.200.96.0/20',
5143 'ES': '88.0.0.0/11',
5144 'ET': '196.188.0.0/14',
5145 'EU': '2.16.0.0/13',
5146 'FI': '91.152.0.0/13',
5147 'FJ': '144.120.0.0/16',
53896ca5 5148 'FK': '80.73.208.0/21',
773f291d
S
5149 'FM': '119.252.112.0/20',
5150 'FO': '88.85.32.0/19',
5151 'FR': '90.0.0.0/9',
5152 'GA': '41.158.0.0/15',
5153 'GB': '25.0.0.0/8',
5154 'GD': '74.122.88.0/21',
5155 'GE': '31.146.0.0/16',
5156 'GF': '161.22.64.0/18',
5157 'GG': '62.68.160.0/19',
53896ca5
S
5158 'GH': '154.160.0.0/12',
5159 'GI': '95.164.0.0/16',
773f291d
S
5160 'GL': '88.83.0.0/19',
5161 'GM': '160.182.0.0/15',
5162 'GN': '197.149.192.0/18',
5163 'GP': '104.250.0.0/19',
5164 'GQ': '105.235.224.0/20',
5165 'GR': '94.64.0.0/13',
5166 'GT': '168.234.0.0/16',
5167 'GU': '168.123.0.0/16',
5168 'GW': '197.214.80.0/20',
5169 'GY': '181.41.64.0/18',
5170 'HK': '113.252.0.0/14',
5171 'HN': '181.210.0.0/16',
5172 'HR': '93.136.0.0/13',
5173 'HT': '148.102.128.0/17',
5174 'HU': '84.0.0.0/14',
5175 'ID': '39.192.0.0/10',
5176 'IE': '87.32.0.0/12',
5177 'IL': '79.176.0.0/13',
5178 'IM': '5.62.80.0/20',
5179 'IN': '117.192.0.0/10',
5180 'IO': '203.83.48.0/21',
5181 'IQ': '37.236.0.0/14',
5182 'IR': '2.176.0.0/12',
5183 'IS': '82.221.0.0/16',
5184 'IT': '79.0.0.0/10',
5185 'JE': '87.244.64.0/18',
5186 'JM': '72.27.0.0/17',
5187 'JO': '176.29.0.0/16',
53896ca5 5188 'JP': '133.0.0.0/8',
773f291d
S
5189 'KE': '105.48.0.0/12',
5190 'KG': '158.181.128.0/17',
5191 'KH': '36.37.128.0/17',
5192 'KI': '103.25.140.0/22',
5193 'KM': '197.255.224.0/20',
53896ca5 5194 'KN': '198.167.192.0/19',
773f291d
S
5195 'KP': '175.45.176.0/22',
5196 'KR': '175.192.0.0/10',
5197 'KW': '37.36.0.0/14',
5198 'KY': '64.96.0.0/15',
5199 'KZ': '2.72.0.0/13',
5200 'LA': '115.84.64.0/18',
5201 'LB': '178.135.0.0/16',
53896ca5 5202 'LC': '24.92.144.0/20',
773f291d
S
5203 'LI': '82.117.0.0/19',
5204 'LK': '112.134.0.0/15',
53896ca5 5205 'LR': '102.183.0.0/16',
773f291d
S
5206 'LS': '129.232.0.0/17',
5207 'LT': '78.56.0.0/13',
5208 'LU': '188.42.0.0/16',
5209 'LV': '46.109.0.0/16',
5210 'LY': '41.252.0.0/14',
5211 'MA': '105.128.0.0/11',
5212 'MC': '88.209.64.0/18',
5213 'MD': '37.246.0.0/16',
5214 'ME': '178.175.0.0/17',
5215 'MF': '74.112.232.0/21',
5216 'MG': '154.126.0.0/17',
5217 'MH': '117.103.88.0/21',
5218 'MK': '77.28.0.0/15',
5219 'ML': '154.118.128.0/18',
5220 'MM': '37.111.0.0/17',
5221 'MN': '49.0.128.0/17',
5222 'MO': '60.246.0.0/16',
5223 'MP': '202.88.64.0/20',
5224 'MQ': '109.203.224.0/19',
5225 'MR': '41.188.64.0/18',
5226 'MS': '208.90.112.0/22',
5227 'MT': '46.11.0.0/16',
5228 'MU': '105.16.0.0/12',
5229 'MV': '27.114.128.0/18',
53896ca5 5230 'MW': '102.70.0.0/15',
773f291d
S
5231 'MX': '187.192.0.0/11',
5232 'MY': '175.136.0.0/13',
5233 'MZ': '197.218.0.0/15',
5234 'NA': '41.182.0.0/16',
5235 'NC': '101.101.0.0/18',
5236 'NE': '197.214.0.0/18',
5237 'NF': '203.17.240.0/22',
5238 'NG': '105.112.0.0/12',
5239 'NI': '186.76.0.0/15',
5240 'NL': '145.96.0.0/11',
5241 'NO': '84.208.0.0/13',
5242 'NP': '36.252.0.0/15',
5243 'NR': '203.98.224.0/19',
5244 'NU': '49.156.48.0/22',
5245 'NZ': '49.224.0.0/14',
5246 'OM': '5.36.0.0/15',
5247 'PA': '186.72.0.0/15',
5248 'PE': '186.160.0.0/14',
5249 'PF': '123.50.64.0/18',
5250 'PG': '124.240.192.0/19',
5251 'PH': '49.144.0.0/13',
5252 'PK': '39.32.0.0/11',
5253 'PL': '83.0.0.0/11',
5254 'PM': '70.36.0.0/20',
5255 'PR': '66.50.0.0/16',
5256 'PS': '188.161.0.0/16',
5257 'PT': '85.240.0.0/13',
5258 'PW': '202.124.224.0/20',
5259 'PY': '181.120.0.0/14',
5260 'QA': '37.210.0.0/15',
53896ca5 5261 'RE': '102.35.0.0/16',
773f291d 5262 'RO': '79.112.0.0/13',
53896ca5 5263 'RS': '93.86.0.0/15',
773f291d 5264 'RU': '5.136.0.0/13',
53896ca5 5265 'RW': '41.186.0.0/16',
773f291d
S
5266 'SA': '188.48.0.0/13',
5267 'SB': '202.1.160.0/19',
5268 'SC': '154.192.0.0/11',
53896ca5 5269 'SD': '102.120.0.0/13',
773f291d 5270 'SE': '78.64.0.0/12',
53896ca5 5271 'SG': '8.128.0.0/10',
773f291d
S
5272 'SI': '188.196.0.0/14',
5273 'SK': '78.98.0.0/15',
53896ca5 5274 'SL': '102.143.0.0/17',
773f291d
S
5275 'SM': '89.186.32.0/19',
5276 'SN': '41.82.0.0/15',
53896ca5 5277 'SO': '154.115.192.0/18',
773f291d
S
5278 'SR': '186.179.128.0/17',
5279 'SS': '105.235.208.0/21',
5280 'ST': '197.159.160.0/19',
5281 'SV': '168.243.0.0/16',
5282 'SX': '190.102.0.0/20',
5283 'SY': '5.0.0.0/16',
5284 'SZ': '41.84.224.0/19',
5285 'TC': '65.255.48.0/20',
5286 'TD': '154.68.128.0/19',
5287 'TG': '196.168.0.0/14',
5288 'TH': '171.96.0.0/13',
5289 'TJ': '85.9.128.0/18',
5290 'TK': '27.96.24.0/21',
5291 'TL': '180.189.160.0/20',
5292 'TM': '95.85.96.0/19',
5293 'TN': '197.0.0.0/11',
5294 'TO': '175.176.144.0/21',
5295 'TR': '78.160.0.0/11',
5296 'TT': '186.44.0.0/15',
5297 'TV': '202.2.96.0/19',
5298 'TW': '120.96.0.0/11',
5299 'TZ': '156.156.0.0/14',
53896ca5
S
5300 'UA': '37.52.0.0/14',
5301 'UG': '102.80.0.0/13',
5302 'US': '6.0.0.0/8',
773f291d 5303 'UY': '167.56.0.0/13',
53896ca5 5304 'UZ': '84.54.64.0/18',
773f291d 5305 'VA': '212.77.0.0/19',
53896ca5 5306 'VC': '207.191.240.0/21',
773f291d 5307 'VE': '186.88.0.0/13',
53896ca5 5308 'VG': '66.81.192.0/20',
773f291d
S
5309 'VI': '146.226.0.0/16',
5310 'VN': '14.160.0.0/11',
5311 'VU': '202.80.32.0/20',
5312 'WF': '117.20.32.0/21',
5313 'WS': '202.4.32.0/19',
5314 'YE': '134.35.0.0/16',
5315 'YT': '41.242.116.0/22',
5316 'ZA': '41.0.0.0/11',
53896ca5
S
5317 'ZM': '102.144.0.0/13',
5318 'ZW': '102.177.192.0/18',
773f291d
S
5319 }
5320
5321 @classmethod
5f95927a
S
5322 def random_ipv4(cls, code_or_block):
5323 if len(code_or_block) == 2:
5324 block = cls._country_ip_map.get(code_or_block.upper())
5325 if not block:
5326 return None
5327 else:
5328 block = code_or_block
773f291d
S
5329 addr, preflen = block.split('/')
5330 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5331 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5332 return compat_str(socket.inet_ntoa(
4248dad9 5333 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5334
5335
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler whose proxy can be overridden for a single request.

    A request carrying a 'Ytdl-request-proxy' header is sent through the proxy
    named there; the header is removed before the request goes out.
    """

    def __init__(self, proxies=None):
        # Install http_open/https_open handlers that default to "no proxy";
        # the base class constructor then adds one per configured proxy.
        for scheme in ('http', 'https'):
            # scheme and the bound method are captured as default arguments,
            # so each handler keeps the values of its own iteration
            handler = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, handler)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers wrap the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5359
5360
0a5445dd
YCH
5361# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5362# released into Public Domain
5363# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5364
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Values that are not positive produce a single zero byte. If blocksize is
    given and greater than zero, the front of the byte string is padded with
    binary zeros so that its length is a multiple of blocksize.
    """
    n = int(n)
    # Collect 32-bit words, least significant first
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    words.reverse()
    # Drop the leading zero bytes of the most significant word; an empty
    # result (nothing was packed) becomes a single zero byte
    s = b''.join(words).lstrip(b'\000') or b'\000'
    if blocksize > 0:
        remainder = len(s) % blocksize
        if remainder:
            s = b'\000' * (blocksize - remainder) + s
    return s
5393
5394
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s
    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
5410
5411
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The input bytes are reversed before being read as one hexadecimal number
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    return '%x' % pow(plaintext, exponent, modulus)
81bdc8fd
YCH
5427
5428
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for at least
                               8 padding octets
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be nonzero: the first zero octet after the leading
    # [0, 2] marks where the data starts, so a zero inside the padding would
    # make the message be split at the wrong place.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5442
5443
def encode_base_n(num, n, table=None):
    """Render the integer num in base n.

    Digits are taken from table, or from the first n characters of
    0-9a-zA-Z when no table is given. Negative numbers are rendered as "-"
    followed by the digits of their absolute value.

    Raises ValueError if n exceeds the number of available digits, or if a
    non-zero number is to be encoded in a base smaller than 2.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # With a base below 2 the repeated division never reaches zero
    if n < 2:
        raise ValueError('base %d is too small to encode a non-zero number' % n)

    # Floor division of a negative number never reaches zero either, so the
    # sign is split off and the magnitude is encoded
    sign = ''
    if num < 0:
        sign, num = '-', -num

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return sign + ''.join(reversed(digits))
f52354a8
YCH
5460
5461
def decode_packed_codes(code):
    """Expand the packed JavaScript matched by PACKED_CODES_RE in code.

    Every word of the packed payload is replaced by the symbol stored under
    that word's base-n index; an empty symbol stands for the word itself.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfucasted_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in range(count - 1, -1, -1):
        word = encode_base_n(index, base)
        symbol_table[word] = symbols[index] or word

    def _lookup(match):
        return symbol_table[match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfucasted_code)
e154c651 5478
5479
1ced2221
S
def caesar(s, alphabet, shift):
    """Apply a Caesar shift over alphabet to s.

    Characters found in alphabet are replaced by the one shift positions
    further on, wrapping around at the ends; all other characters are kept.
    A shift of 0 returns s itself.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
5487
5488
def rot47(s):
    """Rotate the printable ASCII characters ('!' to '~') of s by 47 places"""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5491
5492
def parse_m3u8_attributes(attrib):
    """
    Parse an attribute list of the form KEY=value,KEY="quoted value",...

    Returns a dict mapping each attribute name to its value, with the
    surrounding double quotes of quoted values removed. Quoted values may
    contain commas. When a name occurs more than once the last value wins.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    return dict(
        (name, raw[1:-1] if raw.startswith('"') else raw)
        for name, raw in re.findall(attribute_re, attrib))
1143535d
YCH
5500
5501
def urshift(val, n):
    """
    Unsigned right shift of val by n bits

    A negative val is first offset by 2**32, i.e. treated as the unsigned
    32-bit value with the same bit pattern, and then shifted.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5504
5505
5506# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5507# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """
    Decode the image data of a PNG file

    Checks the PNG signature and the leading IHDR chunk, concatenates and
    inflates all IDAT chunks and undoes the per-row filters (Sub, Up,
    Average and Paeth).

    Every row is read as `width * 3` bytes with a filter distance of 3
    bytes, so the input is presumably expected to be 8-bit RGB without
    alpha; bit depth and colour type from IHDR are not checked.

    @param png_data  raw content of the PNG file
    @returns         (width, height, pixels) where pixels is a list of
                     `height` rows, each a flat list of `width * 3` integer
                     byte values
    @raises IOError  if the signature or IHDR chunk is missing, or if
                     there is no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, body = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or body[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        # Big-endian unsigned integer; the width is picked from len(raw)
        return compat_struct_unpack(formats[len(raw)], raw)[0]

    # Chunk layout: 4-byte length, 4-byte type, <length> bytes of data, CRC
    chunks = []
    pos, end = 0, len(body)
    while pos < end:
        size = read_uint(body[pos:pos + 4])
        data_start = pos + 8
        chunks.append({
            'type': body[pos + 4:data_start],
            'length': size,
            'data': body[data_start:data_start + size],
        })
        pos = data_start + size + 4  # Skip CRC

    ihdr = chunks[0]['data']
    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')

    if not idat:
        raise IOError('Unable to read PNG data.')

    raw = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for row_idx in range(height):
        # Each scanline is one filter-type byte followed by stride bytes
        row_start = row_idx * (1 + stride)
        filter_type = raw[row_start]

        prev_row = pixels[row_idx - 1] if row_idx > 0 else None
        row = []
        pixels.append(row)

        for col in range(stride):
            value = raw[row_start + 1 + col]

            # Neighbours are 0 when they fall outside of the image
            has_left = col > 2
            left = row[col - 3] if has_left else 0
            up = prev_row[col] if prev_row is not None else 0

            if filter_type == 1:  # Sub
                value = (value + left) & 0xff
            elif filter_type == 2:  # Up
                value = (value + up) & 0xff
            elif filter_type == 3:  # Average
                value = (value + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                up_left = 0
                if has_left and prev_row is not None:
                    up_left = prev_row[col - 3]

                estimate = left + up - up_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_up_left = abs(estimate - up_left)

                # Pick the neighbour closest to the estimate, preferring
                # left, then up, then upper-left on ties
                if dist_left <= dist_up and dist_left <= dist_up_left:
                    predictor = left
                elif dist_up <= dist_up_left:
                    predictor = up
                else:
                    predictor = up_left
                value = (value + predictor) & 0xff

            row.append(value)

    return width, height, pixels
efa97bdc
YCH
5611
5612
def write_xattr(path, key, value):
    """
    Set the extended attribute `key` of the file `path` to `value`

    Backends are tried in this order:
      1. the Python module importable as `xattr` (pyxattr or xattr)
      2. on Windows, an NTFS Alternate Data Stream named after the key
      3. the `setfattr` or `xattr` command line tools

    `value` is written as is by the first two backends and decoded as
    UTF-8 before being handed to a command line tool.

    @raises XAttrMetadataError     if the chosen backend fails to write
    @raises XAttrUnavailableError  if no usable backend is found, or the
                                   installed pyxattr is older than 0.5.0
    """
    try:
        # Prefer an in-process implementation
        import xattr

        if not hasattr(xattr, 'set'):
            # xattr
            set_attribute = xattr.setxattr
        else:
            # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            minimum_pyxattr = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(minimum_pyxattr):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        minimum_pyxattr, xattr.__version__))
            set_attribute = xattr.set

        try:
            set_attribute(path, key, value)
        except EnvironmentError as err:
            raise XAttrMetadataError(err.errno, err.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            stream_name = path + ':' + key
            try:
                with open(stream_name, 'wb') as stream:
                    stream.write(value)
            except EnvironmentError as err:
                raise XAttrMetadataError(err.errno, err.strerror)
            return

        have_setfattr = check_executable('setfattr', ['--version'])
        have_xattr = check_executable('xattr', ['-h'])

        if not (have_setfattr or have_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr takes precedence when both tools are installed
        if have_setfattr:
            tool, tool_args = 'setfattr', ['-n', key, '-v', value]
        else:
            tool, tool_args = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(tool, True)]
        cmd += [encodeArgument(arg) for arg in tool_args]
        cmd.append(encodeFilename(path, True))

        try:
            proc = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as err:
            raise XAttrMetadataError(err.errno, err.strerror)
        _, stderr = proc.communicate()
        stderr = stderr.decode('utf-8', 'replace')
        if proc.returncode != 0:
            raise XAttrMetadataError(proc.returncode, stderr)
0c265486
YCH
5695
5696
def random_birthday(year_field, month_field, day_field):
    """
    Generate a random date of birth between 1950-01-01 and 1995-12-31

    Returns a dict whose keys are the three given field names and whose
    values are the year, month and day of the chosen date as strings
    (without zero padding).
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))

    field_names = (year_field, month_field, day_field)
    parts = (birthday.year, birthday.month, birthday.day)
    return dict((name, str(part)) for name, part in zip(field_names, parts))