]> jfr.im git - yt-dlp.git/blame - youtube_dlc/utils.py
Preparing for release
[yt-dlp.git] / youtube_dlc / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
03f9daab 19import io
79a2e94e 20import itertools
f4bfd65f 21import json
d77c3dfd 22import locale
02dbf93f 23import math
347de493 24import operator
d77c3dfd 25import os
c496ca96 26import platform
773f291d 27import random
d77c3dfd 28import re
c496ca96 29import socket
79a2e94e 30import ssl
1c088fa8 31import subprocess
d77c3dfd 32import sys
181c8655 33import tempfile
c380cc28 34import time
01951dda 35import traceback
bcf89ce6 36import xml.etree.ElementTree
d77c3dfd 37import zlib
d77c3dfd 38
8c25f81b 39from .compat import (
b4a3d461 40 compat_HTMLParseError,
8bb56eee 41 compat_HTMLParser,
8f9312c3 42 compat_basestring,
8c25f81b 43 compat_chr,
1bab3437 44 compat_cookiejar,
d7cd9a9e 45 compat_ctypes_WINFUNCTYPE,
36e6f62c 46 compat_etree_fromstring,
51098426 47 compat_expanduser,
8c25f81b 48 compat_html_entities,
55b2f099 49 compat_html_entities_html5,
be4a824d 50 compat_http_client,
42db58ec 51 compat_integer_types,
c86b6142 52 compat_kwargs,
efa97bdc 53 compat_os_name,
8c25f81b 54 compat_parse_qs,
702ccf2d 55 compat_shlex_quote,
8c25f81b 56 compat_str,
edaa23f8 57 compat_struct_pack,
d3f8e038 58 compat_struct_unpack,
8c25f81b
PH
59 compat_urllib_error,
60 compat_urllib_parse,
15707c7e 61 compat_urllib_parse_urlencode,
8c25f81b 62 compat_urllib_parse_urlparse,
732044af 63 compat_urllib_parse_urlunparse,
64 compat_urllib_parse_quote,
65 compat_urllib_parse_quote_plus,
7581bfc9 66 compat_urllib_parse_unquote_plus,
8c25f81b
PH
67 compat_urllib_request,
68 compat_urlparse,
810c10ba 69 compat_xpath,
8c25f81b 70)
4644ac55 71
71aff188
YCH
72from .socks import (
73 ProxyType,
74 sockssocket,
75)
76
4644ac55 77
51fb4995
YCH
def register_socks_protocols():
    """Add the SOCKS proxy schemes to the URL parser's netloc-aware scheme list.

    In Python < 2.6.5, urlsplit() suffers from bug
    https://bugs.python.org/issue7904: URLs whose protocol is not listed in
    urlparse.uses_netloc are not handled correctly. Appending the SOCKS
    schemes to that list works around the problem.

    Schemes that are already listed are left alone, so calling this
    function repeatedly does not add duplicates.
    """
    netloc_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in netloc_schemes:
            # Already registered; nothing to do for this scheme
            continue
        netloc_schemes.append(socks_scheme)
85
86
468e2e92
FV
# The type of a compiled regular expression is not clearly defined otherwise,
# so take it from an actual compiled (empty) pattern
compiled_regex_type = re.compile('').__class__
89
f7a147e3
S
90
91def random_user_agent():
92 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
93 _CHROME_VERSIONS = (
94 '74.0.3729.129',
95 '76.0.3780.3',
96 '76.0.3780.2',
97 '74.0.3729.128',
98 '76.0.3780.1',
99 '76.0.3780.0',
100 '75.0.3770.15',
101 '74.0.3729.127',
102 '74.0.3729.126',
103 '76.0.3779.1',
104 '76.0.3779.0',
105 '75.0.3770.14',
106 '74.0.3729.125',
107 '76.0.3778.1',
108 '76.0.3778.0',
109 '75.0.3770.13',
110 '74.0.3729.124',
111 '74.0.3729.123',
112 '73.0.3683.121',
113 '76.0.3777.1',
114 '76.0.3777.0',
115 '75.0.3770.12',
116 '74.0.3729.122',
117 '76.0.3776.4',
118 '75.0.3770.11',
119 '74.0.3729.121',
120 '76.0.3776.3',
121 '76.0.3776.2',
122 '73.0.3683.120',
123 '74.0.3729.120',
124 '74.0.3729.119',
125 '74.0.3729.118',
126 '76.0.3776.1',
127 '76.0.3776.0',
128 '76.0.3775.5',
129 '75.0.3770.10',
130 '74.0.3729.117',
131 '76.0.3775.4',
132 '76.0.3775.3',
133 '74.0.3729.116',
134 '75.0.3770.9',
135 '76.0.3775.2',
136 '76.0.3775.1',
137 '76.0.3775.0',
138 '75.0.3770.8',
139 '74.0.3729.115',
140 '74.0.3729.114',
141 '76.0.3774.1',
142 '76.0.3774.0',
143 '75.0.3770.7',
144 '74.0.3729.113',
145 '74.0.3729.112',
146 '74.0.3729.111',
147 '76.0.3773.1',
148 '76.0.3773.0',
149 '75.0.3770.6',
150 '74.0.3729.110',
151 '74.0.3729.109',
152 '76.0.3772.1',
153 '76.0.3772.0',
154 '75.0.3770.5',
155 '74.0.3729.108',
156 '74.0.3729.107',
157 '76.0.3771.1',
158 '76.0.3771.0',
159 '75.0.3770.4',
160 '74.0.3729.106',
161 '74.0.3729.105',
162 '75.0.3770.3',
163 '74.0.3729.104',
164 '74.0.3729.103',
165 '74.0.3729.102',
166 '75.0.3770.2',
167 '74.0.3729.101',
168 '75.0.3770.1',
169 '75.0.3770.0',
170 '74.0.3729.100',
171 '75.0.3769.5',
172 '75.0.3769.4',
173 '74.0.3729.99',
174 '75.0.3769.3',
175 '75.0.3769.2',
176 '75.0.3768.6',
177 '74.0.3729.98',
178 '75.0.3769.1',
179 '75.0.3769.0',
180 '74.0.3729.97',
181 '73.0.3683.119',
182 '73.0.3683.118',
183 '74.0.3729.96',
184 '75.0.3768.5',
185 '75.0.3768.4',
186 '75.0.3768.3',
187 '75.0.3768.2',
188 '74.0.3729.95',
189 '74.0.3729.94',
190 '75.0.3768.1',
191 '75.0.3768.0',
192 '74.0.3729.93',
193 '74.0.3729.92',
194 '73.0.3683.117',
195 '74.0.3729.91',
196 '75.0.3766.3',
197 '74.0.3729.90',
198 '75.0.3767.2',
199 '75.0.3767.1',
200 '75.0.3767.0',
201 '74.0.3729.89',
202 '73.0.3683.116',
203 '75.0.3766.2',
204 '74.0.3729.88',
205 '75.0.3766.1',
206 '75.0.3766.0',
207 '74.0.3729.87',
208 '73.0.3683.115',
209 '74.0.3729.86',
210 '75.0.3765.1',
211 '75.0.3765.0',
212 '74.0.3729.85',
213 '73.0.3683.114',
214 '74.0.3729.84',
215 '75.0.3764.1',
216 '75.0.3764.0',
217 '74.0.3729.83',
218 '73.0.3683.113',
219 '75.0.3763.2',
220 '75.0.3761.4',
221 '74.0.3729.82',
222 '75.0.3763.1',
223 '75.0.3763.0',
224 '74.0.3729.81',
225 '73.0.3683.112',
226 '75.0.3762.1',
227 '75.0.3762.0',
228 '74.0.3729.80',
229 '75.0.3761.3',
230 '74.0.3729.79',
231 '73.0.3683.111',
232 '75.0.3761.2',
233 '74.0.3729.78',
234 '74.0.3729.77',
235 '75.0.3761.1',
236 '75.0.3761.0',
237 '73.0.3683.110',
238 '74.0.3729.76',
239 '74.0.3729.75',
240 '75.0.3760.0',
241 '74.0.3729.74',
242 '75.0.3759.8',
243 '75.0.3759.7',
244 '75.0.3759.6',
245 '74.0.3729.73',
246 '75.0.3759.5',
247 '74.0.3729.72',
248 '73.0.3683.109',
249 '75.0.3759.4',
250 '75.0.3759.3',
251 '74.0.3729.71',
252 '75.0.3759.2',
253 '74.0.3729.70',
254 '73.0.3683.108',
255 '74.0.3729.69',
256 '75.0.3759.1',
257 '75.0.3759.0',
258 '74.0.3729.68',
259 '73.0.3683.107',
260 '74.0.3729.67',
261 '75.0.3758.1',
262 '75.0.3758.0',
263 '74.0.3729.66',
264 '73.0.3683.106',
265 '74.0.3729.65',
266 '75.0.3757.1',
267 '75.0.3757.0',
268 '74.0.3729.64',
269 '73.0.3683.105',
270 '74.0.3729.63',
271 '75.0.3756.1',
272 '75.0.3756.0',
273 '74.0.3729.62',
274 '73.0.3683.104',
275 '75.0.3755.3',
276 '75.0.3755.2',
277 '73.0.3683.103',
278 '75.0.3755.1',
279 '75.0.3755.0',
280 '74.0.3729.61',
281 '73.0.3683.102',
282 '74.0.3729.60',
283 '75.0.3754.2',
284 '74.0.3729.59',
285 '75.0.3753.4',
286 '74.0.3729.58',
287 '75.0.3754.1',
288 '75.0.3754.0',
289 '74.0.3729.57',
290 '73.0.3683.101',
291 '75.0.3753.3',
292 '75.0.3752.2',
293 '75.0.3753.2',
294 '74.0.3729.56',
295 '75.0.3753.1',
296 '75.0.3753.0',
297 '74.0.3729.55',
298 '73.0.3683.100',
299 '74.0.3729.54',
300 '75.0.3752.1',
301 '75.0.3752.0',
302 '74.0.3729.53',
303 '73.0.3683.99',
304 '74.0.3729.52',
305 '75.0.3751.1',
306 '75.0.3751.0',
307 '74.0.3729.51',
308 '73.0.3683.98',
309 '74.0.3729.50',
310 '75.0.3750.0',
311 '74.0.3729.49',
312 '74.0.3729.48',
313 '74.0.3729.47',
314 '75.0.3749.3',
315 '74.0.3729.46',
316 '73.0.3683.97',
317 '75.0.3749.2',
318 '74.0.3729.45',
319 '75.0.3749.1',
320 '75.0.3749.0',
321 '74.0.3729.44',
322 '73.0.3683.96',
323 '74.0.3729.43',
324 '74.0.3729.42',
325 '75.0.3748.1',
326 '75.0.3748.0',
327 '74.0.3729.41',
328 '75.0.3747.1',
329 '73.0.3683.95',
330 '75.0.3746.4',
331 '74.0.3729.40',
332 '74.0.3729.39',
333 '75.0.3747.0',
334 '75.0.3746.3',
335 '75.0.3746.2',
336 '74.0.3729.38',
337 '75.0.3746.1',
338 '75.0.3746.0',
339 '74.0.3729.37',
340 '73.0.3683.94',
341 '75.0.3745.5',
342 '75.0.3745.4',
343 '75.0.3745.3',
344 '75.0.3745.2',
345 '74.0.3729.36',
346 '75.0.3745.1',
347 '75.0.3745.0',
348 '75.0.3744.2',
349 '74.0.3729.35',
350 '73.0.3683.93',
351 '74.0.3729.34',
352 '75.0.3744.1',
353 '75.0.3744.0',
354 '74.0.3729.33',
355 '73.0.3683.92',
356 '74.0.3729.32',
357 '74.0.3729.31',
358 '73.0.3683.91',
359 '75.0.3741.2',
360 '75.0.3740.5',
361 '74.0.3729.30',
362 '75.0.3741.1',
363 '75.0.3741.0',
364 '74.0.3729.29',
365 '75.0.3740.4',
366 '73.0.3683.90',
367 '74.0.3729.28',
368 '75.0.3740.3',
369 '73.0.3683.89',
370 '75.0.3740.2',
371 '74.0.3729.27',
372 '75.0.3740.1',
373 '75.0.3740.0',
374 '74.0.3729.26',
375 '73.0.3683.88',
376 '73.0.3683.87',
377 '74.0.3729.25',
378 '75.0.3739.1',
379 '75.0.3739.0',
380 '73.0.3683.86',
381 '74.0.3729.24',
382 '73.0.3683.85',
383 '75.0.3738.4',
384 '75.0.3738.3',
385 '75.0.3738.2',
386 '75.0.3738.1',
387 '75.0.3738.0',
388 '74.0.3729.23',
389 '73.0.3683.84',
390 '74.0.3729.22',
391 '74.0.3729.21',
392 '75.0.3737.1',
393 '75.0.3737.0',
394 '74.0.3729.20',
395 '73.0.3683.83',
396 '74.0.3729.19',
397 '75.0.3736.1',
398 '75.0.3736.0',
399 '74.0.3729.18',
400 '73.0.3683.82',
401 '74.0.3729.17',
402 '75.0.3735.1',
403 '75.0.3735.0',
404 '74.0.3729.16',
405 '73.0.3683.81',
406 '75.0.3734.1',
407 '75.0.3734.0',
408 '74.0.3729.15',
409 '73.0.3683.80',
410 '74.0.3729.14',
411 '75.0.3733.1',
412 '75.0.3733.0',
413 '75.0.3732.1',
414 '74.0.3729.13',
415 '74.0.3729.12',
416 '73.0.3683.79',
417 '74.0.3729.11',
418 '75.0.3732.0',
419 '74.0.3729.10',
420 '73.0.3683.78',
421 '74.0.3729.9',
422 '74.0.3729.8',
423 '74.0.3729.7',
424 '75.0.3731.3',
425 '75.0.3731.2',
426 '75.0.3731.0',
427 '74.0.3729.6',
428 '73.0.3683.77',
429 '73.0.3683.76',
430 '75.0.3730.5',
431 '75.0.3730.4',
432 '73.0.3683.75',
433 '74.0.3729.5',
434 '73.0.3683.74',
435 '75.0.3730.3',
436 '75.0.3730.2',
437 '74.0.3729.4',
438 '73.0.3683.73',
439 '73.0.3683.72',
440 '75.0.3730.1',
441 '75.0.3730.0',
442 '74.0.3729.3',
443 '73.0.3683.71',
444 '74.0.3729.2',
445 '73.0.3683.70',
446 '74.0.3729.1',
447 '74.0.3729.0',
448 '74.0.3726.4',
449 '73.0.3683.69',
450 '74.0.3726.3',
451 '74.0.3728.0',
452 '74.0.3726.2',
453 '73.0.3683.68',
454 '74.0.3726.1',
455 '74.0.3726.0',
456 '74.0.3725.4',
457 '73.0.3683.67',
458 '73.0.3683.66',
459 '74.0.3725.3',
460 '74.0.3725.2',
461 '74.0.3725.1',
462 '74.0.3724.8',
463 '74.0.3725.0',
464 '73.0.3683.65',
465 '74.0.3724.7',
466 '74.0.3724.6',
467 '74.0.3724.5',
468 '74.0.3724.4',
469 '74.0.3724.3',
470 '74.0.3724.2',
471 '74.0.3724.1',
472 '74.0.3724.0',
473 '73.0.3683.64',
474 '74.0.3723.1',
475 '74.0.3723.0',
476 '73.0.3683.63',
477 '74.0.3722.1',
478 '74.0.3722.0',
479 '73.0.3683.62',
480 '74.0.3718.9',
481 '74.0.3702.3',
482 '74.0.3721.3',
483 '74.0.3721.2',
484 '74.0.3721.1',
485 '74.0.3721.0',
486 '74.0.3720.6',
487 '73.0.3683.61',
488 '72.0.3626.122',
489 '73.0.3683.60',
490 '74.0.3720.5',
491 '72.0.3626.121',
492 '74.0.3718.8',
493 '74.0.3720.4',
494 '74.0.3720.3',
495 '74.0.3718.7',
496 '74.0.3720.2',
497 '74.0.3720.1',
498 '74.0.3720.0',
499 '74.0.3718.6',
500 '74.0.3719.5',
501 '73.0.3683.59',
502 '74.0.3718.5',
503 '74.0.3718.4',
504 '74.0.3719.4',
505 '74.0.3719.3',
506 '74.0.3719.2',
507 '74.0.3719.1',
508 '73.0.3683.58',
509 '74.0.3719.0',
510 '73.0.3683.57',
511 '73.0.3683.56',
512 '74.0.3718.3',
513 '73.0.3683.55',
514 '74.0.3718.2',
515 '74.0.3718.1',
516 '74.0.3718.0',
517 '73.0.3683.54',
518 '74.0.3717.2',
519 '73.0.3683.53',
520 '74.0.3717.1',
521 '74.0.3717.0',
522 '73.0.3683.52',
523 '74.0.3716.1',
524 '74.0.3716.0',
525 '73.0.3683.51',
526 '74.0.3715.1',
527 '74.0.3715.0',
528 '73.0.3683.50',
529 '74.0.3711.2',
530 '74.0.3714.2',
531 '74.0.3713.3',
532 '74.0.3714.1',
533 '74.0.3714.0',
534 '73.0.3683.49',
535 '74.0.3713.1',
536 '74.0.3713.0',
537 '72.0.3626.120',
538 '73.0.3683.48',
539 '74.0.3712.2',
540 '74.0.3712.1',
541 '74.0.3712.0',
542 '73.0.3683.47',
543 '72.0.3626.119',
544 '73.0.3683.46',
545 '74.0.3710.2',
546 '72.0.3626.118',
547 '74.0.3711.1',
548 '74.0.3711.0',
549 '73.0.3683.45',
550 '72.0.3626.117',
551 '74.0.3710.1',
552 '74.0.3710.0',
553 '73.0.3683.44',
554 '72.0.3626.116',
555 '74.0.3709.1',
556 '74.0.3709.0',
557 '74.0.3704.9',
558 '73.0.3683.43',
559 '72.0.3626.115',
560 '74.0.3704.8',
561 '74.0.3704.7',
562 '74.0.3708.0',
563 '74.0.3706.7',
564 '74.0.3704.6',
565 '73.0.3683.42',
566 '72.0.3626.114',
567 '74.0.3706.6',
568 '72.0.3626.113',
569 '74.0.3704.5',
570 '74.0.3706.5',
571 '74.0.3706.4',
572 '74.0.3706.3',
573 '74.0.3706.2',
574 '74.0.3706.1',
575 '74.0.3706.0',
576 '73.0.3683.41',
577 '72.0.3626.112',
578 '74.0.3705.1',
579 '74.0.3705.0',
580 '73.0.3683.40',
581 '72.0.3626.111',
582 '73.0.3683.39',
583 '74.0.3704.4',
584 '73.0.3683.38',
585 '74.0.3704.3',
586 '74.0.3704.2',
587 '74.0.3704.1',
588 '74.0.3704.0',
589 '73.0.3683.37',
590 '72.0.3626.110',
591 '72.0.3626.109',
592 '74.0.3703.3',
593 '74.0.3703.2',
594 '73.0.3683.36',
595 '74.0.3703.1',
596 '74.0.3703.0',
597 '73.0.3683.35',
598 '72.0.3626.108',
599 '74.0.3702.2',
600 '74.0.3699.3',
601 '74.0.3702.1',
602 '74.0.3702.0',
603 '73.0.3683.34',
604 '72.0.3626.107',
605 '73.0.3683.33',
606 '74.0.3701.1',
607 '74.0.3701.0',
608 '73.0.3683.32',
609 '73.0.3683.31',
610 '72.0.3626.105',
611 '74.0.3700.1',
612 '74.0.3700.0',
613 '73.0.3683.29',
614 '72.0.3626.103',
615 '74.0.3699.2',
616 '74.0.3699.1',
617 '74.0.3699.0',
618 '73.0.3683.28',
619 '72.0.3626.102',
620 '73.0.3683.27',
621 '73.0.3683.26',
622 '74.0.3698.0',
623 '74.0.3696.2',
624 '72.0.3626.101',
625 '73.0.3683.25',
626 '74.0.3696.1',
627 '74.0.3696.0',
628 '74.0.3694.8',
629 '72.0.3626.100',
630 '74.0.3694.7',
631 '74.0.3694.6',
632 '74.0.3694.5',
633 '74.0.3694.4',
634 '72.0.3626.99',
635 '72.0.3626.98',
636 '74.0.3694.3',
637 '73.0.3683.24',
638 '72.0.3626.97',
639 '72.0.3626.96',
640 '72.0.3626.95',
641 '73.0.3683.23',
642 '72.0.3626.94',
643 '73.0.3683.22',
644 '73.0.3683.21',
645 '72.0.3626.93',
646 '74.0.3694.2',
647 '72.0.3626.92',
648 '74.0.3694.1',
649 '74.0.3694.0',
650 '74.0.3693.6',
651 '73.0.3683.20',
652 '72.0.3626.91',
653 '74.0.3693.5',
654 '74.0.3693.4',
655 '74.0.3693.3',
656 '74.0.3693.2',
657 '73.0.3683.19',
658 '74.0.3693.1',
659 '74.0.3693.0',
660 '73.0.3683.18',
661 '72.0.3626.90',
662 '74.0.3692.1',
663 '74.0.3692.0',
664 '73.0.3683.17',
665 '72.0.3626.89',
666 '74.0.3687.3',
667 '74.0.3691.1',
668 '74.0.3691.0',
669 '73.0.3683.16',
670 '72.0.3626.88',
671 '72.0.3626.87',
672 '73.0.3683.15',
673 '74.0.3690.1',
674 '74.0.3690.0',
675 '73.0.3683.14',
676 '72.0.3626.86',
677 '73.0.3683.13',
678 '73.0.3683.12',
679 '74.0.3689.1',
680 '74.0.3689.0',
681 '73.0.3683.11',
682 '72.0.3626.85',
683 '73.0.3683.10',
684 '72.0.3626.84',
685 '73.0.3683.9',
686 '74.0.3688.1',
687 '74.0.3688.0',
688 '73.0.3683.8',
689 '72.0.3626.83',
690 '74.0.3687.2',
691 '74.0.3687.1',
692 '74.0.3687.0',
693 '73.0.3683.7',
694 '72.0.3626.82',
695 '74.0.3686.4',
696 '72.0.3626.81',
697 '74.0.3686.3',
698 '74.0.3686.2',
699 '74.0.3686.1',
700 '74.0.3686.0',
701 '73.0.3683.6',
702 '72.0.3626.80',
703 '74.0.3685.1',
704 '74.0.3685.0',
705 '73.0.3683.5',
706 '72.0.3626.79',
707 '74.0.3684.1',
708 '74.0.3684.0',
709 '73.0.3683.4',
710 '72.0.3626.78',
711 '72.0.3626.77',
712 '73.0.3683.3',
713 '73.0.3683.2',
714 '72.0.3626.76',
715 '73.0.3683.1',
716 '73.0.3683.0',
717 '72.0.3626.75',
718 '71.0.3578.141',
719 '73.0.3682.1',
720 '73.0.3682.0',
721 '72.0.3626.74',
722 '71.0.3578.140',
723 '73.0.3681.4',
724 '73.0.3681.3',
725 '73.0.3681.2',
726 '73.0.3681.1',
727 '73.0.3681.0',
728 '72.0.3626.73',
729 '71.0.3578.139',
730 '72.0.3626.72',
731 '72.0.3626.71',
732 '73.0.3680.1',
733 '73.0.3680.0',
734 '72.0.3626.70',
735 '71.0.3578.138',
736 '73.0.3678.2',
737 '73.0.3679.1',
738 '73.0.3679.0',
739 '72.0.3626.69',
740 '71.0.3578.137',
741 '73.0.3678.1',
742 '73.0.3678.0',
743 '71.0.3578.136',
744 '73.0.3677.1',
745 '73.0.3677.0',
746 '72.0.3626.68',
747 '72.0.3626.67',
748 '71.0.3578.135',
749 '73.0.3676.1',
750 '73.0.3676.0',
751 '73.0.3674.2',
752 '72.0.3626.66',
753 '71.0.3578.134',
754 '73.0.3674.1',
755 '73.0.3674.0',
756 '72.0.3626.65',
757 '71.0.3578.133',
758 '73.0.3673.2',
759 '73.0.3673.1',
760 '73.0.3673.0',
761 '72.0.3626.64',
762 '71.0.3578.132',
763 '72.0.3626.63',
764 '72.0.3626.62',
765 '72.0.3626.61',
766 '72.0.3626.60',
767 '73.0.3672.1',
768 '73.0.3672.0',
769 '72.0.3626.59',
770 '71.0.3578.131',
771 '73.0.3671.3',
772 '73.0.3671.2',
773 '73.0.3671.1',
774 '73.0.3671.0',
775 '72.0.3626.58',
776 '71.0.3578.130',
777 '73.0.3670.1',
778 '73.0.3670.0',
779 '72.0.3626.57',
780 '71.0.3578.129',
781 '73.0.3669.1',
782 '73.0.3669.0',
783 '72.0.3626.56',
784 '71.0.3578.128',
785 '73.0.3668.2',
786 '73.0.3668.1',
787 '73.0.3668.0',
788 '72.0.3626.55',
789 '71.0.3578.127',
790 '73.0.3667.2',
791 '73.0.3667.1',
792 '73.0.3667.0',
793 '72.0.3626.54',
794 '71.0.3578.126',
795 '73.0.3666.1',
796 '73.0.3666.0',
797 '72.0.3626.53',
798 '71.0.3578.125',
799 '73.0.3665.4',
800 '73.0.3665.3',
801 '72.0.3626.52',
802 '73.0.3665.2',
803 '73.0.3664.4',
804 '73.0.3665.1',
805 '73.0.3665.0',
806 '72.0.3626.51',
807 '71.0.3578.124',
808 '72.0.3626.50',
809 '73.0.3664.3',
810 '73.0.3664.2',
811 '73.0.3664.1',
812 '73.0.3664.0',
813 '73.0.3663.2',
814 '72.0.3626.49',
815 '71.0.3578.123',
816 '73.0.3663.1',
817 '73.0.3663.0',
818 '72.0.3626.48',
819 '71.0.3578.122',
820 '73.0.3662.1',
821 '73.0.3662.0',
822 '72.0.3626.47',
823 '71.0.3578.121',
824 '73.0.3661.1',
825 '72.0.3626.46',
826 '73.0.3661.0',
827 '72.0.3626.45',
828 '71.0.3578.120',
829 '73.0.3660.2',
830 '73.0.3660.1',
831 '73.0.3660.0',
832 '72.0.3626.44',
833 '71.0.3578.119',
834 '73.0.3659.1',
835 '73.0.3659.0',
836 '72.0.3626.43',
837 '71.0.3578.118',
838 '73.0.3658.1',
839 '73.0.3658.0',
840 '72.0.3626.42',
841 '71.0.3578.117',
842 '73.0.3657.1',
843 '73.0.3657.0',
844 '72.0.3626.41',
845 '71.0.3578.116',
846 '73.0.3656.1',
847 '73.0.3656.0',
848 '72.0.3626.40',
849 '71.0.3578.115',
850 '73.0.3655.1',
851 '73.0.3655.0',
852 '72.0.3626.39',
853 '71.0.3578.114',
854 '73.0.3654.1',
855 '73.0.3654.0',
856 '72.0.3626.38',
857 '71.0.3578.113',
858 '73.0.3653.1',
859 '73.0.3653.0',
860 '72.0.3626.37',
861 '71.0.3578.112',
862 '73.0.3652.1',
863 '73.0.3652.0',
864 '72.0.3626.36',
865 '71.0.3578.111',
866 '73.0.3651.1',
867 '73.0.3651.0',
868 '72.0.3626.35',
869 '71.0.3578.110',
870 '73.0.3650.1',
871 '73.0.3650.0',
872 '72.0.3626.34',
873 '71.0.3578.109',
874 '73.0.3649.1',
875 '73.0.3649.0',
876 '72.0.3626.33',
877 '71.0.3578.108',
878 '73.0.3648.2',
879 '73.0.3648.1',
880 '73.0.3648.0',
881 '72.0.3626.32',
882 '71.0.3578.107',
883 '73.0.3647.2',
884 '73.0.3647.1',
885 '73.0.3647.0',
886 '72.0.3626.31',
887 '71.0.3578.106',
888 '73.0.3635.3',
889 '73.0.3646.2',
890 '73.0.3646.1',
891 '73.0.3646.0',
892 '72.0.3626.30',
893 '71.0.3578.105',
894 '72.0.3626.29',
895 '73.0.3645.2',
896 '73.0.3645.1',
897 '73.0.3645.0',
898 '72.0.3626.28',
899 '71.0.3578.104',
900 '72.0.3626.27',
901 '72.0.3626.26',
902 '72.0.3626.25',
903 '72.0.3626.24',
904 '73.0.3644.0',
905 '73.0.3643.2',
906 '72.0.3626.23',
907 '71.0.3578.103',
908 '73.0.3643.1',
909 '73.0.3643.0',
910 '72.0.3626.22',
911 '71.0.3578.102',
912 '73.0.3642.1',
913 '73.0.3642.0',
914 '72.0.3626.21',
915 '71.0.3578.101',
916 '73.0.3641.1',
917 '73.0.3641.0',
918 '72.0.3626.20',
919 '71.0.3578.100',
920 '72.0.3626.19',
921 '73.0.3640.1',
922 '73.0.3640.0',
923 '72.0.3626.18',
924 '73.0.3639.1',
925 '71.0.3578.99',
926 '73.0.3639.0',
927 '72.0.3626.17',
928 '73.0.3638.2',
929 '72.0.3626.16',
930 '73.0.3638.1',
931 '73.0.3638.0',
932 '72.0.3626.15',
933 '71.0.3578.98',
934 '73.0.3635.2',
935 '71.0.3578.97',
936 '73.0.3637.1',
937 '73.0.3637.0',
938 '72.0.3626.14',
939 '71.0.3578.96',
940 '71.0.3578.95',
941 '72.0.3626.13',
942 '71.0.3578.94',
943 '73.0.3636.2',
944 '71.0.3578.93',
945 '73.0.3636.1',
946 '73.0.3636.0',
947 '72.0.3626.12',
948 '71.0.3578.92',
949 '73.0.3635.1',
950 '73.0.3635.0',
951 '72.0.3626.11',
952 '71.0.3578.91',
953 '73.0.3634.2',
954 '73.0.3634.1',
955 '73.0.3634.0',
956 '72.0.3626.10',
957 '71.0.3578.90',
958 '71.0.3578.89',
959 '73.0.3633.2',
960 '73.0.3633.1',
961 '73.0.3633.0',
962 '72.0.3610.4',
963 '72.0.3626.9',
964 '71.0.3578.88',
965 '73.0.3632.5',
966 '73.0.3632.4',
967 '73.0.3632.3',
968 '73.0.3632.2',
969 '73.0.3632.1',
970 '73.0.3632.0',
971 '72.0.3626.8',
972 '71.0.3578.87',
973 '73.0.3631.2',
974 '73.0.3631.1',
975 '73.0.3631.0',
976 '72.0.3626.7',
977 '71.0.3578.86',
978 '72.0.3626.6',
979 '73.0.3630.1',
980 '73.0.3630.0',
981 '72.0.3626.5',
982 '71.0.3578.85',
983 '72.0.3626.4',
984 '73.0.3628.3',
985 '73.0.3628.2',
986 '73.0.3629.1',
987 '73.0.3629.0',
988 '72.0.3626.3',
989 '71.0.3578.84',
990 '73.0.3628.1',
991 '73.0.3628.0',
992 '71.0.3578.83',
993 '73.0.3627.1',
994 '73.0.3627.0',
995 '72.0.3626.2',
996 '71.0.3578.82',
997 '71.0.3578.81',
998 '71.0.3578.80',
999 '72.0.3626.1',
1000 '72.0.3626.0',
1001 '71.0.3578.79',
1002 '70.0.3538.124',
1003 '71.0.3578.78',
1004 '72.0.3623.4',
1005 '72.0.3625.2',
1006 '72.0.3625.1',
1007 '72.0.3625.0',
1008 '71.0.3578.77',
1009 '70.0.3538.123',
1010 '72.0.3624.4',
1011 '72.0.3624.3',
1012 '72.0.3624.2',
1013 '71.0.3578.76',
1014 '72.0.3624.1',
1015 '72.0.3624.0',
1016 '72.0.3623.3',
1017 '71.0.3578.75',
1018 '70.0.3538.122',
1019 '71.0.3578.74',
1020 '72.0.3623.2',
1021 '72.0.3610.3',
1022 '72.0.3623.1',
1023 '72.0.3623.0',
1024 '72.0.3622.3',
1025 '72.0.3622.2',
1026 '71.0.3578.73',
1027 '70.0.3538.121',
1028 '72.0.3622.1',
1029 '72.0.3622.0',
1030 '71.0.3578.72',
1031 '70.0.3538.120',
1032 '72.0.3621.1',
1033 '72.0.3621.0',
1034 '71.0.3578.71',
1035 '70.0.3538.119',
1036 '72.0.3620.1',
1037 '72.0.3620.0',
1038 '71.0.3578.70',
1039 '70.0.3538.118',
1040 '71.0.3578.69',
1041 '72.0.3619.1',
1042 '72.0.3619.0',
1043 '71.0.3578.68',
1044 '70.0.3538.117',
1045 '71.0.3578.67',
1046 '72.0.3618.1',
1047 '72.0.3618.0',
1048 '71.0.3578.66',
1049 '70.0.3538.116',
1050 '72.0.3617.1',
1051 '72.0.3617.0',
1052 '71.0.3578.65',
1053 '70.0.3538.115',
1054 '72.0.3602.3',
1055 '71.0.3578.64',
1056 '72.0.3616.1',
1057 '72.0.3616.0',
1058 '71.0.3578.63',
1059 '70.0.3538.114',
1060 '71.0.3578.62',
1061 '72.0.3615.1',
1062 '72.0.3615.0',
1063 '71.0.3578.61',
1064 '70.0.3538.113',
1065 '72.0.3614.1',
1066 '72.0.3614.0',
1067 '71.0.3578.60',
1068 '70.0.3538.112',
1069 '72.0.3613.1',
1070 '72.0.3613.0',
1071 '71.0.3578.59',
1072 '70.0.3538.111',
1073 '72.0.3612.2',
1074 '72.0.3612.1',
1075 '72.0.3612.0',
1076 '70.0.3538.110',
1077 '71.0.3578.58',
1078 '70.0.3538.109',
1079 '72.0.3611.2',
1080 '72.0.3611.1',
1081 '72.0.3611.0',
1082 '71.0.3578.57',
1083 '70.0.3538.108',
1084 '72.0.3610.2',
1085 '71.0.3578.56',
1086 '71.0.3578.55',
1087 '72.0.3610.1',
1088 '72.0.3610.0',
1089 '71.0.3578.54',
1090 '70.0.3538.107',
1091 '71.0.3578.53',
1092 '72.0.3609.3',
1093 '71.0.3578.52',
1094 '72.0.3609.2',
1095 '71.0.3578.51',
1096 '72.0.3608.5',
1097 '72.0.3609.1',
1098 '72.0.3609.0',
1099 '71.0.3578.50',
1100 '70.0.3538.106',
1101 '72.0.3608.4',
1102 '72.0.3608.3',
1103 '72.0.3608.2',
1104 '71.0.3578.49',
1105 '72.0.3608.1',
1106 '72.0.3608.0',
1107 '70.0.3538.105',
1108 '71.0.3578.48',
1109 '72.0.3607.1',
1110 '72.0.3607.0',
1111 '71.0.3578.47',
1112 '70.0.3538.104',
1113 '72.0.3606.2',
1114 '72.0.3606.1',
1115 '72.0.3606.0',
1116 '71.0.3578.46',
1117 '70.0.3538.103',
1118 '70.0.3538.102',
1119 '72.0.3605.3',
1120 '72.0.3605.2',
1121 '72.0.3605.1',
1122 '72.0.3605.0',
1123 '71.0.3578.45',
1124 '70.0.3538.101',
1125 '71.0.3578.44',
1126 '71.0.3578.43',
1127 '70.0.3538.100',
1128 '70.0.3538.99',
1129 '71.0.3578.42',
1130 '72.0.3604.1',
1131 '72.0.3604.0',
1132 '71.0.3578.41',
1133 '70.0.3538.98',
1134 '71.0.3578.40',
1135 '72.0.3603.2',
1136 '72.0.3603.1',
1137 '72.0.3603.0',
1138 '71.0.3578.39',
1139 '70.0.3538.97',
1140 '72.0.3602.2',
1141 '71.0.3578.38',
1142 '71.0.3578.37',
1143 '72.0.3602.1',
1144 '72.0.3602.0',
1145 '71.0.3578.36',
1146 '70.0.3538.96',
1147 '72.0.3601.1',
1148 '72.0.3601.0',
1149 '71.0.3578.35',
1150 '70.0.3538.95',
1151 '72.0.3600.1',
1152 '72.0.3600.0',
1153 '71.0.3578.34',
1154 '70.0.3538.94',
1155 '72.0.3599.3',
1156 '72.0.3599.2',
1157 '72.0.3599.1',
1158 '72.0.3599.0',
1159 '71.0.3578.33',
1160 '70.0.3538.93',
1161 '72.0.3598.1',
1162 '72.0.3598.0',
1163 '71.0.3578.32',
1164 '70.0.3538.87',
1165 '72.0.3597.1',
1166 '72.0.3597.0',
1167 '72.0.3596.2',
1168 '71.0.3578.31',
1169 '70.0.3538.86',
1170 '71.0.3578.30',
1171 '71.0.3578.29',
1172 '72.0.3596.1',
1173 '72.0.3596.0',
1174 '71.0.3578.28',
1175 '70.0.3538.85',
1176 '72.0.3595.2',
1177 '72.0.3591.3',
1178 '72.0.3595.1',
1179 '72.0.3595.0',
1180 '71.0.3578.27',
1181 '70.0.3538.84',
1182 '72.0.3594.1',
1183 '72.0.3594.0',
1184 '71.0.3578.26',
1185 '70.0.3538.83',
1186 '72.0.3593.2',
1187 '72.0.3593.1',
1188 '72.0.3593.0',
1189 '71.0.3578.25',
1190 '70.0.3538.82',
1191 '72.0.3589.3',
1192 '72.0.3592.2',
1193 '72.0.3592.1',
1194 '72.0.3592.0',
1195 '71.0.3578.24',
1196 '72.0.3589.2',
1197 '70.0.3538.81',
1198 '70.0.3538.80',
1199 '72.0.3591.2',
1200 '72.0.3591.1',
1201 '72.0.3591.0',
1202 '71.0.3578.23',
1203 '70.0.3538.79',
1204 '71.0.3578.22',
1205 '72.0.3590.1',
1206 '72.0.3590.0',
1207 '71.0.3578.21',
1208 '70.0.3538.78',
1209 '70.0.3538.77',
1210 '72.0.3589.1',
1211 '72.0.3589.0',
1212 '71.0.3578.20',
1213 '70.0.3538.76',
1214 '71.0.3578.19',
1215 '70.0.3538.75',
1216 '72.0.3588.1',
1217 '72.0.3588.0',
1218 '71.0.3578.18',
1219 '70.0.3538.74',
1220 '72.0.3586.2',
1221 '72.0.3587.0',
1222 '71.0.3578.17',
1223 '70.0.3538.73',
1224 '72.0.3586.1',
1225 '72.0.3586.0',
1226 '71.0.3578.16',
1227 '70.0.3538.72',
1228 '72.0.3585.1',
1229 '72.0.3585.0',
1230 '71.0.3578.15',
1231 '70.0.3538.71',
1232 '71.0.3578.14',
1233 '72.0.3584.1',
1234 '72.0.3584.0',
1235 '71.0.3578.13',
1236 '70.0.3538.70',
1237 '72.0.3583.2',
1238 '71.0.3578.12',
1239 '72.0.3583.1',
1240 '72.0.3583.0',
1241 '71.0.3578.11',
1242 '70.0.3538.69',
1243 '71.0.3578.10',
1244 '72.0.3582.0',
1245 '72.0.3581.4',
1246 '71.0.3578.9',
1247 '70.0.3538.67',
1248 '72.0.3581.3',
1249 '72.0.3581.2',
1250 '72.0.3581.1',
1251 '72.0.3581.0',
1252 '71.0.3578.8',
1253 '70.0.3538.66',
1254 '72.0.3580.1',
1255 '72.0.3580.0',
1256 '71.0.3578.7',
1257 '70.0.3538.65',
1258 '71.0.3578.6',
1259 '72.0.3579.1',
1260 '72.0.3579.0',
1261 '71.0.3578.5',
1262 '70.0.3538.64',
1263 '71.0.3578.4',
1264 '71.0.3578.3',
1265 '71.0.3578.2',
1266 '71.0.3578.1',
1267 '71.0.3578.0',
1268 '70.0.3538.63',
1269 '69.0.3497.128',
1270 '70.0.3538.62',
1271 '70.0.3538.61',
1272 '70.0.3538.60',
1273 '70.0.3538.59',
1274 '71.0.3577.1',
1275 '71.0.3577.0',
1276 '70.0.3538.58',
1277 '69.0.3497.127',
1278 '71.0.3576.2',
1279 '71.0.3576.1',
1280 '71.0.3576.0',
1281 '70.0.3538.57',
1282 '70.0.3538.56',
1283 '71.0.3575.2',
1284 '70.0.3538.55',
1285 '69.0.3497.126',
1286 '70.0.3538.54',
1287 '71.0.3575.1',
1288 '71.0.3575.0',
1289 '71.0.3574.1',
1290 '71.0.3574.0',
1291 '70.0.3538.53',
1292 '69.0.3497.125',
1293 '70.0.3538.52',
1294 '71.0.3573.1',
1295 '71.0.3573.0',
1296 '70.0.3538.51',
1297 '69.0.3497.124',
1298 '71.0.3572.1',
1299 '71.0.3572.0',
1300 '70.0.3538.50',
1301 '69.0.3497.123',
1302 '71.0.3571.2',
1303 '70.0.3538.49',
1304 '69.0.3497.122',
1305 '71.0.3571.1',
1306 '71.0.3571.0',
1307 '70.0.3538.48',
1308 '69.0.3497.121',
1309 '71.0.3570.1',
1310 '71.0.3570.0',
1311 '70.0.3538.47',
1312 '69.0.3497.120',
1313 '71.0.3568.2',
1314 '71.0.3569.1',
1315 '71.0.3569.0',
1316 '70.0.3538.46',
1317 '69.0.3497.119',
1318 '70.0.3538.45',
1319 '71.0.3568.1',
1320 '71.0.3568.0',
1321 '70.0.3538.44',
1322 '69.0.3497.118',
1323 '70.0.3538.43',
1324 '70.0.3538.42',
1325 '71.0.3567.1',
1326 '71.0.3567.0',
1327 '70.0.3538.41',
1328 '69.0.3497.117',
1329 '71.0.3566.1',
1330 '71.0.3566.0',
1331 '70.0.3538.40',
1332 '69.0.3497.116',
1333 '71.0.3565.1',
1334 '71.0.3565.0',
1335 '70.0.3538.39',
1336 '69.0.3497.115',
1337 '71.0.3564.1',
1338 '71.0.3564.0',
1339 '70.0.3538.38',
1340 '69.0.3497.114',
1341 '71.0.3563.0',
1342 '71.0.3562.2',
1343 '70.0.3538.37',
1344 '69.0.3497.113',
1345 '70.0.3538.36',
1346 '70.0.3538.35',
1347 '71.0.3562.1',
1348 '71.0.3562.0',
1349 '70.0.3538.34',
1350 '69.0.3497.112',
1351 '70.0.3538.33',
1352 '71.0.3561.1',
1353 '71.0.3561.0',
1354 '70.0.3538.32',
1355 '69.0.3497.111',
1356 '71.0.3559.6',
1357 '71.0.3560.1',
1358 '71.0.3560.0',
1359 '71.0.3559.5',
1360 '71.0.3559.4',
1361 '70.0.3538.31',
1362 '69.0.3497.110',
1363 '71.0.3559.3',
1364 '70.0.3538.30',
1365 '69.0.3497.109',
1366 '71.0.3559.2',
1367 '71.0.3559.1',
1368 '71.0.3559.0',
1369 '70.0.3538.29',
1370 '69.0.3497.108',
1371 '71.0.3558.2',
1372 '71.0.3558.1',
1373 '71.0.3558.0',
1374 '70.0.3538.28',
1375 '69.0.3497.107',
1376 '71.0.3557.2',
1377 '71.0.3557.1',
1378 '71.0.3557.0',
1379 '70.0.3538.27',
1380 '69.0.3497.106',
1381 '71.0.3554.4',
1382 '70.0.3538.26',
1383 '71.0.3556.1',
1384 '71.0.3556.0',
1385 '70.0.3538.25',
1386 '71.0.3554.3',
1387 '69.0.3497.105',
1388 '71.0.3554.2',
1389 '70.0.3538.24',
1390 '69.0.3497.104',
1391 '71.0.3555.2',
1392 '70.0.3538.23',
1393 '71.0.3555.1',
1394 '71.0.3555.0',
1395 '70.0.3538.22',
1396 '69.0.3497.103',
1397 '71.0.3554.1',
1398 '71.0.3554.0',
1399 '70.0.3538.21',
1400 '69.0.3497.102',
1401 '71.0.3553.3',
1402 '70.0.3538.20',
1403 '69.0.3497.101',
1404 '71.0.3553.2',
1405 '69.0.3497.100',
1406 '71.0.3553.1',
1407 '71.0.3553.0',
1408 '70.0.3538.19',
1409 '69.0.3497.99',
1410 '69.0.3497.98',
1411 '69.0.3497.97',
1412 '71.0.3552.6',
1413 '71.0.3552.5',
1414 '71.0.3552.4',
1415 '71.0.3552.3',
1416 '71.0.3552.2',
1417 '71.0.3552.1',
1418 '71.0.3552.0',
1419 '70.0.3538.18',
1420 '69.0.3497.96',
1421 '71.0.3551.3',
1422 '71.0.3551.2',
1423 '71.0.3551.1',
1424 '71.0.3551.0',
1425 '70.0.3538.17',
1426 '69.0.3497.95',
1427 '71.0.3550.3',
1428 '71.0.3550.2',
1429 '71.0.3550.1',
1430 '71.0.3550.0',
1431 '70.0.3538.16',
1432 '69.0.3497.94',
1433 '71.0.3549.1',
1434 '71.0.3549.0',
1435 '70.0.3538.15',
1436 '69.0.3497.93',
1437 '69.0.3497.92',
1438 '71.0.3548.1',
1439 '71.0.3548.0',
1440 '70.0.3538.14',
1441 '69.0.3497.91',
1442 '71.0.3547.1',
1443 '71.0.3547.0',
1444 '70.0.3538.13',
1445 '69.0.3497.90',
1446 '71.0.3546.2',
1447 '69.0.3497.89',
1448 '71.0.3546.1',
1449 '71.0.3546.0',
1450 '70.0.3538.12',
1451 '69.0.3497.88',
1452 '71.0.3545.4',
1453 '71.0.3545.3',
1454 '71.0.3545.2',
1455 '71.0.3545.1',
1456 '71.0.3545.0',
1457 '70.0.3538.11',
1458 '69.0.3497.87',
1459 '71.0.3544.5',
1460 '71.0.3544.4',
1461 '71.0.3544.3',
1462 '71.0.3544.2',
1463 '71.0.3544.1',
1464 '71.0.3544.0',
1465 '69.0.3497.86',
1466 '70.0.3538.10',
1467 '69.0.3497.85',
1468 '70.0.3538.9',
1469 '69.0.3497.84',
1470 '71.0.3543.4',
1471 '70.0.3538.8',
1472 '71.0.3543.3',
1473 '71.0.3543.2',
1474 '71.0.3543.1',
1475 '71.0.3543.0',
1476 '70.0.3538.7',
1477 '69.0.3497.83',
1478 '71.0.3542.2',
1479 '71.0.3542.1',
1480 '71.0.3542.0',
1481 '70.0.3538.6',
1482 '69.0.3497.82',
1483 '69.0.3497.81',
1484 '71.0.3541.1',
1485 '71.0.3541.0',
1486 '70.0.3538.5',
1487 '69.0.3497.80',
1488 '71.0.3540.1',
1489 '71.0.3540.0',
1490 '70.0.3538.4',
1491 '69.0.3497.79',
1492 '70.0.3538.3',
1493 '71.0.3539.1',
1494 '71.0.3539.0',
1495 '69.0.3497.78',
1496 '68.0.3440.134',
1497 '69.0.3497.77',
1498 '70.0.3538.2',
1499 '70.0.3538.1',
1500 '70.0.3538.0',
1501 '69.0.3497.76',
1502 '68.0.3440.133',
1503 '69.0.3497.75',
1504 '70.0.3537.2',
1505 '70.0.3537.1',
1506 '70.0.3537.0',
1507 '69.0.3497.74',
1508 '68.0.3440.132',
1509 '70.0.3536.0',
1510 '70.0.3535.5',
1511 '70.0.3535.4',
1512 '70.0.3535.3',
1513 '69.0.3497.73',
1514 '68.0.3440.131',
1515 '70.0.3532.8',
1516 '70.0.3532.7',
1517 '69.0.3497.72',
1518 '69.0.3497.71',
1519 '70.0.3535.2',
1520 '70.0.3535.1',
1521 '70.0.3535.0',
1522 '69.0.3497.70',
1523 '68.0.3440.130',
1524 '69.0.3497.69',
1525 '68.0.3440.129',
1526 '70.0.3534.4',
1527 '70.0.3534.3',
1528 '70.0.3534.2',
1529 '70.0.3534.1',
1530 '70.0.3534.0',
1531 '69.0.3497.68',
1532 '68.0.3440.128',
1533 '70.0.3533.2',
1534 '70.0.3533.1',
1535 '70.0.3533.0',
1536 '69.0.3497.67',
1537 '68.0.3440.127',
1538 '70.0.3532.6',
1539 '70.0.3532.5',
1540 '70.0.3532.4',
1541 '69.0.3497.66',
1542 '68.0.3440.126',
1543 '70.0.3532.3',
1544 '70.0.3532.2',
1545 '70.0.3532.1',
1546 '69.0.3497.60',
1547 '69.0.3497.65',
1548 '69.0.3497.64',
1549 '70.0.3532.0',
1550 '70.0.3531.0',
1551 '70.0.3530.4',
1552 '70.0.3530.3',
1553 '70.0.3530.2',
1554 '69.0.3497.58',
1555 '68.0.3440.125',
1556 '69.0.3497.57',
1557 '69.0.3497.56',
1558 '69.0.3497.55',
1559 '69.0.3497.54',
1560 '70.0.3530.1',
1561 '70.0.3530.0',
1562 '69.0.3497.53',
1563 '68.0.3440.124',
1564 '69.0.3497.52',
1565 '70.0.3529.3',
1566 '70.0.3529.2',
1567 '70.0.3529.1',
1568 '70.0.3529.0',
1569 '69.0.3497.51',
1570 '70.0.3528.4',
1571 '68.0.3440.123',
1572 '70.0.3528.3',
1573 '70.0.3528.2',
1574 '70.0.3528.1',
1575 '70.0.3528.0',
1576 '69.0.3497.50',
1577 '68.0.3440.122',
1578 '70.0.3527.1',
1579 '70.0.3527.0',
1580 '69.0.3497.49',
1581 '68.0.3440.121',
1582 '70.0.3526.1',
1583 '70.0.3526.0',
1584 '68.0.3440.120',
1585 '69.0.3497.48',
1586 '69.0.3497.47',
1587 '68.0.3440.119',
1588 '68.0.3440.118',
1589 '70.0.3525.5',
1590 '70.0.3525.4',
1591 '70.0.3525.3',
1592 '68.0.3440.117',
1593 '69.0.3497.46',
1594 '70.0.3525.2',
1595 '70.0.3525.1',
1596 '70.0.3525.0',
1597 '69.0.3497.45',
1598 '68.0.3440.116',
1599 '70.0.3524.4',
1600 '70.0.3524.3',
1601 '69.0.3497.44',
1602 '70.0.3524.2',
1603 '70.0.3524.1',
1604 '70.0.3524.0',
1605 '70.0.3523.2',
1606 '69.0.3497.43',
1607 '68.0.3440.115',
1608 '70.0.3505.9',
1609 '69.0.3497.42',
1610 '70.0.3505.8',
1611 '70.0.3523.1',
1612 '70.0.3523.0',
1613 '69.0.3497.41',
1614 '68.0.3440.114',
1615 '70.0.3505.7',
1616 '69.0.3497.40',
1617 '70.0.3522.1',
1618 '70.0.3522.0',
1619 '70.0.3521.2',
1620 '69.0.3497.39',
1621 '68.0.3440.113',
1622 '70.0.3505.6',
1623 '70.0.3521.1',
1624 '70.0.3521.0',
1625 '69.0.3497.38',
1626 '68.0.3440.112',
1627 '70.0.3520.1',
1628 '70.0.3520.0',
1629 '69.0.3497.37',
1630 '68.0.3440.111',
1631 '70.0.3519.3',
1632 '70.0.3519.2',
1633 '70.0.3519.1',
1634 '70.0.3519.0',
1635 '69.0.3497.36',
1636 '68.0.3440.110',
1637 '70.0.3518.1',
1638 '70.0.3518.0',
1639 '69.0.3497.35',
1640 '69.0.3497.34',
1641 '68.0.3440.109',
1642 '70.0.3517.1',
1643 '70.0.3517.0',
1644 '69.0.3497.33',
1645 '68.0.3440.108',
1646 '69.0.3497.32',
1647 '70.0.3516.3',
1648 '70.0.3516.2',
1649 '70.0.3516.1',
1650 '70.0.3516.0',
1651 '69.0.3497.31',
1652 '68.0.3440.107',
1653 '70.0.3515.4',
1654 '68.0.3440.106',
1655 '70.0.3515.3',
1656 '70.0.3515.2',
1657 '70.0.3515.1',
1658 '70.0.3515.0',
1659 '69.0.3497.30',
1660 '68.0.3440.105',
1661 '68.0.3440.104',
1662 '70.0.3514.2',
1663 '70.0.3514.1',
1664 '70.0.3514.0',
1665 '69.0.3497.29',
1666 '68.0.3440.103',
1667 '70.0.3513.1',
1668 '70.0.3513.0',
1669 '69.0.3497.28',
1670 )
1671 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672
1673
# Default HTTP request headers. The User-Agent value is computed once, when
# this module is imported, by calling random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1681
5f6a1245 1682
fb37eb25
S
# Fixed User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as the default of `default=` parameters, so that None
# can still be passed as a real default value. Always compare it with `is`.
NO_DEFAULT = object()
1689
7105440c
YCH
# English month names, in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code; every list is in calendar order.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
a942d6cb 1700
a7aaa398
S
# Known file extensions, written without the leading dot.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1715
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. The plain strings passed
# to itertools.chain contribute one replacement letter per character, while the
# list items ('AE', 'OE', 'TH', 'ss', ...) are multi-letter replacements.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1720
46f59e89
S
# strptime()-style patterns shared by both the day-first and the month-first
# variants below. Order matters: the tuple is kept exactly as it was.
DATE_FORMATS = (
    # textual month
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    # year first, numeric
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    # ISO 8601 like
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    # textual month with "at <time>"
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Shared patterns followed by the ambiguous numeric ones, read as day/month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# Shared patterns followed by the ambiguous numeric ones, read as month/day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1778
# Matches the tail of a call of the form }('<payload>',<int>,<int>,'<a|b|c>'.split('|')
# and captures, in order: the payload, the two integers and the '|'-separated list.
# NOTE(review): presumably used to unpack "packed" JavaScript - confirm against callers.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (case-insensitive, dot
# matches newlines); the script body is available as the `json_ld` group.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1781
7105440c 1782
def preferredencoding():
    """Return the encoding to use for text on this system.

    The value of locale.getpreferredencoding() is used when it names a codec
    that can actually encode text; in every other case 'UTF-8' is returned.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: an unknown or broken encoding name raises here
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 1796
f4bfd65f 1797
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created in the same
    directory as fn, which is then renamed onto fn. If any step fails, the
    temporary file is removed (best effort) and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            # Operate on the argument (the original lambdas ignored it and
            # silently used the enclosing `fn` instead)
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        # The file must survive close() so that it can be renamed afterwards
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() can only be read by setting it, so set and restore it,
            # then give the temporary file the permissions a regular file
            # created under that umask would get
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind; the original error is
        # what gets reported
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1856
1857
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first element below node that matches xpath and carries
        the attribute key (with value val, when val is given), or None.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] without attribute predicates """
        for candidate in node.findall(compat_xpath(xpath)):
            has_key = key in candidate.attrib
            if has_key and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1872
d7e66d39
JMF
1873# On python2.6 the xml.etree.ElementTree.Element methods don't support
1874# the namespace parameter
5f6a1245
JW
1875
1876
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag'.

    ns_map maps each prefix to its namespace URI. Steps without a colon are
    copied unchanged; an unknown prefix raises KeyError.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([_expand(step) for step in path.split('/')])
1887
d77c3dfd 1888
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node matching xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order. When nothing matches: default is returned if one was
    given, otherwise ExtractorError is raised if fatal is set, otherwise
    None is returned. name is only used in the error message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable of expressions is
        # handled like a miss instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1910
1911
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element, or an element without text, yields default when one
    was given; otherwise it raises ExtractorError if fatal is set and returns
    None if it is not.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Nothing found: hand back whatever xpath_element() decided on
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1925
1926
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default is returned if one was given,
    otherwise ExtractorError is raised if fatal is set, otherwise None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1938
1939
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    element_id = id
    return get_element_by_attribute('id', element_id, html)
43e8fafd 1943
12ea2f30 1944
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1949
1950
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute in the passed HTML document"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1954
1955
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several classes, so accept anything around
    # the wanted name as long as it stays inside the quotes
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_pattern, html, escape_value=False)
1961
1962
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case it
    is inserted into the search pattern as a regular expression. The returned
    contents are HTML-unescaped.
    """

    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(element_re, html):
        content = match.group('content')

        # Content that starts with a quote loses its first and last character
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]

        contents.append(unescapeHTML(content))

    return contents
a921f407 1986
c5229f39 1987
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; stays empty until
        # handle_starttag() has been called
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs is a sequence of (name, value) pairs; every new start tag
        # replaces what was stored before
        self.attrs = dict(attrs)
1997
c5229f39 1998
8bb56eee
BF
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    the result is
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was collected before the error is still returned
        pass
    return attr_parser.attrs
9e6dd238 2023
c5229f39 2024
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Literal newlines carry no meaning in HTML; line breaks come from markup
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> (in any spelling) becomes a newline
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        # so does the boundary between two paragraphs
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # every remaining tag is dropped
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2040
2041
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' stands for standard output (its binary buffer when there is one).
    For any other name the file is opened as given; if that fails for a
    reason other than EACCES, the name is passed through sanitize_path() and,
    provided that changed it, opened again. Errors of this second attempt
    propagate to the caller, like with the standard open() function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            out = sys.stdout
            if hasattr(out, 'buffer'):
                out = out.buffer
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem will not go away by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2072
2073
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2081
5f6a1245 2082
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally bans shell-unfriendly characters,
        # whitespace and everything outside ASCII
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        # IDs only get the per-character replacement
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2122
5f6a1245 2123
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform s is returned unchanged.
    """
    if sys.platform != 'win32':
        return s

    def _clean(path_part):
        # Relative markers stay; forbidden characters and a trailing
        # space or dot are replaced with '#'
        if path_part in ['.', '..']:
            return path_part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)

    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component in front of the first separator
        parts.pop(0)
    cleaned = [_clean(path_part) for path_part in parts]
    if drive_or_unc:
        cleaned.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*cleaned)
2140
2141
def sanitize_url(url):
    """Repair a few common defects of URLs and return the result.

    Scheme-less URLs ('//host/...') get the 'http:' scheme, and some known
    misspellings of the scheme are corrected. Other URLs are returned as is.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in typo_fixes:
        # The patterns are anchored, so at most one replacement happens and
        # only the first matching typo is corrected
        fixed, hits = re.subn(mistake, fixup, url)
        if hits:
            return fixed
    return url
17bcc626
S
2158
2159
def sanitized_Request(url, *args, **kwargs):
    """Build a Request for url after passing it through sanitize_url()

    All further arguments are handed to the Request constructor unchanged.
    """
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2162
2163
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables second
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2167
2168
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element, in the
    original order. Elements are compared with ==, so they need not be
    hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2176
912b38b4 2177
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with the
    trailing ';' (e.g. 'amp;' or '#x41;'). Entities that cannot be decoded
    are returned in their literal form ('&...;').
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            # int() accepts the '0x' prefix together with base 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # ValueError: not a valid code point. OverflowError: the number
            # does not even fit the integer type the conversion works with.
            # Either way, fall through to the literal representation.
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2207
2208
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for

    None is passed through. Entities that cannot be decoded stay as they are.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # Group 1 is the entity without '&' but including the ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2216
8bf48f23 2217
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for file names and subprocess arguments

    This is the locale encoding on Windows 2000 and later and the filesystem
    encoding everywhere else, with 'utf-8' as the fallback.
    """
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2228
2229
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the name is going to be passed to a subprocess

    Returns s unchanged wherever unicode file names can be used directly,
    and s encoded with get_subprocess_encoding() everywhere else.
    """

    assert type(s) == compat_str

    unicode_is_fine = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if unicode_is_fine:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2252
2253
def decodeFilename(b, for_subprocess=False):
    """Counterpart of encodeFilename()

    Only byte strings on Python 2 are decoded (with the encoding returned by
    get_subprocess_encoding()); anything else is returned unchanged.
    for_subprocess is accepted but not used.
    """

    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2263
f07b74fc
PH
2264
def encodeArgument(s):
    """Encode a command line argument with encodeFilename(..., True)

    Byte strings are still tolerated: they are decoded as ASCII first.
    """
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
2272
2273
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument: decodeFilename() in subprocess mode"""
    return decodeFilename(b, for_subprocess=True)
2276
2277
8271226a
PH
def decodeOption(optval):
    """Return an option value as text

    None is passed through and byte strings are decoded with the preferred
    encoding; the result must then be a text string.
    """
    if optval is None:
        return optval
    decoded = optval
    if isinstance(decoded, bytes):
        decoded = decoded.decode(preferredencoding())

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 2286
5f6a1245 2287
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'

    The shortest form that can hold the value is used, and delim separates
    the fields. The comparisons are inclusive so that exactly one hour is
    '1:00:00' (not '60:00') and exactly one minute is '1:00' (not '60').
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2295
a0ddb8a2 2296
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler that suits the running Python version

    params is the options dictionary; its 'nocheckcertificate' entry turns
    certificate verification off. Additional keyword arguments are passed on
    to YoutubeDLHTTPSHandler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname has to be switched off before verify_mode can
            # be set to CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    # Reached when create_default_context is missing or could not be used
    if sys.version_info < (3, 2):
        # No SSL context is passed in this case
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2320
732ea2f0 2321
08f2a92c
JMF
def bug_reports_message():
    """Return the text asking the user to report a problem

    The text starts with '; ' because it is meant to be appended to an error
    message. The update hint depends on ytdl_is_updateable().
    """
    update_cmd = (
        'type youtube-dlc -U to update'
        if ytdl_is_updateable()
        else 'see https://github.com/pukkandan/yt-dlc on how to update')
    return ''.join((
        '; please report this issue on https://github.com/pukkandan/yt-dlc .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2331
2332
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Common base class of the exceptions defined for YoutubeDL."""
2336
2337
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        cause, if given, is appended to the message; video_id, if given, is put in front of it.
        """

        # Network problems and unavailable videos being handled right now
        # are never considered bugs
        expected_causes = (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_causes:
            expected = True

        full_msg = msg
        if video_id is not None:
            full_msg = video_id + ': ' + full_msg
        if cause:
            full_msg += ' (caused by %r)' % cause
        if not expected:
            full_msg += bug_reports_message()
        super(ExtractorError, self).__init__(full_msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as a string, or None if there is none"""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2365
1c256f70 2366
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected error for a URL that is not supported; the URL is kept in .url"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2372
2373
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error for a regular expression that did not match"""
2377
2378
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always an expected error; the plain message is kept in .msg and
    the countries argument in .countries.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2390
2391
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may throw this exception, carrying the
    appropriate error message, when they are not configured to continue on
    errors.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that the original problem can still be inspected
        self.exc_info = exc_info
d77c3dfd
FV
2404
2405
class SameFileError(YoutubeDLError):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2413
2414
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this exception to indicate an
    error in the postprocessing task. The message is also kept in .msg.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
d77c3dfd 2425
5f6a1245 2426
class MaxDownloadsReached(YoutubeDLError):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
2430
2431
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available for
    that video.
    """
d77c3dfd
FV
2439
2440
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this exception when a downloaded file
    is smaller than what the server announced first, which indicates that
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 2456
5f6a1245 2457
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an error code and message, classified into .reason

    .reason is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED', derived from
    the code and from well-known phrases in the message.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        out_of_space = (
            self.code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in self.msg
            or 'Disk quota exceeded' in self.msg)
        if out_of_space:
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2472
2473
class XAttrUnavailableError(YoutubeDLError):
    """Error type without extra state.

    NOTE(review): judging by the name it signals that xattr support is not
    available - confirm against the code that raises it.
    """
2476
2477
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and make it honour the 'source_address' option

    ydl_handler supplies the options through its _params dictionary,
    http_class is the connection class to instantiate with *args/**kwargs and
    is_https tells whether the socket has to be wrapped with SSL in the
    Python 2.6 code path. Returns the connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: this source_address parameter shadows the outer variable and
            # is indexed below, so it is expected to be a (host, port) tuple
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address means IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try the remaining addresses in turn; the first that connects wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the socket before
                    # moving on to the next address
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                # Every address failed: report the last error
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is used for the local end of the connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            # Bind hc as self, as the function is set on the instance
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2541
2542
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to headers.

    Without the marker the very same mapping is returned.  With it, a new
    dict is returned that contains neither the marker nor any header whose
    lower-cased name is 'accept-encoding'; the input is left untouched.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    cleaned = {}
    for name, value in headers.items():
        if name.lower() == 'accept-encoding':
            continue
        cleaned[name] = value
    del cleaned['Youtubedl-no-compression']
    return cleaned
87f0e62d
YCH
2551
2552
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store params (read later as self._params) and init the base handler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req over plain HTTP, via a SOCKS connection class if requested.

        The internal 'Ytdl-socks-proxy' request header selects the proxy and
        is removed from the request before it is sent.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress data, trying a headerless deflate stream first."""
        try:
            # Negative wbits: raw deflate data without a zlib header
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Pre-process an outgoing request and return the request to send.

        Escapes the URL, adds any missing entries from std_headers and
        applies handle_youtubedl_headers() to the header mapping.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Post-process a response: undo gzip/deflate and escape redirects.

        Returns either resp itself or a replacement addinfourl object that
        wraps the decompressed body.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if none of the
                # attempts decompresses, re-raise the first error.
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            # The replacement response is built with old_resp.headers, so the
            # deletion below operates on that same headers object.
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through exactly the same pre/post-processing
    https_request = http_request
    https_response = http_response
bf50b038 2674
5de90176 2675
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of base_class that connects through a SOCKS proxy.

    base_class must be (a subclass of) HTTPConnection or HTTPSConnection.
    socks_proxy is a proxy URL whose scheme is one of 'socks', 'socks4',
    'socks4a' or 'socks5' (case-insensitive); host, port (default 1080),
    username and password are taken from the URL.

    Raises ValueError for any other scheme.  Previously such a URL left the
    proxy type unassigned and failed with an UnboundLocalError instead.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Leave None / '' alone; credentials in the URL are percent-encoded
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS the TLS layer is added on top of the proxied socket
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2717
2718
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that builds its connections via _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req over HTTPS, through a SOCKS proxy when one is requested."""
        # Forward whichever of these the base handler set up on this
        # instance ('_context': python > 2.6, '_check_hostname': python 3.x)
        open_kwargs = {}
        for attr_name, kwarg_name in (
                ('_context', 'context'),
                ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr_name):
                open_kwargs[kwarg_name] = getattr(self, attr_name)

        connection_class = self._https_conn_class
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(factory, req, **open_kwargs)
be4a824d
PH
2742
2743
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    Cookie jar reading and writing Netscape/Mozilla cookie files as UTF-8.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly entries; stripped before parsing a line
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a cookie entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        Session cookies (``expires is None``) are written with an expiry of
        0 rather than an empty field.  The cookies held by the jar are not
        modified: earlier versions set ``expires = 0`` on the live objects,
        which made is_expired() report them as expired from then on.

        Raises ValueError when neither filename nor self.filename is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                # Store session cookies with `expires` set to 0 instead of
                # an empty string
                expires = compat_str(
                    0 if cookie.expires is None else cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Malformed entries are skipped with a warning written to stderr
        instead of aborting the whole load.

        Raises ValueError when neither filename nor self.filename is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate one raw line; returns the line to feed to the parser
            # or raises LoadError for an entry that must be skipped.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect the accepted lines in memory and parse that buffer
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2860
2861
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also registers its hooks for HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to the stock HTTPCookieProcessor.http_response()."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # (The workaround itself is currently disabled and kept for reference.)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        stock_processor = compat_urllib_request.HTTPCookieProcessor
        return stock_processor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2884
2885
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that coerces the redirect URL to text on Python 2."""

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            stock_handler = compat_urllib_request.HTTPRedirectHandler
            text_url = compat_str(newurl)
            return stock_handler.redirect_request(
                self, req, fp, code, msg, headers, text_url)
2893
2894
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC offset off date_str.

    Recognises a final 'Z' or a final '+HH:MM' / '-HHMM' style offset
    (optionally preceded by one space) that follows at least eight other
    characters.  Returns ``(offset, remainder)`` where offset is a
    datetime.timedelta (zero when nothing was recognised or for 'Z') and
    remainder is date_str without the recognised suffix.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if tz_match is None:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Plain 'Z' suffix: UTC
        return datetime.timedelta(), remainder

    direction = -1 if sign_char == '-' else 1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remainder
2911
2912
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str is expected as 'YYYY-MM-DD<delimiter>HH:MM:SS'; fractional
    seconds are dropped.  When timezone (a datetime.timedelta) is not
    supplied, it is taken from the string via extract_timezone().
    Returns None for a None input or a string that does not parse.
    """
    if date_str is None:
        return None

    # Drop fractional seconds: anything of the form '.digits'
    trimmed = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, trimmed = extract_timezone(trimmed)

    try:
        layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_moment = datetime.datetime.strptime(trimmed, layout) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
912b38b4
PH
2930
2931
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first format list, or the month-first one if not day_first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2934
2935
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; when several match, the last one wins
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to the RFC 2822 parser from the email package
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if upload_date is None else compat_str(upload_date)
bf50b038 2962
5f6a1245 2963
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string into a UNIX timestamp.

    Returns None when date_str is None or no known format matches.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # The PM marker is detected before it is stripped below
    pm_delta = 12 if re.search(r'(?i)PM', text) else 0
    timezone, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_tail = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if tz_tail:
        text = text[:-len(tz_tail.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nano = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if nano:
        text = nano.group(1)

    pm_shift = datetime.timedelta(hours=pm_delta)
    for fmt in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(text, fmt) - timezone + pm_shift
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    # Fall back to the RFC 2822 parser from the email package
    timetuple = email.utils.parsedate_tz(text)
    if not timetuple:
        return None
    return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
2995
2996
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from url, falling back to default_ext.

    The text after the last '.' of the part before the first '?' is used
    when it is purely alphanumeric, or when (minus trailing slashes) it is
    listed in KNOWN_EXTENSIONS.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.split('?', 1)[0]
    candidate = before_query.rsplit('.', 1)[-1]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    without_slash = candidate.rstrip('/')
    if without_slash in KNOWN_EXTENSIONS:
        return without_slash
    return default_ext
73e79f2a 3008
5f6a1245 3009
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3012
5f6a1245 3013
def date_from_str(date_str):
    """
    Return a datetime.date from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError for anything else."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')
    # A bad approximation?
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    # timedelta takes the plural form as keyword: days=..., weeks=...
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
3041
3042
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Any string that is not exactly eight digits is returned unchanged."""
    pieces = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if pieces is None:
        return date_str
    return '-'.join(pieces.groups())
3051
5f6a1245 3052
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest / latest representable
        date.  Raises ValueError if start lies after end.
        """
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date
        if not isinstance(candidate, datetime.date):
            # Anything that is not already a date is parsed as a string
            candidate = date_from_str(candidate)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3082
3083
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3092
3093
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s is the text to write, out the target stream.  The special method is
    the Win32 WriteConsoleW call and is only used when out's file
    descriptor is 1 or 2 and the matching standard handle is a console.
    Raises OSError when WriteConsoleW reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE in the Win32 API)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True unless handle is a character device for which
        # GetConsoleMode succeeds.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, each chunk stopping just
    # before the next non-BMP character; such a character is then written
    # on its own with a length of 2.
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3167
3168
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    s must be exactly of type compat_str.  On Windows, when no explicit
    encoding is given, the console-specific writer is tried first.
    Otherwise the text is encoded (dropping unencodable characters) for
    binary streams, written to out.buffer when available, or written
    as-is.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    wants_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if wants_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    needs_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if needs_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        charset = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(charset, 'ignore'))
    else:
        out.write(s)
    out.flush()
3189
3190
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the items of bs as a list of integer byte values.

    Works for sequences that yield ints when indexed (Python 3 bytes) as
    well as for sequences of one-character strings.
    """
    if not bs:
        return []
    yields_ints = isinstance(bs[0], int)  # Python 3
    return list(bs) if yields_ints else list(map(ord, bs))
3198
c257baff 3199
def intlist_to_bytes(xs):
    """Pack a sequence of integers (one unsigned byte each) into bytes."""
    if xs:
        byte_format = '%dB' % len(xs)
        return compat_struct_pack(byte_format, *xs)
    return b''
c38b1e77
PH
3204
3205
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs raising IOError where fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout handed to LockFileEx / UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low / high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 is LOCKFILE_EXCLUSIVE_LOCK in the Win32 API
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock() lock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3279
3280
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened on construction.  Entering takes a shared lock for
    mode 'r' and an exclusive one for 'a' / 'w'; leaving unlocks and
    closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the handle opened in __init__
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3310
3311
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    detected = sys.getfilesystemencoding()
    if detected is None:
        return 'utf-8'
    return detected
3315
3316
def shell_quote(args):
    """Return args as one space-separated string, each item shell-quoted."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            return arg.decode(encoding)
        return arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
9d4660ca
PH
3326
3327
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is JSON-encoded into a '__youtubedl_smuggle' field appended to
    url as a fragment.  If url already carries smuggled data, the two are
    merged, with the values already present in the URL taking precedence.

    The caller's data mapping is not modified; earlier versions updated it
    in place with the data found in the URL.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so the argument is left untouched
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3336
3337
def unsmuggle_url(smug_url, default=None):
    """Split smuggled data off a URL.

    Returns ``(url, data)``.  When smug_url carries no
    '#__youtubedl_smuggle' marker it is returned unchanged together with
    default.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    # Everything after the last '#' is the url-encoded payload
    url, encoded = smug_url.rsplit('#', 1)
    payload = compat_parse_qs(encoded)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
02dbf93f
PH
3345
3346
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count with a binary unit suffix, e.g. '1.50KiB'.

    bytes may be None (giving 'N/A'), a number, or a str holding a
    number.  The unit is chosen from B up to YiB; values beyond the YiB
    range are still expressed in YiB.  Negative values keep their sign,
    and values between -1 and 1 are expressed in B.

    Earlier versions raised ValueError for negative input, IndexError for
    values of 1024 ** 9 and above, and picked a wrong suffix for very
    small fractions.
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)

    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    # The sign plays no part in choosing the unit
    magnitude = abs(bytes)
    if magnitude == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(magnitude, 1024.0))
        # Keep the index inside the suffix table at both ends
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3359
1c088fa8 3360
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse a leading '<number><unit>' in s and scale it via unit_table.

    The number may use ',' or '.' as decimal separator. Returns the scaled
    value truncated to int, or None when s does not start with a number
    followed by one of the table's units.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if mobj is None:
        return None
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3370
3371
be64b5b0
PH
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into bytes.

    Returns None when s is None or lookup_unit_table cannot parse it.
    """
    if s is None:
        return None

    # (prefix letter, decimal unit name, binary unit name) by rising power
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        lower = letter.lower()
        decimal = 1000 ** power
        binary = 1024 ** power
        unit_table[letter + 'iB'] = binary
        unit_table[letter + 'B'] = decimal
        unit_table[lower + 'B'] = binary
        unit_table[letter + 'b'] = decimal
        unit_table[lower + 'b'] = decimal
        unit_table[decimal_name + 'bytes'] = decimal
        unit_table[binary_name + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
3441
3442
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Strings made only of digits, ',' and '.' go through str_to_int; anything
    else is looked up against the k/m/kk multiplier table. None yields None.
    """
    if s is None:
        return None

    text = s.strip()

    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand = 1000
    million = 1000 ** 2
    multipliers = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(multipliers, text)
be64b5b0 3462
2f7ae819 3463
b871d7e9
S
def parse_resolution(s):
    """Extract width/height information from a free-form string.

    Recognises 'WxH' (also with 'X' or '×'), '<height>p' / '<height>i' and
    '4k' / '8k' (height is the number times 540). Returns a dict with the
    keys found, or an empty dict when s is None or nothing matches.
    """
    if s is None:
        return {}

    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    # Height-only notations, tried in order, with their scale factor
    for pattern, factor in ((r'\b(\d+)[pPiI]\b', 1), (r'\b([48])[kK]\b', 540)):
        found = re.search(pattern, s)
        if found:
            return {'height': int(found.group(1)) * factor}

    return {}
3484
3485
0dc41787
S
def parse_bitrate(s):
    """Return the integer preceding 'kbps' in s, or None.

    None is also returned when s is not a text string.
    """
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3492
3493
def month_by_name(name, lang='en'):
    """ Return the 1-based number of the month called name.

    Names are looked up in MONTH_NAMES[lang], falling back to the English
    list for an unknown lang. Returns None if the name is not found.
    """
    candidates = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    try:
        position = candidates.index(name)
    except ValueError:
        return None
    return position + 1
3503
3504
def month_by_abbreviation(abbrev):
    """ Return the 1-based number of the month whose English name starts
    with the three-letter abbrev, or None if there is none """
    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3513
3514
def fix_xml_ampersands(xml_str):
    """Replace bare '&' by '&amp;' in XML.

    Ampersands that already start one of the five predefined entities or a
    numeric character reference of up to four digits are left untouched.
    """
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
3521
3522
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl().

    title must be a text string. The function silently does nothing when
    running on Jython, when 'libc.so.6' cannot be loaded, or when the
    loaded libc has no prctl symbol.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initialising the buffer from the bytes allocates len + 1 bytes, so the
    # name handed to prctl is always NUL-terminated. A buffer of exactly
    # len(title_bytes) bytes has no room for the terminator.
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3547
3548
def remove_start(s, start):
    """Return s without the prefix start; s is returned as-is when it is
    None or does not begin with start."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3551
3552
def remove_end(s, end):
    """Return s without the suffix end; s is returned as-is when it is None
    or does not finish with end.

    An empty end leaves s unchanged (s[:-0] would otherwise yield an empty
    string).
    """
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
2b9faf55
PH
3555
3556
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching single or double quotes around s.

    Values that are None, shorter than two characters, or not wrapped in
    the same quote character on both ends are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3564
3565
b6e0c7d2
U
def get_domain(url):
    """Return the host part of url without scheme and leading 'www.',
    or None when url does not look like it contains a dotted host."""
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3569
3570
def url_basename(url):
    """Return the last segment of the URL's path, ignoring surrounding
    slashes."""
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
3574
3575
02dc0a36
S
def base_url(url):
    """Return the http(s) URL up to and including the last '/' that comes
    before any '?', '#' or '&'.

    Raises AttributeError when url does not match that shape.
    """
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group()
3578
3579
def urljoin(base, path):
    """Join base and path into a URL, tolerating bad input.

    Byte strings are decoded as UTF-8. Returns None when path is empty or
    not a string, or when base is not an http(s) or scheme-relative URL.
    A path that is already absolute or scheme-relative is returned as-is.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_is_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_is_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3593
3594
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always 'HEAD'."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3598
3599
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request subclass whose HTTP method is always 'PUT'."""

    def get_method(self):
        return 'PUT'
3603
3604
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, returning default when that is not possible.

    If get_attr is given, the attribute of that name is read from v first.
    The integer is multiplied by invscale and floor-divided by scale.
    None and the empty string count as missing values.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
9732d77e 3617
9572013d 3618
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3621
9732d77e
PH
3622
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned unchanged; text has ',', '.' and '+' removed
    before conversion. Any other type yields None.
    """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
608d11f5
PH
3630
3631
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float, multiplied by invscale and divided by scale.

    Returns default when v is None or cannot be converted.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3639
3640
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v when it is a bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3643
3644
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace when it is a text
    string, otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3647
3648
af03000a
S
def url_or_none(url):
    """Return the stripped url when it looks like a supported URL.

    Accepted are scheme-relative URLs and the http(s), rtmp*, rtsp*, mms
    and ftp(s) schemes; everything else, including non-text input, gives
    None.
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
af03000a
S
3654
3655
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in order:
      1. colon-separated '[[[DD:]HH:]MM:]SS[.fraction]'
      2. unit-suffixed / ISO 8601-like, e.g. 'P1DT2H3M4.5S' or '3 min 5 s'
      3. a single decimal amount of hours or minutes, e.g. '1.5 hours'
    An optional trailing 'Z' is accepted by all of them.

    Returns None if s is not a string or matches none of the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Notation 1: colon-separated fields
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Notation 2: unit suffixes. The year, month and week components
        # are matched but not captured, so they do not add to the result.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Notation 3: one decimal number of hours or of minutes
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms holds the fractional part including its leading dot, e.g. '.5'
        duration += float(ms)
    return duration
91d7d0b3
JMF
3712
3713
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of filename's existing extension.

    When expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3720
3721
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace filename's extension with ext.

    When expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3727
3728
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary can be started, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False when starting the process raises OSError. """
    command = [exe] + args
    try:
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        proc.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
3737
3738
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = proc.communicate()[0]
    except OSError:
        return False
    # Byte output is decoded leniently before being searched for a version
    if isinstance(output, bytes):
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3756
3757
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output.

    version_re defaults to a pattern matching 'version <something>'.
    When nothing matches, unrecognized is returned.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3767
3768
class PagedList(object):
    """Base class for paged lists; len() is computed through getslice(),
    which this class itself does not define."""

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3773
9c44d242
PH
3774
class OnDemandPagedList(PagedList):
    """Paged list that calls pagefunc(pagenum) only for the pages a slice
    needs, optionally caching each fetched page."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        # pagefunc: callable taking a 0-based page number and returning an
        # iterable with that page's entries
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list; end=None reads
        until a page shorter than pagesize is encountered."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            # Entry indices covered by this page: [firstid, nextfirstid)
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of the first wanted entry inside this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry inside this page, or
            # None when the slice extends beyond this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
3825
3826
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        # pagefunc: callable taking a 0-based page number and returning an
        # iterable with that page's entries
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries in [start, end) as a list; end=None reads
        through page number pagecount - 1."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Entries to drop from the front of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still to collect, or None for "no limit"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the remainder of the request
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3854
3855
def uppercase_escape(s):
    """Decode every literal '\\UXXXXXXXX' escape sequence found in s."""
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
0fe2ff78
YCH
3862
3863
def lowercase_escape(s):
    """Decode every literal '\\uXXXX' escape sequence found in s."""
    decode_escape = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode_escape(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
b53466e1 3870
d05cfe06
S
3871
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Under Python 2 text is encoded to UTF-8 before it is quoted
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
3877
3878
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host is IDNA-encoded; path, params, query and fragment are passed
    through escape_rfc3986.
    """
    parts = compat_urllib_parse_urlparse(url)
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3889
62e609ab
PH
3890
def read_batch_urls(batch_fd):
    """Read URLs, one per line, from the file-like batch_fd and close it.

    Byte lines are decoded as UTF-8. A leading byte order mark and
    surrounding whitespace are removed; empty lines and lines starting
    with '#', ';' or ']' are skipped. Returns the list of remaining URLs.
    """
    # A UTF-8 BOM appears as U+FEFF once the input has been decoded as
    # UTF-8, or as these three characters when the raw bytes were decoded
    # one-to-one (e.g. as Latin-1). str.strip() removes neither form.
    BOMS = ('\xef\xbb\xbf', '\ufeff')

    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        for bom in BOMS:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
3905
3906
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3909
3910
def update_url_query(url, query):
    """Return url with the parameters in query added to, or replacing,
    those in its query string. An empty query returns url unchanged."""
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3919
8e60dc75 3920
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with selected parts overridden.

    headers are merged over the original headers, data and url replace the
    original values when truthy, and query is merged into the URL. HEAD and
    PUT requests keep their method; a timeout attribute is carried over.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    body = data or req.data
    target_url = update_url_query(url or req.get_full_url(), query)
    method = req.get_method()
    request_classes = {'HEAD': HEADRequest, 'PUT': PUTRequest}
    req_type = request_classes.get(method, compat_urllib_request.Request)
    new_req = req_type(
        target_url, data=body, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3939
3940
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns (body bytes, Content-Type value). Raises ValueError when the
    boundary occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
3961
3962
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is never replaced: a clash with the
        # data propagates as ValueError
        return _multipart_encode_impl(data, boundary)

    # Keep drawing random boundaries until one does not occur in the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
3991
3992
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable key of a list/tuple, in d.

    With several keys, missing keys and None values are skipped, as are
    other falsy values unless skip_false_values is False; default is
    returned when no key qualifies. A single key behaves like d.get().
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4001
4002
def try_get(src, getter, expected_type=None):
    """Apply getter (or each getter of a list/tuple in turn) to src.

    Returns the first result obtained without AttributeError, KeyError,
    TypeError or IndexError that is also an instance of expected_type
    (when one is given); returns None if no getter delivers one.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4014
4015
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts from left to right, ignoring None values.

    The first value seen for a key wins, except that an empty string
    already stored is replaced by a later non-empty string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_empty_string = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_empty_string:
                    continue
            merged[k] = v
    return merged
4028
4029
8e60dc75
S
# NOTE: the default for encoding is computed by preferredencoding() once,
# when this definition is executed, not on every call.
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string unchanged if it already is a compat_str, otherwise
    convert it with compat_str(string, encoding, errors)."""
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4032
16392824 4033
a1a530b0
PH
# Age limit returned by parse_age_limit for each US rating label
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4041
4042
# Age limit returned by parse_age_limit for each TV Parental Guidelines
# label; the part after 'TV-' is also used to build its matching pattern
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4051
4052
def parse_age_limit(s):
    """Convert an age rating into a numeric age limit.

    Accepts an int between 0 and 21, a string like '18' or '18+', a key of
    US_RATINGS, or a TV rating such as 'TV-14' / 'TV_14' / 'TV14'.
    Returns None for anything else.
    """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4067
4068
def strip_jsonp(code):
    """Remove a JSONP wrapper and return the text passed to the callback.

    Handles an optional 'window.' prefix, an 'fn && fn(...)' guard, a
    trailing semicolon and trailing '//' comments.
    """
    jsonp_wrapper = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_wrapper, r'\g<callback_data>', code)
478c2c61
PH
4077
4078
def js_to_json(code):
    """Rewrite a JavaScript object/array literal so it can be parsed as JSON.

    Each token matched by the final regular expression is passed through
    fix_kv: comments, trailing commas and runs of '!' are dropped, strings
    are re-quoted with double quotes, hexadecimal and octal integers are
    converted to decimal, and bare identifiers are wrapped in double quotes.
    """
    # Block comment or line comment
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    # Optional whitespace with at most one comment in between
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for integer literals, optionally followed by ':'
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Return the JSON replacement for one matched token
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: drop the outer quotes and normalise the escapes
            # so the content is valid inside a double-quoted JSON string
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # A literal used as an object key keeps its ':' and is quoted
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4119
4120
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its index in quality_ids,
    or to -1 when the id is not in the list.
    """
    def rank(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return rank
4129
acd69589 4130
# Output filename template made of %-style named fields: title, id and ext
DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s'
0a871f68 4132
a020a0dc
PH
4133
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that the result, including the
    trailing '...', is length characters long. None is passed through.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4142
4143
def version_tuple(v):
    """Split v on '.' and '-' and return the pieces as a tuple of ints.

    Raises ValueError when a piece is not an integer.
    """
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
4146
4147
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    When version is empty or either value cannot be parsed, the answer is
    'not assume_new'.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
732ea2f0
PH
4155
4156
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    # Updating with -U is disabled, so this is always False. The code that
    # used to follow this return (a check for a zipimporter loader or a
    # frozen executable) could never run and has been removed.
    return False
7d4111ed
PH
4164
4165
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
4169
4170
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4178
4179
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    A few full types are mapped directly; otherwise the subtype (without
    parameters, lower-cased) is translated through a table and returned
    as-is when the table has no entry for it. None yields None.
    """
    if mt is None:
        return None

    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in full_type_map:
        return full_type_map[mt]

    subtype_map = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return subtype_map.get(subtype, subtype)
4217
4218
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into 'vcodec' and 'acodec'.

    The first recognised video and audio codec are reported, with 'none'
    standing in for a missing one. When nothing is recognised and exactly
    two codecs were given, they are taken as video and audio in that order.
    Returns an empty dict otherwise. A warning is written to stderr for
    every unrecognised codec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    video_families = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    audio_families = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    split_codecs = [part for part in stripped if part]

    vcodec, acodec = None, None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in video_families:
            vcodec = vcodec or full_codec
        elif family in audio_families:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4248
4249
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the response headers of url_handle.

    The filename of an 'attachment' Content-Disposition header is tried
    first; otherwise the Content-Type header is mapped via mimetype2ext.
    """
    getheader = url_handle.headers.get

    disposition = getheader('Content-Disposition')
    if disposition:
        attachment = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
4262
4263
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI carrying the bytes data as mime_type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4266
4267
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    Nothing is blocked when no age limit is set or when the content has
    no limit (available for everyone).
    """
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4276
4277
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to a leading byte order mark (UTF-8
    when there is none). The result is the match object for optional
    whitespace followed by '<', or None.
    """
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    s = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', s)
a055469f
PH
4296
4297
def determine_protocol(info_dict):
    """Return the protocol for the format described by info_dict.

    An explicit 'protocol' entry wins. Otherwise the URL is inspected:
    an rtmp, mms or rtsp prefix, then an m3u8 or f4m extension, and
    finally the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4318
4319
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Columns are left-aligned and padded to their widest cell plus extraGap.
    delim adds a row of dashes below the header; hideEmpty drops columns
    whose data cells are all empty.
    """

    def column_widths(rows):
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_flagged(row, flags):
        return [cell for flag, cell in zip(flags, row) if flag]

    if hideEmpty:
        # A data column of width 0 holds only empty cells
        data_widths = column_widths(data)
        header_row = keep_flagged(header_row, data_widths)
        data = [keep_flagged(row, data_widths) for row in data]

    table = [header_row] + data
    widths = column_widths(table)
    if delim:
        separator = ['-' * width for width in widths]
        table = [header_row, separator] + data
    padded_columns = [
        '%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    format_str = ' '.join(padded_columns) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
4340
4341
def _match_one(filter_part, dct):
    """Evaluate a single filter expression against the dict dct.

    Two forms are understood:
      * a comparison '<key> <op> <value>' with op one of <, <=, >, >=, =, !=;
        a '?' directly after the operator makes the part return a truthy
        value when the key is missing from dct
      * a unary test '<key>' or '!<key>'

    Raises ValueError for an unparsable part, for a non-equality operator
    applied to a string value, or for an invalid numeric value.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try it as a file size, first as
                # written and then with a 'B' appended
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: the result is the matched '?' text, or None
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4410
4411
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # The filter is a conjunction of '&'-separated parts; stop at the first
    # part that does not pass
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4417
4418
def match_filter_func(filter_str):
    """Build a callback that checks an info dict against filter_str.

    The returned function gives None when the dict passes the filter and
    a human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Name the entry by title, falling back to id, then a generic word
        fallback_name = info_dict.get('id', 'video')
        video_title = info_dict.get('title', fallback_name)
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
4427
4428
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds.

    Accepts an offset in seconds (`12`, `1.5`, `1.5s`) or a clock value
    (`H:MM:SS`, optionally followed by `.fraction` or `:fraction`).

    @returns  The time as a number of seconds (float), or None for an
              empty or unrecognised expression
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        # The fraction may be separated by ':' instead of '.'
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
bf6427d2
YCH
4440
4441
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a number of seconds as an SRT timecode (HH:MM:SS,mmm).

    Each field is truncated (not rounded) by the %d conversion.
    """
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    whole_seconds = seconds % 60
    milliseconds = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, whole_seconds, milliseconds)
bf6427d2
YCH
4444
4445
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT.

    Styling that SRT players commonly understand (bold, italic, underline
    and font color/size/face) is emitted as inline HTML-like tags.
    Paragraphs without a usable begin time, or with neither an end time
    nor a duration, are skipped.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph elements
    '''
    # (current namespace, [legacy namespaces]) pairs; legacy URIs are
    # replaced bytewise in the raw document so that a single set of
    # namespaces can be used for all lookups below
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # Styling attributes that are looked up; only some values of them
    # produce output (see TTMLPElementParser.start)
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Shorthand for building namespace-qualified names/paths from the
    # prefixes below (presumably via prefix expansion in xpath_with_ns,
    # which is defined elsewhere in this file)
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved {property: value}, filled in further below
    styles = {}
    # style inherited by every element, taken from <body>/<div>
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders one paragraph subtree as SRT text.
        #
        # NOTE(review): _unclosed_elements and _applied_styles are class
        # attributes that are mutated in place, so they are shared by all
        # instances (one instance is created per paragraph). In addition
        # start() pushes onto _applied_styles whenever the element has a
        # non-empty style, while end() pops only when tags were actually
        # opened, so entries can outlive their element. Existing output
        # depends on this; confirm the intent before changing it.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence, lowest to highest: document default,
                # referenced style, inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        # All font attributes go into a single <font> tag
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialise the subtree and feed it through the target parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style may reference a parent that is
    # declared later in the document, so passes are repeated until every
    # reference is resolved.
    while True:
        unresolved = False
        known_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    unresolved = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # A pass that registers no new style cannot unblock anything in
        # the next one (a parent that is missing, part of a cycle, or has
        # no supported property never shows up in `styles`), so stop
        # instead of looping forever; such styles stay undefined.
        if not unresolved or len(styles) == known_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index also advances for skipped paragraphs, so cue numbers may
    # have gaps
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4608
4609
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argv fragment for an option that takes a value.

    @param params          Dictionary of parameters
    @param command_option  Option name to emit (e.g. '--proxy')
    @param param           Key of the value in params
    @returns               [command_option, value-as-string], or [] when the
                           parameter is missing or None
    """
    value = params.get(param)
    if value is None:
        return []
    # Always stringify: a falsy but valid value such as 0 must not reach
    # the command line as a non-string element
    return [command_option, compat_str(value)]
4615
4616
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argv fragment for an option that takes a boolean value.

    @param true_value/false_value  Text emitted for True/False
    @param separator               If given, option and value are joined
                                   with it into a single argument
    @returns                       A list of arguments, or [] when the
                                   parameter is missing or None
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4625
4626
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Emit a bare option when the parameter equals expected_value.

    @returns  [command_option] on a match, [] otherwise
    """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4630
4631
def cli_configuration_args(params, param, default=[]):
    """Return the user-supplied extra arguments stored under param.

    @param params   Dictionary of parameters
    @param param    Key of the argument list in params
    @param default  Arguments used when the parameter is missing or None
    @returns        The configured list, or a fresh copy of default
    """
    ex_args = params.get(param)
    if ex_args is None:
        # Hand out a copy: the default (in particular the shared [] from
        # the signature) must not be modified by callers that extend the
        # result in place
        return list(default)
    assert isinstance(ex_args, list)
    return ex_args
4638
4639
39672624
YCH
class ISO639Utils(object):
    """Conversion between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked at, so a value
        such as 'en-US' is accepted. Returns None for an unknown code.
        """
        short_code = code[:2]
        return cls._lang_map.get(short_code)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code found that maps to code, or
        None when there is none.
        """
        candidates = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
4843
4844
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name

        The lookup is case-insensitive. Returns None for an unknown code.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
5103
5104
773f291d
S
5105class GeoUtils(object):
5106 # Major IPv4 address blocks per country
5107 _country_ip_map = {
53896ca5 5108 'AD': '46.172.224.0/19',
773f291d
S
5109 'AE': '94.200.0.0/13',
5110 'AF': '149.54.0.0/17',
5111 'AG': '209.59.64.0/18',
5112 'AI': '204.14.248.0/21',
5113 'AL': '46.99.0.0/16',
5114 'AM': '46.70.0.0/15',
5115 'AO': '105.168.0.0/13',
53896ca5
S
5116 'AP': '182.50.184.0/21',
5117 'AQ': '23.154.160.0/24',
773f291d
S
5118 'AR': '181.0.0.0/12',
5119 'AS': '202.70.112.0/20',
53896ca5 5120 'AT': '77.116.0.0/14',
773f291d
S
5121 'AU': '1.128.0.0/11',
5122 'AW': '181.41.0.0/18',
53896ca5
S
5123 'AX': '185.217.4.0/22',
5124 'AZ': '5.197.0.0/16',
773f291d
S
5125 'BA': '31.176.128.0/17',
5126 'BB': '65.48.128.0/17',
5127 'BD': '114.130.0.0/16',
5128 'BE': '57.0.0.0/8',
53896ca5 5129 'BF': '102.178.0.0/15',
773f291d
S
5130 'BG': '95.42.0.0/15',
5131 'BH': '37.131.0.0/17',
5132 'BI': '154.117.192.0/18',
5133 'BJ': '137.255.0.0/16',
53896ca5 5134 'BL': '185.212.72.0/23',
773f291d
S
5135 'BM': '196.12.64.0/18',
5136 'BN': '156.31.0.0/16',
5137 'BO': '161.56.0.0/16',
5138 'BQ': '161.0.80.0/20',
53896ca5 5139 'BR': '191.128.0.0/12',
773f291d
S
5140 'BS': '24.51.64.0/18',
5141 'BT': '119.2.96.0/19',
5142 'BW': '168.167.0.0/16',
5143 'BY': '178.120.0.0/13',
5144 'BZ': '179.42.192.0/18',
5145 'CA': '99.224.0.0/11',
5146 'CD': '41.243.0.0/16',
53896ca5
S
5147 'CF': '197.242.176.0/21',
5148 'CG': '160.113.0.0/16',
773f291d 5149 'CH': '85.0.0.0/13',
53896ca5 5150 'CI': '102.136.0.0/14',
773f291d
S
5151 'CK': '202.65.32.0/19',
5152 'CL': '152.172.0.0/14',
53896ca5 5153 'CM': '102.244.0.0/14',
773f291d
S
5154 'CN': '36.128.0.0/10',
5155 'CO': '181.240.0.0/12',
5156 'CR': '201.192.0.0/12',
5157 'CU': '152.206.0.0/15',
5158 'CV': '165.90.96.0/19',
5159 'CW': '190.88.128.0/17',
53896ca5 5160 'CY': '31.153.0.0/16',
773f291d
S
5161 'CZ': '88.100.0.0/14',
5162 'DE': '53.0.0.0/8',
5163 'DJ': '197.241.0.0/17',
5164 'DK': '87.48.0.0/12',
5165 'DM': '192.243.48.0/20',
5166 'DO': '152.166.0.0/15',
5167 'DZ': '41.96.0.0/12',
5168 'EC': '186.68.0.0/15',
5169 'EE': '90.190.0.0/15',
5170 'EG': '156.160.0.0/11',
5171 'ER': '196.200.96.0/20',
5172 'ES': '88.0.0.0/11',
5173 'ET': '196.188.0.0/14',
5174 'EU': '2.16.0.0/13',
5175 'FI': '91.152.0.0/13',
5176 'FJ': '144.120.0.0/16',
53896ca5 5177 'FK': '80.73.208.0/21',
773f291d
S
5178 'FM': '119.252.112.0/20',
5179 'FO': '88.85.32.0/19',
5180 'FR': '90.0.0.0/9',
5181 'GA': '41.158.0.0/15',
5182 'GB': '25.0.0.0/8',
5183 'GD': '74.122.88.0/21',
5184 'GE': '31.146.0.0/16',
5185 'GF': '161.22.64.0/18',
5186 'GG': '62.68.160.0/19',
53896ca5
S
5187 'GH': '154.160.0.0/12',
5188 'GI': '95.164.0.0/16',
773f291d
S
5189 'GL': '88.83.0.0/19',
5190 'GM': '160.182.0.0/15',
5191 'GN': '197.149.192.0/18',
5192 'GP': '104.250.0.0/19',
5193 'GQ': '105.235.224.0/20',
5194 'GR': '94.64.0.0/13',
5195 'GT': '168.234.0.0/16',
5196 'GU': '168.123.0.0/16',
5197 'GW': '197.214.80.0/20',
5198 'GY': '181.41.64.0/18',
5199 'HK': '113.252.0.0/14',
5200 'HN': '181.210.0.0/16',
5201 'HR': '93.136.0.0/13',
5202 'HT': '148.102.128.0/17',
5203 'HU': '84.0.0.0/14',
5204 'ID': '39.192.0.0/10',
5205 'IE': '87.32.0.0/12',
5206 'IL': '79.176.0.0/13',
5207 'IM': '5.62.80.0/20',
5208 'IN': '117.192.0.0/10',
5209 'IO': '203.83.48.0/21',
5210 'IQ': '37.236.0.0/14',
5211 'IR': '2.176.0.0/12',
5212 'IS': '82.221.0.0/16',
5213 'IT': '79.0.0.0/10',
5214 'JE': '87.244.64.0/18',
5215 'JM': '72.27.0.0/17',
5216 'JO': '176.29.0.0/16',
53896ca5 5217 'JP': '133.0.0.0/8',
773f291d
S
5218 'KE': '105.48.0.0/12',
5219 'KG': '158.181.128.0/17',
5220 'KH': '36.37.128.0/17',
5221 'KI': '103.25.140.0/22',
5222 'KM': '197.255.224.0/20',
53896ca5 5223 'KN': '198.167.192.0/19',
773f291d
S
5224 'KP': '175.45.176.0/22',
5225 'KR': '175.192.0.0/10',
5226 'KW': '37.36.0.0/14',
5227 'KY': '64.96.0.0/15',
5228 'KZ': '2.72.0.0/13',
5229 'LA': '115.84.64.0/18',
5230 'LB': '178.135.0.0/16',
53896ca5 5231 'LC': '24.92.144.0/20',
773f291d
S
5232 'LI': '82.117.0.0/19',
5233 'LK': '112.134.0.0/15',
53896ca5 5234 'LR': '102.183.0.0/16',
773f291d
S
5235 'LS': '129.232.0.0/17',
5236 'LT': '78.56.0.0/13',
5237 'LU': '188.42.0.0/16',
5238 'LV': '46.109.0.0/16',
5239 'LY': '41.252.0.0/14',
5240 'MA': '105.128.0.0/11',
5241 'MC': '88.209.64.0/18',
5242 'MD': '37.246.0.0/16',
5243 'ME': '178.175.0.0/17',
5244 'MF': '74.112.232.0/21',
5245 'MG': '154.126.0.0/17',
5246 'MH': '117.103.88.0/21',
5247 'MK': '77.28.0.0/15',
5248 'ML': '154.118.128.0/18',
5249 'MM': '37.111.0.0/17',
5250 'MN': '49.0.128.0/17',
5251 'MO': '60.246.0.0/16',
5252 'MP': '202.88.64.0/20',
5253 'MQ': '109.203.224.0/19',
5254 'MR': '41.188.64.0/18',
5255 'MS': '208.90.112.0/22',
5256 'MT': '46.11.0.0/16',
5257 'MU': '105.16.0.0/12',
5258 'MV': '27.114.128.0/18',
53896ca5 5259 'MW': '102.70.0.0/15',
773f291d
S
5260 'MX': '187.192.0.0/11',
5261 'MY': '175.136.0.0/13',
5262 'MZ': '197.218.0.0/15',
5263 'NA': '41.182.0.0/16',
5264 'NC': '101.101.0.0/18',
5265 'NE': '197.214.0.0/18',
5266 'NF': '203.17.240.0/22',
5267 'NG': '105.112.0.0/12',
5268 'NI': '186.76.0.0/15',
5269 'NL': '145.96.0.0/11',
5270 'NO': '84.208.0.0/13',
5271 'NP': '36.252.0.0/15',
5272 'NR': '203.98.224.0/19',
5273 'NU': '49.156.48.0/22',
5274 'NZ': '49.224.0.0/14',
5275 'OM': '5.36.0.0/15',
5276 'PA': '186.72.0.0/15',
5277 'PE': '186.160.0.0/14',
5278 'PF': '123.50.64.0/18',
5279 'PG': '124.240.192.0/19',
5280 'PH': '49.144.0.0/13',
5281 'PK': '39.32.0.0/11',
5282 'PL': '83.0.0.0/11',
5283 'PM': '70.36.0.0/20',
5284 'PR': '66.50.0.0/16',
5285 'PS': '188.161.0.0/16',
5286 'PT': '85.240.0.0/13',
5287 'PW': '202.124.224.0/20',
5288 'PY': '181.120.0.0/14',
5289 'QA': '37.210.0.0/15',
53896ca5 5290 'RE': '102.35.0.0/16',
773f291d 5291 'RO': '79.112.0.0/13',
53896ca5 5292 'RS': '93.86.0.0/15',
773f291d 5293 'RU': '5.136.0.0/13',
53896ca5 5294 'RW': '41.186.0.0/16',
773f291d
S
5295 'SA': '188.48.0.0/13',
5296 'SB': '202.1.160.0/19',
5297 'SC': '154.192.0.0/11',
53896ca5 5298 'SD': '102.120.0.0/13',
773f291d 5299 'SE': '78.64.0.0/12',
53896ca5 5300 'SG': '8.128.0.0/10',
773f291d
S
5301 'SI': '188.196.0.0/14',
5302 'SK': '78.98.0.0/15',
53896ca5 5303 'SL': '102.143.0.0/17',
773f291d
S
5304 'SM': '89.186.32.0/19',
5305 'SN': '41.82.0.0/15',
53896ca5 5306 'SO': '154.115.192.0/18',
773f291d
S
5307 'SR': '186.179.128.0/17',
5308 'SS': '105.235.208.0/21',
5309 'ST': '197.159.160.0/19',
5310 'SV': '168.243.0.0/16',
5311 'SX': '190.102.0.0/20',
5312 'SY': '5.0.0.0/16',
5313 'SZ': '41.84.224.0/19',
5314 'TC': '65.255.48.0/20',
5315 'TD': '154.68.128.0/19',
5316 'TG': '196.168.0.0/14',
5317 'TH': '171.96.0.0/13',
5318 'TJ': '85.9.128.0/18',
5319 'TK': '27.96.24.0/21',
5320 'TL': '180.189.160.0/20',
5321 'TM': '95.85.96.0/19',
5322 'TN': '197.0.0.0/11',
5323 'TO': '175.176.144.0/21',
5324 'TR': '78.160.0.0/11',
5325 'TT': '186.44.0.0/15',
5326 'TV': '202.2.96.0/19',
5327 'TW': '120.96.0.0/11',
5328 'TZ': '156.156.0.0/14',
53896ca5
S
5329 'UA': '37.52.0.0/14',
5330 'UG': '102.80.0.0/13',
5331 'US': '6.0.0.0/8',
773f291d 5332 'UY': '167.56.0.0/13',
53896ca5 5333 'UZ': '84.54.64.0/18',
773f291d 5334 'VA': '212.77.0.0/19',
53896ca5 5335 'VC': '207.191.240.0/21',
773f291d 5336 'VE': '186.88.0.0/13',
53896ca5 5337 'VG': '66.81.192.0/20',
773f291d
S
5338 'VI': '146.226.0.0/16',
5339 'VN': '14.160.0.0/11',
5340 'VU': '202.80.32.0/20',
5341 'WF': '117.20.32.0/21',
5342 'WS': '202.4.32.0/19',
5343 'YE': '134.35.0.0/16',
5344 'YT': '41.242.116.0/22',
5345 'ZA': '41.0.0.0/11',
53896ca5
S
5346 'ZM': '102.144.0.0/13',
5347 'ZW': '102.177.192.0/18',
773f291d
S
5348 }
5349
5350 @classmethod
5f95927a
S
5351 def random_ipv4(cls, code_or_block):
5352 if len(code_or_block) == 2:
5353 block = cls._country_ip_map.get(code_or_block.upper())
5354 if not block:
5355 return None
5356 else:
5357 block = code_or_block
773f291d
S
5358 addr, preflen = block.split('/')
5359 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5360 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5361 return compat_str(socket.inet_ntoa(
4248dad9 5362 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5363
5364
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets each request select its own proxy.

    A request chooses a proxy through the 'Ytdl-request-proxy' header;
    the header is removed before the request goes any further.
    """

    def __init__(self, proxies=None):
        def make_default_opener(scheme, meth=self.proxy_open):
            return lambda r, proxy='__noproxy__', type=scheme, meth=meth: meth(r, proxy, type)

        # Set default handlers
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme, make_default_opener(scheme))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy overrides the one configured for the scheme
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers do wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5388
5389
0a5445dd
YCH
5390# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5391# released into Public Domain
5392# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5393
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    The result is big-endian without leading zero bytes; zero (and any
    value that is not positive) gives a single zero byte.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Peel off 32-bit words starting with the least significant one
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # Drop the zero bytes the most significant word may start with
    s = b''.join(reversed(words)).lstrip(b'\000') or b'\000'
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5422
5423
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    The bytes are read as a big-endian number. This is (essentially) the
    inverse of long_to_bytes().
    """
    # Left-pad to a whole number of 32-bit words
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s

    acc = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        acc = (acc << 32) + word
    return acc
5439
5440
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is the data read as a little-endian number
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
81bdc8fd
YCH
5456
5457
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the form [0, 2] + padding + [0] + data, where the
    padding consists of at least 8 non-zero pseudo-random octets.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not fit into length
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Padding octets must be non-zero: the first zero octet after the
    # [0, 2] header marks where the data starts
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5471
5472
def encode_base_n(num, n, table=None):
    """
    Encode the non-negative integer num as a string of base-n digits.

    @param num    non-negative integer to encode
    @param n      base; must be at least 2 for non-zero num and must not
                  exceed the number of available digit symbols
    @param table  optional sequence of digit symbols, indexed by digit value.
                  Defaults to the first n characters of 0-9, a-z, A-Z.
    @returns      the encoded string, most significant digit first
    @raises ValueError  if n exceeds the table length, if num is negative,
                        or if n is below 2 while num is non-zero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never brings a negative number (or any number in base 1)
    # down to zero, so these inputs would otherwise loop forever.
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small' % n)

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return ''.join(reversed(digits))
f52354a8
YCH
5489
5490
def decode_packed_codes(code):
    """
    Expand packed code matched by PACKED_CODES_RE.

    The match supplies the obfuscated code, a base, a symbol count and a
    '|'-separated symbol list. Every word in the obfuscated code is a
    base-n index into that list; an empty symbol means the word stands
    for itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    radix = int(base)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in reversed(range(int(count))):
        encoded_index = encode_base_n(index, radix)
        symbol_table[encoded_index] = symbols[index] or encoded_index

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
e154c651 5507
5508
1ced2221
S
def caesar(s, alphabet, shift):
    """
    Apply a Caesar cipher to s.

    Every character of s found in alphabet is replaced by the character
    shift positions further along (wrapping around); all other characters
    are kept. With a shift of 0, s itself is returned.
    """
    if shift == 0:
        return s

    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))
5516
5517
def rot47(s):
    """Rotate each character of s listed in the alphabet below by 47 places, via caesar()."""
    alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, alphabet, 47)
5520
5521
def parse_m3u8_attributes(attrib):
    """
    Parse a comma-separated KEY=value attribute list into a dict.

    Values may be bare or wrapped in double quotes; the surrounding quotes
    are removed. If a key occurs more than once, the last value wins.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    return {
        name: raw[1:-1] if raw.startswith('"') else raw
        for name, raw in re.findall(attribute_re, attrib)
    }
1143535d
YCH
5529
5530
def urshift(val, n):
    """Shift val right by n bits, first adding 2**32 to negative values (unsigned 32-bit shift)."""
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5533
5534
5535# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5536# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """
    Decode the pixel data of a PNG image.

    Only the layout handled below is supported: every pixel is read as
    3 bytes (stride = width * 3). Bit depth, colour type and interlacing
    are not inspected, and chunk CRCs are skipped without verification.

    @param png_data  bytes of the complete PNG file
    @returns         tuple (width, height, pixels); pixels is a list with one
                     entry per row, each a flat list of width * 3 unfiltered
                     byte values
    @raises IOError  if the signature or the leading IHDR chunk is missing,
                     or if the file contains no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # 8-byte PNG signature, then the first chunk (4-byte length + type) must be IHDR
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer of 1, 2 or 4 bytes, chosen by input length
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk list: length (4) + type (4) + data (length) + CRC (4)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk was verified to be IHDR above; it starts with width and height
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # The compressed image stream is the concatenation of all IDAT chunks
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per row, excluding the filter-type byte that precedes each row
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # idx is a flat index into the already unfiltered bytes
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each row in the decompressed stream is 1 filter byte + stride data bytes
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before being filled so that _get_pixel can read the current row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # The byte 3 positions back is the same channel of the previous pixel
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Any other filter type (including 0) leaves the byte unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of left, up, upper-left is closest to p (ties in that order)
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5640
5641
def write_xattr(path, key, value):
    """
    Set the extended attribute key to value on the file at path.

    Uses the Python `xattr` module when it can be imported (either the
    pyxattr or the xattr flavour). Otherwise, on Windows the value is written
    to an NTFS Alternate Data Stream, and elsewhere the `setfattr` or `xattr`
    command line tool is invoked.

    @param path   path of the file to tag
    @param key    attribute name
    @param value  attribute value as bytes (it is written to a binary file
                  or decoded as UTF-8 for the command line tools)
    @raises XAttrMetadataError     if setting the attribute fails
    @raises XAttrUnavailableError  if no usable implementation is found, or
                                   if the installed pyxattr is too old
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): only ImportError is handled below, so this error
                # appears to propagate to the caller instead of falling back
                # as the message says - confirm
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module is installed; use a platform-specific fallback
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # (these asserts are skipped when Python runs with -O)
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The command line tools take the value as a text argument
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are available
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate()
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported together with the tool's stderr output
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
0c265486
YCH
5724
5725
def random_birthday(year_field, month_field, day_field):
    """
    Pick a random date between 1950-01-01 and 1995-12-31 (both inclusive).

    Returns a dict mapping the three given field names to the year, month
    and day of that date, each converted with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, map(str, date_parts)))
732044af 5736
c76eb41b 5737
# Templates for internet shortcut files, which are plain text files.
# The %(...)s placeholders are meant for %-style formatting with a mapping.
# .lstrip() removes the newline that follows the opening quotes.

# `.url` shortcut ([InternetShortcut] INI section); placeholder: url
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# `.webloc` shortcut (Apple property list XML); placeholder: url
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# `.desktop` shortcut ([Desktop Entry] of type Link); placeholders: filename, url
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5763
5764
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    Unicode hostnames are converted to Punycode. An explicit port 80 is dropped for the `http` scheme only, where it is the default; for every other scheme the port is kept. IRIs without a host (e.g. relative references) are supported.

    Raises ValueError for IPv6 hosts, which are not supported.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host part; there is nothing to encode then.
    hostname = iri_parts.hostname
    if hostname:
        net_location += hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is redundant only for plain HTTP. For any other scheme (e.g. https, whose default is 443) an explicit port 80 is significant and dropping it would point the URI at a different endpoint.
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
         net_location,

         compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

         # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
         compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

         # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
         compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

         compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5807
5808
def to_high_limit_path(path):
    """
    Return a form of path that is not subject to the MAX_PATH limit.

    On win32 and cygwin the absolute path is returned with the extended-length
    prefix in front; on every other platform path is returned unchanged.
    """
    if sys.platform not in ('win32', 'cygwin'):
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    # A raw string cannot end in a backslash, hence the trailing space that is stripped again.
    extended_prefix = r'\\?\ '.rstrip()
    return extended_prefix + os.path.abspath(path)
76d321f6 5815
c76eb41b 5816
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """
    Format obj[field] with template, or return default when there is nothing to show.

    @param obj       mapping to read from (anything with a .get method)
    @param field     key to look up; default is used when it is missing
    @param template  %-style template applied to the value
    @param ignore    values treated as "nothing to show"
    @param default   returned when the (possibly transformed) value is ignored
    @param func      optional callable applied to a non-ignored value first
    """
    value = obj.get(field, default)
    if func and value not in ignore:
        value = func(value)
    # Checked again because func may itself have produced an ignored value
    if value in ignore:
        return default
    return template % value