]> jfr.im git - yt-dlp.git/blame - youtube_dlc/utils.py
Readthedocs support (#107)
[yt-dlp.git] / youtube_dlc / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
8f9312c3 43 compat_basestring,
8c25f81b 44 compat_chr,
1bab3437 45 compat_cookiejar,
d7cd9a9e 46 compat_ctypes_WINFUNCTYPE,
36e6f62c 47 compat_etree_fromstring,
51098426 48 compat_expanduser,
8c25f81b 49 compat_html_entities,
55b2f099 50 compat_html_entities_html5,
be4a824d 51 compat_http_client,
42db58ec 52 compat_integer_types,
e29663c6 53 compat_numeric_types,
c86b6142 54 compat_kwargs,
efa97bdc 55 compat_os_name,
8c25f81b 56 compat_parse_qs,
702ccf2d 57 compat_shlex_quote,
8c25f81b 58 compat_str,
edaa23f8 59 compat_struct_pack,
d3f8e038 60 compat_struct_unpack,
8c25f81b
PH
61 compat_urllib_error,
62 compat_urllib_parse,
15707c7e 63 compat_urllib_parse_urlencode,
8c25f81b 64 compat_urllib_parse_urlparse,
732044af 65 compat_urllib_parse_urlunparse,
66 compat_urllib_parse_quote,
67 compat_urllib_parse_quote_plus,
7581bfc9 68 compat_urllib_parse_unquote_plus,
8c25f81b
PH
69 compat_urllib_request,
70 compat_urlparse,
810c10ba 71 compat_xpath,
8c25f81b 72)
4644ac55 73
71aff188
YCH
74from .socks import (
75 ProxyType,
76 sockssocket,
77)
78
4644ac55 79
51fb4995
YCH
def register_socks_protocols():
    """Register the SOCKS proxy schemes with the URL parser.

    Appends 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc`` unless a scheme is already listed
    there, so repeated calls do not add duplicate entries.
    """
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
87
88
468e2e92
FV
# Type of a compiled regular expression object. It is taken from an actual
# compiled pattern because this is not clearly defined otherwise.
compiled_regex_type = type(re.compile(''))
91
f7a147e3
S
92
93def random_user_agent():
94 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
95 _CHROME_VERSIONS = (
96 '74.0.3729.129',
97 '76.0.3780.3',
98 '76.0.3780.2',
99 '74.0.3729.128',
100 '76.0.3780.1',
101 '76.0.3780.0',
102 '75.0.3770.15',
103 '74.0.3729.127',
104 '74.0.3729.126',
105 '76.0.3779.1',
106 '76.0.3779.0',
107 '75.0.3770.14',
108 '74.0.3729.125',
109 '76.0.3778.1',
110 '76.0.3778.0',
111 '75.0.3770.13',
112 '74.0.3729.124',
113 '74.0.3729.123',
114 '73.0.3683.121',
115 '76.0.3777.1',
116 '76.0.3777.0',
117 '75.0.3770.12',
118 '74.0.3729.122',
119 '76.0.3776.4',
120 '75.0.3770.11',
121 '74.0.3729.121',
122 '76.0.3776.3',
123 '76.0.3776.2',
124 '73.0.3683.120',
125 '74.0.3729.120',
126 '74.0.3729.119',
127 '74.0.3729.118',
128 '76.0.3776.1',
129 '76.0.3776.0',
130 '76.0.3775.5',
131 '75.0.3770.10',
132 '74.0.3729.117',
133 '76.0.3775.4',
134 '76.0.3775.3',
135 '74.0.3729.116',
136 '75.0.3770.9',
137 '76.0.3775.2',
138 '76.0.3775.1',
139 '76.0.3775.0',
140 '75.0.3770.8',
141 '74.0.3729.115',
142 '74.0.3729.114',
143 '76.0.3774.1',
144 '76.0.3774.0',
145 '75.0.3770.7',
146 '74.0.3729.113',
147 '74.0.3729.112',
148 '74.0.3729.111',
149 '76.0.3773.1',
150 '76.0.3773.0',
151 '75.0.3770.6',
152 '74.0.3729.110',
153 '74.0.3729.109',
154 '76.0.3772.1',
155 '76.0.3772.0',
156 '75.0.3770.5',
157 '74.0.3729.108',
158 '74.0.3729.107',
159 '76.0.3771.1',
160 '76.0.3771.0',
161 '75.0.3770.4',
162 '74.0.3729.106',
163 '74.0.3729.105',
164 '75.0.3770.3',
165 '74.0.3729.104',
166 '74.0.3729.103',
167 '74.0.3729.102',
168 '75.0.3770.2',
169 '74.0.3729.101',
170 '75.0.3770.1',
171 '75.0.3770.0',
172 '74.0.3729.100',
173 '75.0.3769.5',
174 '75.0.3769.4',
175 '74.0.3729.99',
176 '75.0.3769.3',
177 '75.0.3769.2',
178 '75.0.3768.6',
179 '74.0.3729.98',
180 '75.0.3769.1',
181 '75.0.3769.0',
182 '74.0.3729.97',
183 '73.0.3683.119',
184 '73.0.3683.118',
185 '74.0.3729.96',
186 '75.0.3768.5',
187 '75.0.3768.4',
188 '75.0.3768.3',
189 '75.0.3768.2',
190 '74.0.3729.95',
191 '74.0.3729.94',
192 '75.0.3768.1',
193 '75.0.3768.0',
194 '74.0.3729.93',
195 '74.0.3729.92',
196 '73.0.3683.117',
197 '74.0.3729.91',
198 '75.0.3766.3',
199 '74.0.3729.90',
200 '75.0.3767.2',
201 '75.0.3767.1',
202 '75.0.3767.0',
203 '74.0.3729.89',
204 '73.0.3683.116',
205 '75.0.3766.2',
206 '74.0.3729.88',
207 '75.0.3766.1',
208 '75.0.3766.0',
209 '74.0.3729.87',
210 '73.0.3683.115',
211 '74.0.3729.86',
212 '75.0.3765.1',
213 '75.0.3765.0',
214 '74.0.3729.85',
215 '73.0.3683.114',
216 '74.0.3729.84',
217 '75.0.3764.1',
218 '75.0.3764.0',
219 '74.0.3729.83',
220 '73.0.3683.113',
221 '75.0.3763.2',
222 '75.0.3761.4',
223 '74.0.3729.82',
224 '75.0.3763.1',
225 '75.0.3763.0',
226 '74.0.3729.81',
227 '73.0.3683.112',
228 '75.0.3762.1',
229 '75.0.3762.0',
230 '74.0.3729.80',
231 '75.0.3761.3',
232 '74.0.3729.79',
233 '73.0.3683.111',
234 '75.0.3761.2',
235 '74.0.3729.78',
236 '74.0.3729.77',
237 '75.0.3761.1',
238 '75.0.3761.0',
239 '73.0.3683.110',
240 '74.0.3729.76',
241 '74.0.3729.75',
242 '75.0.3760.0',
243 '74.0.3729.74',
244 '75.0.3759.8',
245 '75.0.3759.7',
246 '75.0.3759.6',
247 '74.0.3729.73',
248 '75.0.3759.5',
249 '74.0.3729.72',
250 '73.0.3683.109',
251 '75.0.3759.4',
252 '75.0.3759.3',
253 '74.0.3729.71',
254 '75.0.3759.2',
255 '74.0.3729.70',
256 '73.0.3683.108',
257 '74.0.3729.69',
258 '75.0.3759.1',
259 '75.0.3759.0',
260 '74.0.3729.68',
261 '73.0.3683.107',
262 '74.0.3729.67',
263 '75.0.3758.1',
264 '75.0.3758.0',
265 '74.0.3729.66',
266 '73.0.3683.106',
267 '74.0.3729.65',
268 '75.0.3757.1',
269 '75.0.3757.0',
270 '74.0.3729.64',
271 '73.0.3683.105',
272 '74.0.3729.63',
273 '75.0.3756.1',
274 '75.0.3756.0',
275 '74.0.3729.62',
276 '73.0.3683.104',
277 '75.0.3755.3',
278 '75.0.3755.2',
279 '73.0.3683.103',
280 '75.0.3755.1',
281 '75.0.3755.0',
282 '74.0.3729.61',
283 '73.0.3683.102',
284 '74.0.3729.60',
285 '75.0.3754.2',
286 '74.0.3729.59',
287 '75.0.3753.4',
288 '74.0.3729.58',
289 '75.0.3754.1',
290 '75.0.3754.0',
291 '74.0.3729.57',
292 '73.0.3683.101',
293 '75.0.3753.3',
294 '75.0.3752.2',
295 '75.0.3753.2',
296 '74.0.3729.56',
297 '75.0.3753.1',
298 '75.0.3753.0',
299 '74.0.3729.55',
300 '73.0.3683.100',
301 '74.0.3729.54',
302 '75.0.3752.1',
303 '75.0.3752.0',
304 '74.0.3729.53',
305 '73.0.3683.99',
306 '74.0.3729.52',
307 '75.0.3751.1',
308 '75.0.3751.0',
309 '74.0.3729.51',
310 '73.0.3683.98',
311 '74.0.3729.50',
312 '75.0.3750.0',
313 '74.0.3729.49',
314 '74.0.3729.48',
315 '74.0.3729.47',
316 '75.0.3749.3',
317 '74.0.3729.46',
318 '73.0.3683.97',
319 '75.0.3749.2',
320 '74.0.3729.45',
321 '75.0.3749.1',
322 '75.0.3749.0',
323 '74.0.3729.44',
324 '73.0.3683.96',
325 '74.0.3729.43',
326 '74.0.3729.42',
327 '75.0.3748.1',
328 '75.0.3748.0',
329 '74.0.3729.41',
330 '75.0.3747.1',
331 '73.0.3683.95',
332 '75.0.3746.4',
333 '74.0.3729.40',
334 '74.0.3729.39',
335 '75.0.3747.0',
336 '75.0.3746.3',
337 '75.0.3746.2',
338 '74.0.3729.38',
339 '75.0.3746.1',
340 '75.0.3746.0',
341 '74.0.3729.37',
342 '73.0.3683.94',
343 '75.0.3745.5',
344 '75.0.3745.4',
345 '75.0.3745.3',
346 '75.0.3745.2',
347 '74.0.3729.36',
348 '75.0.3745.1',
349 '75.0.3745.0',
350 '75.0.3744.2',
351 '74.0.3729.35',
352 '73.0.3683.93',
353 '74.0.3729.34',
354 '75.0.3744.1',
355 '75.0.3744.0',
356 '74.0.3729.33',
357 '73.0.3683.92',
358 '74.0.3729.32',
359 '74.0.3729.31',
360 '73.0.3683.91',
361 '75.0.3741.2',
362 '75.0.3740.5',
363 '74.0.3729.30',
364 '75.0.3741.1',
365 '75.0.3741.0',
366 '74.0.3729.29',
367 '75.0.3740.4',
368 '73.0.3683.90',
369 '74.0.3729.28',
370 '75.0.3740.3',
371 '73.0.3683.89',
372 '75.0.3740.2',
373 '74.0.3729.27',
374 '75.0.3740.1',
375 '75.0.3740.0',
376 '74.0.3729.26',
377 '73.0.3683.88',
378 '73.0.3683.87',
379 '74.0.3729.25',
380 '75.0.3739.1',
381 '75.0.3739.0',
382 '73.0.3683.86',
383 '74.0.3729.24',
384 '73.0.3683.85',
385 '75.0.3738.4',
386 '75.0.3738.3',
387 '75.0.3738.2',
388 '75.0.3738.1',
389 '75.0.3738.0',
390 '74.0.3729.23',
391 '73.0.3683.84',
392 '74.0.3729.22',
393 '74.0.3729.21',
394 '75.0.3737.1',
395 '75.0.3737.0',
396 '74.0.3729.20',
397 '73.0.3683.83',
398 '74.0.3729.19',
399 '75.0.3736.1',
400 '75.0.3736.0',
401 '74.0.3729.18',
402 '73.0.3683.82',
403 '74.0.3729.17',
404 '75.0.3735.1',
405 '75.0.3735.0',
406 '74.0.3729.16',
407 '73.0.3683.81',
408 '75.0.3734.1',
409 '75.0.3734.0',
410 '74.0.3729.15',
411 '73.0.3683.80',
412 '74.0.3729.14',
413 '75.0.3733.1',
414 '75.0.3733.0',
415 '75.0.3732.1',
416 '74.0.3729.13',
417 '74.0.3729.12',
418 '73.0.3683.79',
419 '74.0.3729.11',
420 '75.0.3732.0',
421 '74.0.3729.10',
422 '73.0.3683.78',
423 '74.0.3729.9',
424 '74.0.3729.8',
425 '74.0.3729.7',
426 '75.0.3731.3',
427 '75.0.3731.2',
428 '75.0.3731.0',
429 '74.0.3729.6',
430 '73.0.3683.77',
431 '73.0.3683.76',
432 '75.0.3730.5',
433 '75.0.3730.4',
434 '73.0.3683.75',
435 '74.0.3729.5',
436 '73.0.3683.74',
437 '75.0.3730.3',
438 '75.0.3730.2',
439 '74.0.3729.4',
440 '73.0.3683.73',
441 '73.0.3683.72',
442 '75.0.3730.1',
443 '75.0.3730.0',
444 '74.0.3729.3',
445 '73.0.3683.71',
446 '74.0.3729.2',
447 '73.0.3683.70',
448 '74.0.3729.1',
449 '74.0.3729.0',
450 '74.0.3726.4',
451 '73.0.3683.69',
452 '74.0.3726.3',
453 '74.0.3728.0',
454 '74.0.3726.2',
455 '73.0.3683.68',
456 '74.0.3726.1',
457 '74.0.3726.0',
458 '74.0.3725.4',
459 '73.0.3683.67',
460 '73.0.3683.66',
461 '74.0.3725.3',
462 '74.0.3725.2',
463 '74.0.3725.1',
464 '74.0.3724.8',
465 '74.0.3725.0',
466 '73.0.3683.65',
467 '74.0.3724.7',
468 '74.0.3724.6',
469 '74.0.3724.5',
470 '74.0.3724.4',
471 '74.0.3724.3',
472 '74.0.3724.2',
473 '74.0.3724.1',
474 '74.0.3724.0',
475 '73.0.3683.64',
476 '74.0.3723.1',
477 '74.0.3723.0',
478 '73.0.3683.63',
479 '74.0.3722.1',
480 '74.0.3722.0',
481 '73.0.3683.62',
482 '74.0.3718.9',
483 '74.0.3702.3',
484 '74.0.3721.3',
485 '74.0.3721.2',
486 '74.0.3721.1',
487 '74.0.3721.0',
488 '74.0.3720.6',
489 '73.0.3683.61',
490 '72.0.3626.122',
491 '73.0.3683.60',
492 '74.0.3720.5',
493 '72.0.3626.121',
494 '74.0.3718.8',
495 '74.0.3720.4',
496 '74.0.3720.3',
497 '74.0.3718.7',
498 '74.0.3720.2',
499 '74.0.3720.1',
500 '74.0.3720.0',
501 '74.0.3718.6',
502 '74.0.3719.5',
503 '73.0.3683.59',
504 '74.0.3718.5',
505 '74.0.3718.4',
506 '74.0.3719.4',
507 '74.0.3719.3',
508 '74.0.3719.2',
509 '74.0.3719.1',
510 '73.0.3683.58',
511 '74.0.3719.0',
512 '73.0.3683.57',
513 '73.0.3683.56',
514 '74.0.3718.3',
515 '73.0.3683.55',
516 '74.0.3718.2',
517 '74.0.3718.1',
518 '74.0.3718.0',
519 '73.0.3683.54',
520 '74.0.3717.2',
521 '73.0.3683.53',
522 '74.0.3717.1',
523 '74.0.3717.0',
524 '73.0.3683.52',
525 '74.0.3716.1',
526 '74.0.3716.0',
527 '73.0.3683.51',
528 '74.0.3715.1',
529 '74.0.3715.0',
530 '73.0.3683.50',
531 '74.0.3711.2',
532 '74.0.3714.2',
533 '74.0.3713.3',
534 '74.0.3714.1',
535 '74.0.3714.0',
536 '73.0.3683.49',
537 '74.0.3713.1',
538 '74.0.3713.0',
539 '72.0.3626.120',
540 '73.0.3683.48',
541 '74.0.3712.2',
542 '74.0.3712.1',
543 '74.0.3712.0',
544 '73.0.3683.47',
545 '72.0.3626.119',
546 '73.0.3683.46',
547 '74.0.3710.2',
548 '72.0.3626.118',
549 '74.0.3711.1',
550 '74.0.3711.0',
551 '73.0.3683.45',
552 '72.0.3626.117',
553 '74.0.3710.1',
554 '74.0.3710.0',
555 '73.0.3683.44',
556 '72.0.3626.116',
557 '74.0.3709.1',
558 '74.0.3709.0',
559 '74.0.3704.9',
560 '73.0.3683.43',
561 '72.0.3626.115',
562 '74.0.3704.8',
563 '74.0.3704.7',
564 '74.0.3708.0',
565 '74.0.3706.7',
566 '74.0.3704.6',
567 '73.0.3683.42',
568 '72.0.3626.114',
569 '74.0.3706.6',
570 '72.0.3626.113',
571 '74.0.3704.5',
572 '74.0.3706.5',
573 '74.0.3706.4',
574 '74.0.3706.3',
575 '74.0.3706.2',
576 '74.0.3706.1',
577 '74.0.3706.0',
578 '73.0.3683.41',
579 '72.0.3626.112',
580 '74.0.3705.1',
581 '74.0.3705.0',
582 '73.0.3683.40',
583 '72.0.3626.111',
584 '73.0.3683.39',
585 '74.0.3704.4',
586 '73.0.3683.38',
587 '74.0.3704.3',
588 '74.0.3704.2',
589 '74.0.3704.1',
590 '74.0.3704.0',
591 '73.0.3683.37',
592 '72.0.3626.110',
593 '72.0.3626.109',
594 '74.0.3703.3',
595 '74.0.3703.2',
596 '73.0.3683.36',
597 '74.0.3703.1',
598 '74.0.3703.0',
599 '73.0.3683.35',
600 '72.0.3626.108',
601 '74.0.3702.2',
602 '74.0.3699.3',
603 '74.0.3702.1',
604 '74.0.3702.0',
605 '73.0.3683.34',
606 '72.0.3626.107',
607 '73.0.3683.33',
608 '74.0.3701.1',
609 '74.0.3701.0',
610 '73.0.3683.32',
611 '73.0.3683.31',
612 '72.0.3626.105',
613 '74.0.3700.1',
614 '74.0.3700.0',
615 '73.0.3683.29',
616 '72.0.3626.103',
617 '74.0.3699.2',
618 '74.0.3699.1',
619 '74.0.3699.0',
620 '73.0.3683.28',
621 '72.0.3626.102',
622 '73.0.3683.27',
623 '73.0.3683.26',
624 '74.0.3698.0',
625 '74.0.3696.2',
626 '72.0.3626.101',
627 '73.0.3683.25',
628 '74.0.3696.1',
629 '74.0.3696.0',
630 '74.0.3694.8',
631 '72.0.3626.100',
632 '74.0.3694.7',
633 '74.0.3694.6',
634 '74.0.3694.5',
635 '74.0.3694.4',
636 '72.0.3626.99',
637 '72.0.3626.98',
638 '74.0.3694.3',
639 '73.0.3683.24',
640 '72.0.3626.97',
641 '72.0.3626.96',
642 '72.0.3626.95',
643 '73.0.3683.23',
644 '72.0.3626.94',
645 '73.0.3683.22',
646 '73.0.3683.21',
647 '72.0.3626.93',
648 '74.0.3694.2',
649 '72.0.3626.92',
650 '74.0.3694.1',
651 '74.0.3694.0',
652 '74.0.3693.6',
653 '73.0.3683.20',
654 '72.0.3626.91',
655 '74.0.3693.5',
656 '74.0.3693.4',
657 '74.0.3693.3',
658 '74.0.3693.2',
659 '73.0.3683.19',
660 '74.0.3693.1',
661 '74.0.3693.0',
662 '73.0.3683.18',
663 '72.0.3626.90',
664 '74.0.3692.1',
665 '74.0.3692.0',
666 '73.0.3683.17',
667 '72.0.3626.89',
668 '74.0.3687.3',
669 '74.0.3691.1',
670 '74.0.3691.0',
671 '73.0.3683.16',
672 '72.0.3626.88',
673 '72.0.3626.87',
674 '73.0.3683.15',
675 '74.0.3690.1',
676 '74.0.3690.0',
677 '73.0.3683.14',
678 '72.0.3626.86',
679 '73.0.3683.13',
680 '73.0.3683.12',
681 '74.0.3689.1',
682 '74.0.3689.0',
683 '73.0.3683.11',
684 '72.0.3626.85',
685 '73.0.3683.10',
686 '72.0.3626.84',
687 '73.0.3683.9',
688 '74.0.3688.1',
689 '74.0.3688.0',
690 '73.0.3683.8',
691 '72.0.3626.83',
692 '74.0.3687.2',
693 '74.0.3687.1',
694 '74.0.3687.0',
695 '73.0.3683.7',
696 '72.0.3626.82',
697 '74.0.3686.4',
698 '72.0.3626.81',
699 '74.0.3686.3',
700 '74.0.3686.2',
701 '74.0.3686.1',
702 '74.0.3686.0',
703 '73.0.3683.6',
704 '72.0.3626.80',
705 '74.0.3685.1',
706 '74.0.3685.0',
707 '73.0.3683.5',
708 '72.0.3626.79',
709 '74.0.3684.1',
710 '74.0.3684.0',
711 '73.0.3683.4',
712 '72.0.3626.78',
713 '72.0.3626.77',
714 '73.0.3683.3',
715 '73.0.3683.2',
716 '72.0.3626.76',
717 '73.0.3683.1',
718 '73.0.3683.0',
719 '72.0.3626.75',
720 '71.0.3578.141',
721 '73.0.3682.1',
722 '73.0.3682.0',
723 '72.0.3626.74',
724 '71.0.3578.140',
725 '73.0.3681.4',
726 '73.0.3681.3',
727 '73.0.3681.2',
728 '73.0.3681.1',
729 '73.0.3681.0',
730 '72.0.3626.73',
731 '71.0.3578.139',
732 '72.0.3626.72',
733 '72.0.3626.71',
734 '73.0.3680.1',
735 '73.0.3680.0',
736 '72.0.3626.70',
737 '71.0.3578.138',
738 '73.0.3678.2',
739 '73.0.3679.1',
740 '73.0.3679.0',
741 '72.0.3626.69',
742 '71.0.3578.137',
743 '73.0.3678.1',
744 '73.0.3678.0',
745 '71.0.3578.136',
746 '73.0.3677.1',
747 '73.0.3677.0',
748 '72.0.3626.68',
749 '72.0.3626.67',
750 '71.0.3578.135',
751 '73.0.3676.1',
752 '73.0.3676.0',
753 '73.0.3674.2',
754 '72.0.3626.66',
755 '71.0.3578.134',
756 '73.0.3674.1',
757 '73.0.3674.0',
758 '72.0.3626.65',
759 '71.0.3578.133',
760 '73.0.3673.2',
761 '73.0.3673.1',
762 '73.0.3673.0',
763 '72.0.3626.64',
764 '71.0.3578.132',
765 '72.0.3626.63',
766 '72.0.3626.62',
767 '72.0.3626.61',
768 '72.0.3626.60',
769 '73.0.3672.1',
770 '73.0.3672.0',
771 '72.0.3626.59',
772 '71.0.3578.131',
773 '73.0.3671.3',
774 '73.0.3671.2',
775 '73.0.3671.1',
776 '73.0.3671.0',
777 '72.0.3626.58',
778 '71.0.3578.130',
779 '73.0.3670.1',
780 '73.0.3670.0',
781 '72.0.3626.57',
782 '71.0.3578.129',
783 '73.0.3669.1',
784 '73.0.3669.0',
785 '72.0.3626.56',
786 '71.0.3578.128',
787 '73.0.3668.2',
788 '73.0.3668.1',
789 '73.0.3668.0',
790 '72.0.3626.55',
791 '71.0.3578.127',
792 '73.0.3667.2',
793 '73.0.3667.1',
794 '73.0.3667.0',
795 '72.0.3626.54',
796 '71.0.3578.126',
797 '73.0.3666.1',
798 '73.0.3666.0',
799 '72.0.3626.53',
800 '71.0.3578.125',
801 '73.0.3665.4',
802 '73.0.3665.3',
803 '72.0.3626.52',
804 '73.0.3665.2',
805 '73.0.3664.4',
806 '73.0.3665.1',
807 '73.0.3665.0',
808 '72.0.3626.51',
809 '71.0.3578.124',
810 '72.0.3626.50',
811 '73.0.3664.3',
812 '73.0.3664.2',
813 '73.0.3664.1',
814 '73.0.3664.0',
815 '73.0.3663.2',
816 '72.0.3626.49',
817 '71.0.3578.123',
818 '73.0.3663.1',
819 '73.0.3663.0',
820 '72.0.3626.48',
821 '71.0.3578.122',
822 '73.0.3662.1',
823 '73.0.3662.0',
824 '72.0.3626.47',
825 '71.0.3578.121',
826 '73.0.3661.1',
827 '72.0.3626.46',
828 '73.0.3661.0',
829 '72.0.3626.45',
830 '71.0.3578.120',
831 '73.0.3660.2',
832 '73.0.3660.1',
833 '73.0.3660.0',
834 '72.0.3626.44',
835 '71.0.3578.119',
836 '73.0.3659.1',
837 '73.0.3659.0',
838 '72.0.3626.43',
839 '71.0.3578.118',
840 '73.0.3658.1',
841 '73.0.3658.0',
842 '72.0.3626.42',
843 '71.0.3578.117',
844 '73.0.3657.1',
845 '73.0.3657.0',
846 '72.0.3626.41',
847 '71.0.3578.116',
848 '73.0.3656.1',
849 '73.0.3656.0',
850 '72.0.3626.40',
851 '71.0.3578.115',
852 '73.0.3655.1',
853 '73.0.3655.0',
854 '72.0.3626.39',
855 '71.0.3578.114',
856 '73.0.3654.1',
857 '73.0.3654.0',
858 '72.0.3626.38',
859 '71.0.3578.113',
860 '73.0.3653.1',
861 '73.0.3653.0',
862 '72.0.3626.37',
863 '71.0.3578.112',
864 '73.0.3652.1',
865 '73.0.3652.0',
866 '72.0.3626.36',
867 '71.0.3578.111',
868 '73.0.3651.1',
869 '73.0.3651.0',
870 '72.0.3626.35',
871 '71.0.3578.110',
872 '73.0.3650.1',
873 '73.0.3650.0',
874 '72.0.3626.34',
875 '71.0.3578.109',
876 '73.0.3649.1',
877 '73.0.3649.0',
878 '72.0.3626.33',
879 '71.0.3578.108',
880 '73.0.3648.2',
881 '73.0.3648.1',
882 '73.0.3648.0',
883 '72.0.3626.32',
884 '71.0.3578.107',
885 '73.0.3647.2',
886 '73.0.3647.1',
887 '73.0.3647.0',
888 '72.0.3626.31',
889 '71.0.3578.106',
890 '73.0.3635.3',
891 '73.0.3646.2',
892 '73.0.3646.1',
893 '73.0.3646.0',
894 '72.0.3626.30',
895 '71.0.3578.105',
896 '72.0.3626.29',
897 '73.0.3645.2',
898 '73.0.3645.1',
899 '73.0.3645.0',
900 '72.0.3626.28',
901 '71.0.3578.104',
902 '72.0.3626.27',
903 '72.0.3626.26',
904 '72.0.3626.25',
905 '72.0.3626.24',
906 '73.0.3644.0',
907 '73.0.3643.2',
908 '72.0.3626.23',
909 '71.0.3578.103',
910 '73.0.3643.1',
911 '73.0.3643.0',
912 '72.0.3626.22',
913 '71.0.3578.102',
914 '73.0.3642.1',
915 '73.0.3642.0',
916 '72.0.3626.21',
917 '71.0.3578.101',
918 '73.0.3641.1',
919 '73.0.3641.0',
920 '72.0.3626.20',
921 '71.0.3578.100',
922 '72.0.3626.19',
923 '73.0.3640.1',
924 '73.0.3640.0',
925 '72.0.3626.18',
926 '73.0.3639.1',
927 '71.0.3578.99',
928 '73.0.3639.0',
929 '72.0.3626.17',
930 '73.0.3638.2',
931 '72.0.3626.16',
932 '73.0.3638.1',
933 '73.0.3638.0',
934 '72.0.3626.15',
935 '71.0.3578.98',
936 '73.0.3635.2',
937 '71.0.3578.97',
938 '73.0.3637.1',
939 '73.0.3637.0',
940 '72.0.3626.14',
941 '71.0.3578.96',
942 '71.0.3578.95',
943 '72.0.3626.13',
944 '71.0.3578.94',
945 '73.0.3636.2',
946 '71.0.3578.93',
947 '73.0.3636.1',
948 '73.0.3636.0',
949 '72.0.3626.12',
950 '71.0.3578.92',
951 '73.0.3635.1',
952 '73.0.3635.0',
953 '72.0.3626.11',
954 '71.0.3578.91',
955 '73.0.3634.2',
956 '73.0.3634.1',
957 '73.0.3634.0',
958 '72.0.3626.10',
959 '71.0.3578.90',
960 '71.0.3578.89',
961 '73.0.3633.2',
962 '73.0.3633.1',
963 '73.0.3633.0',
964 '72.0.3610.4',
965 '72.0.3626.9',
966 '71.0.3578.88',
967 '73.0.3632.5',
968 '73.0.3632.4',
969 '73.0.3632.3',
970 '73.0.3632.2',
971 '73.0.3632.1',
972 '73.0.3632.0',
973 '72.0.3626.8',
974 '71.0.3578.87',
975 '73.0.3631.2',
976 '73.0.3631.1',
977 '73.0.3631.0',
978 '72.0.3626.7',
979 '71.0.3578.86',
980 '72.0.3626.6',
981 '73.0.3630.1',
982 '73.0.3630.0',
983 '72.0.3626.5',
984 '71.0.3578.85',
985 '72.0.3626.4',
986 '73.0.3628.3',
987 '73.0.3628.2',
988 '73.0.3629.1',
989 '73.0.3629.0',
990 '72.0.3626.3',
991 '71.0.3578.84',
992 '73.0.3628.1',
993 '73.0.3628.0',
994 '71.0.3578.83',
995 '73.0.3627.1',
996 '73.0.3627.0',
997 '72.0.3626.2',
998 '71.0.3578.82',
999 '71.0.3578.81',
1000 '71.0.3578.80',
1001 '72.0.3626.1',
1002 '72.0.3626.0',
1003 '71.0.3578.79',
1004 '70.0.3538.124',
1005 '71.0.3578.78',
1006 '72.0.3623.4',
1007 '72.0.3625.2',
1008 '72.0.3625.1',
1009 '72.0.3625.0',
1010 '71.0.3578.77',
1011 '70.0.3538.123',
1012 '72.0.3624.4',
1013 '72.0.3624.3',
1014 '72.0.3624.2',
1015 '71.0.3578.76',
1016 '72.0.3624.1',
1017 '72.0.3624.0',
1018 '72.0.3623.3',
1019 '71.0.3578.75',
1020 '70.0.3538.122',
1021 '71.0.3578.74',
1022 '72.0.3623.2',
1023 '72.0.3610.3',
1024 '72.0.3623.1',
1025 '72.0.3623.0',
1026 '72.0.3622.3',
1027 '72.0.3622.2',
1028 '71.0.3578.73',
1029 '70.0.3538.121',
1030 '72.0.3622.1',
1031 '72.0.3622.0',
1032 '71.0.3578.72',
1033 '70.0.3538.120',
1034 '72.0.3621.1',
1035 '72.0.3621.0',
1036 '71.0.3578.71',
1037 '70.0.3538.119',
1038 '72.0.3620.1',
1039 '72.0.3620.0',
1040 '71.0.3578.70',
1041 '70.0.3538.118',
1042 '71.0.3578.69',
1043 '72.0.3619.1',
1044 '72.0.3619.0',
1045 '71.0.3578.68',
1046 '70.0.3538.117',
1047 '71.0.3578.67',
1048 '72.0.3618.1',
1049 '72.0.3618.0',
1050 '71.0.3578.66',
1051 '70.0.3538.116',
1052 '72.0.3617.1',
1053 '72.0.3617.0',
1054 '71.0.3578.65',
1055 '70.0.3538.115',
1056 '72.0.3602.3',
1057 '71.0.3578.64',
1058 '72.0.3616.1',
1059 '72.0.3616.0',
1060 '71.0.3578.63',
1061 '70.0.3538.114',
1062 '71.0.3578.62',
1063 '72.0.3615.1',
1064 '72.0.3615.0',
1065 '71.0.3578.61',
1066 '70.0.3538.113',
1067 '72.0.3614.1',
1068 '72.0.3614.0',
1069 '71.0.3578.60',
1070 '70.0.3538.112',
1071 '72.0.3613.1',
1072 '72.0.3613.0',
1073 '71.0.3578.59',
1074 '70.0.3538.111',
1075 '72.0.3612.2',
1076 '72.0.3612.1',
1077 '72.0.3612.0',
1078 '70.0.3538.110',
1079 '71.0.3578.58',
1080 '70.0.3538.109',
1081 '72.0.3611.2',
1082 '72.0.3611.1',
1083 '72.0.3611.0',
1084 '71.0.3578.57',
1085 '70.0.3538.108',
1086 '72.0.3610.2',
1087 '71.0.3578.56',
1088 '71.0.3578.55',
1089 '72.0.3610.1',
1090 '72.0.3610.0',
1091 '71.0.3578.54',
1092 '70.0.3538.107',
1093 '71.0.3578.53',
1094 '72.0.3609.3',
1095 '71.0.3578.52',
1096 '72.0.3609.2',
1097 '71.0.3578.51',
1098 '72.0.3608.5',
1099 '72.0.3609.1',
1100 '72.0.3609.0',
1101 '71.0.3578.50',
1102 '70.0.3538.106',
1103 '72.0.3608.4',
1104 '72.0.3608.3',
1105 '72.0.3608.2',
1106 '71.0.3578.49',
1107 '72.0.3608.1',
1108 '72.0.3608.0',
1109 '70.0.3538.105',
1110 '71.0.3578.48',
1111 '72.0.3607.1',
1112 '72.0.3607.0',
1113 '71.0.3578.47',
1114 '70.0.3538.104',
1115 '72.0.3606.2',
1116 '72.0.3606.1',
1117 '72.0.3606.0',
1118 '71.0.3578.46',
1119 '70.0.3538.103',
1120 '70.0.3538.102',
1121 '72.0.3605.3',
1122 '72.0.3605.2',
1123 '72.0.3605.1',
1124 '72.0.3605.0',
1125 '71.0.3578.45',
1126 '70.0.3538.101',
1127 '71.0.3578.44',
1128 '71.0.3578.43',
1129 '70.0.3538.100',
1130 '70.0.3538.99',
1131 '71.0.3578.42',
1132 '72.0.3604.1',
1133 '72.0.3604.0',
1134 '71.0.3578.41',
1135 '70.0.3538.98',
1136 '71.0.3578.40',
1137 '72.0.3603.2',
1138 '72.0.3603.1',
1139 '72.0.3603.0',
1140 '71.0.3578.39',
1141 '70.0.3538.97',
1142 '72.0.3602.2',
1143 '71.0.3578.38',
1144 '71.0.3578.37',
1145 '72.0.3602.1',
1146 '72.0.3602.0',
1147 '71.0.3578.36',
1148 '70.0.3538.96',
1149 '72.0.3601.1',
1150 '72.0.3601.0',
1151 '71.0.3578.35',
1152 '70.0.3538.95',
1153 '72.0.3600.1',
1154 '72.0.3600.0',
1155 '71.0.3578.34',
1156 '70.0.3538.94',
1157 '72.0.3599.3',
1158 '72.0.3599.2',
1159 '72.0.3599.1',
1160 '72.0.3599.0',
1161 '71.0.3578.33',
1162 '70.0.3538.93',
1163 '72.0.3598.1',
1164 '72.0.3598.0',
1165 '71.0.3578.32',
1166 '70.0.3538.87',
1167 '72.0.3597.1',
1168 '72.0.3597.0',
1169 '72.0.3596.2',
1170 '71.0.3578.31',
1171 '70.0.3538.86',
1172 '71.0.3578.30',
1173 '71.0.3578.29',
1174 '72.0.3596.1',
1175 '72.0.3596.0',
1176 '71.0.3578.28',
1177 '70.0.3538.85',
1178 '72.0.3595.2',
1179 '72.0.3591.3',
1180 '72.0.3595.1',
1181 '72.0.3595.0',
1182 '71.0.3578.27',
1183 '70.0.3538.84',
1184 '72.0.3594.1',
1185 '72.0.3594.0',
1186 '71.0.3578.26',
1187 '70.0.3538.83',
1188 '72.0.3593.2',
1189 '72.0.3593.1',
1190 '72.0.3593.0',
1191 '71.0.3578.25',
1192 '70.0.3538.82',
1193 '72.0.3589.3',
1194 '72.0.3592.2',
1195 '72.0.3592.1',
1196 '72.0.3592.0',
1197 '71.0.3578.24',
1198 '72.0.3589.2',
1199 '70.0.3538.81',
1200 '70.0.3538.80',
1201 '72.0.3591.2',
1202 '72.0.3591.1',
1203 '72.0.3591.0',
1204 '71.0.3578.23',
1205 '70.0.3538.79',
1206 '71.0.3578.22',
1207 '72.0.3590.1',
1208 '72.0.3590.0',
1209 '71.0.3578.21',
1210 '70.0.3538.78',
1211 '70.0.3538.77',
1212 '72.0.3589.1',
1213 '72.0.3589.0',
1214 '71.0.3578.20',
1215 '70.0.3538.76',
1216 '71.0.3578.19',
1217 '70.0.3538.75',
1218 '72.0.3588.1',
1219 '72.0.3588.0',
1220 '71.0.3578.18',
1221 '70.0.3538.74',
1222 '72.0.3586.2',
1223 '72.0.3587.0',
1224 '71.0.3578.17',
1225 '70.0.3538.73',
1226 '72.0.3586.1',
1227 '72.0.3586.0',
1228 '71.0.3578.16',
1229 '70.0.3538.72',
1230 '72.0.3585.1',
1231 '72.0.3585.0',
1232 '71.0.3578.15',
1233 '70.0.3538.71',
1234 '71.0.3578.14',
1235 '72.0.3584.1',
1236 '72.0.3584.0',
1237 '71.0.3578.13',
1238 '70.0.3538.70',
1239 '72.0.3583.2',
1240 '71.0.3578.12',
1241 '72.0.3583.1',
1242 '72.0.3583.0',
1243 '71.0.3578.11',
1244 '70.0.3538.69',
1245 '71.0.3578.10',
1246 '72.0.3582.0',
1247 '72.0.3581.4',
1248 '71.0.3578.9',
1249 '70.0.3538.67',
1250 '72.0.3581.3',
1251 '72.0.3581.2',
1252 '72.0.3581.1',
1253 '72.0.3581.0',
1254 '71.0.3578.8',
1255 '70.0.3538.66',
1256 '72.0.3580.1',
1257 '72.0.3580.0',
1258 '71.0.3578.7',
1259 '70.0.3538.65',
1260 '71.0.3578.6',
1261 '72.0.3579.1',
1262 '72.0.3579.0',
1263 '71.0.3578.5',
1264 '70.0.3538.64',
1265 '71.0.3578.4',
1266 '71.0.3578.3',
1267 '71.0.3578.2',
1268 '71.0.3578.1',
1269 '71.0.3578.0',
1270 '70.0.3538.63',
1271 '69.0.3497.128',
1272 '70.0.3538.62',
1273 '70.0.3538.61',
1274 '70.0.3538.60',
1275 '70.0.3538.59',
1276 '71.0.3577.1',
1277 '71.0.3577.0',
1278 '70.0.3538.58',
1279 '69.0.3497.127',
1280 '71.0.3576.2',
1281 '71.0.3576.1',
1282 '71.0.3576.0',
1283 '70.0.3538.57',
1284 '70.0.3538.56',
1285 '71.0.3575.2',
1286 '70.0.3538.55',
1287 '69.0.3497.126',
1288 '70.0.3538.54',
1289 '71.0.3575.1',
1290 '71.0.3575.0',
1291 '71.0.3574.1',
1292 '71.0.3574.0',
1293 '70.0.3538.53',
1294 '69.0.3497.125',
1295 '70.0.3538.52',
1296 '71.0.3573.1',
1297 '71.0.3573.0',
1298 '70.0.3538.51',
1299 '69.0.3497.124',
1300 '71.0.3572.1',
1301 '71.0.3572.0',
1302 '70.0.3538.50',
1303 '69.0.3497.123',
1304 '71.0.3571.2',
1305 '70.0.3538.49',
1306 '69.0.3497.122',
1307 '71.0.3571.1',
1308 '71.0.3571.0',
1309 '70.0.3538.48',
1310 '69.0.3497.121',
1311 '71.0.3570.1',
1312 '71.0.3570.0',
1313 '70.0.3538.47',
1314 '69.0.3497.120',
1315 '71.0.3568.2',
1316 '71.0.3569.1',
1317 '71.0.3569.0',
1318 '70.0.3538.46',
1319 '69.0.3497.119',
1320 '70.0.3538.45',
1321 '71.0.3568.1',
1322 '71.0.3568.0',
1323 '70.0.3538.44',
1324 '69.0.3497.118',
1325 '70.0.3538.43',
1326 '70.0.3538.42',
1327 '71.0.3567.1',
1328 '71.0.3567.0',
1329 '70.0.3538.41',
1330 '69.0.3497.117',
1331 '71.0.3566.1',
1332 '71.0.3566.0',
1333 '70.0.3538.40',
1334 '69.0.3497.116',
1335 '71.0.3565.1',
1336 '71.0.3565.0',
1337 '70.0.3538.39',
1338 '69.0.3497.115',
1339 '71.0.3564.1',
1340 '71.0.3564.0',
1341 '70.0.3538.38',
1342 '69.0.3497.114',
1343 '71.0.3563.0',
1344 '71.0.3562.2',
1345 '70.0.3538.37',
1346 '69.0.3497.113',
1347 '70.0.3538.36',
1348 '70.0.3538.35',
1349 '71.0.3562.1',
1350 '71.0.3562.0',
1351 '70.0.3538.34',
1352 '69.0.3497.112',
1353 '70.0.3538.33',
1354 '71.0.3561.1',
1355 '71.0.3561.0',
1356 '70.0.3538.32',
1357 '69.0.3497.111',
1358 '71.0.3559.6',
1359 '71.0.3560.1',
1360 '71.0.3560.0',
1361 '71.0.3559.5',
1362 '71.0.3559.4',
1363 '70.0.3538.31',
1364 '69.0.3497.110',
1365 '71.0.3559.3',
1366 '70.0.3538.30',
1367 '69.0.3497.109',
1368 '71.0.3559.2',
1369 '71.0.3559.1',
1370 '71.0.3559.0',
1371 '70.0.3538.29',
1372 '69.0.3497.108',
1373 '71.0.3558.2',
1374 '71.0.3558.1',
1375 '71.0.3558.0',
1376 '70.0.3538.28',
1377 '69.0.3497.107',
1378 '71.0.3557.2',
1379 '71.0.3557.1',
1380 '71.0.3557.0',
1381 '70.0.3538.27',
1382 '69.0.3497.106',
1383 '71.0.3554.4',
1384 '70.0.3538.26',
1385 '71.0.3556.1',
1386 '71.0.3556.0',
1387 '70.0.3538.25',
1388 '71.0.3554.3',
1389 '69.0.3497.105',
1390 '71.0.3554.2',
1391 '70.0.3538.24',
1392 '69.0.3497.104',
1393 '71.0.3555.2',
1394 '70.0.3538.23',
1395 '71.0.3555.1',
1396 '71.0.3555.0',
1397 '70.0.3538.22',
1398 '69.0.3497.103',
1399 '71.0.3554.1',
1400 '71.0.3554.0',
1401 '70.0.3538.21',
1402 '69.0.3497.102',
1403 '71.0.3553.3',
1404 '70.0.3538.20',
1405 '69.0.3497.101',
1406 '71.0.3553.2',
1407 '69.0.3497.100',
1408 '71.0.3553.1',
1409 '71.0.3553.0',
1410 '70.0.3538.19',
1411 '69.0.3497.99',
1412 '69.0.3497.98',
1413 '69.0.3497.97',
1414 '71.0.3552.6',
1415 '71.0.3552.5',
1416 '71.0.3552.4',
1417 '71.0.3552.3',
1418 '71.0.3552.2',
1419 '71.0.3552.1',
1420 '71.0.3552.0',
1421 '70.0.3538.18',
1422 '69.0.3497.96',
1423 '71.0.3551.3',
1424 '71.0.3551.2',
1425 '71.0.3551.1',
1426 '71.0.3551.0',
1427 '70.0.3538.17',
1428 '69.0.3497.95',
1429 '71.0.3550.3',
1430 '71.0.3550.2',
1431 '71.0.3550.1',
1432 '71.0.3550.0',
1433 '70.0.3538.16',
1434 '69.0.3497.94',
1435 '71.0.3549.1',
1436 '71.0.3549.0',
1437 '70.0.3538.15',
1438 '69.0.3497.93',
1439 '69.0.3497.92',
1440 '71.0.3548.1',
1441 '71.0.3548.0',
1442 '70.0.3538.14',
1443 '69.0.3497.91',
1444 '71.0.3547.1',
1445 '71.0.3547.0',
1446 '70.0.3538.13',
1447 '69.0.3497.90',
1448 '71.0.3546.2',
1449 '69.0.3497.89',
1450 '71.0.3546.1',
1451 '71.0.3546.0',
1452 '70.0.3538.12',
1453 '69.0.3497.88',
1454 '71.0.3545.4',
1455 '71.0.3545.3',
1456 '71.0.3545.2',
1457 '71.0.3545.1',
1458 '71.0.3545.0',
1459 '70.0.3538.11',
1460 '69.0.3497.87',
1461 '71.0.3544.5',
1462 '71.0.3544.4',
1463 '71.0.3544.3',
1464 '71.0.3544.2',
1465 '71.0.3544.1',
1466 '71.0.3544.0',
1467 '69.0.3497.86',
1468 '70.0.3538.10',
1469 '69.0.3497.85',
1470 '70.0.3538.9',
1471 '69.0.3497.84',
1472 '71.0.3543.4',
1473 '70.0.3538.8',
1474 '71.0.3543.3',
1475 '71.0.3543.2',
1476 '71.0.3543.1',
1477 '71.0.3543.0',
1478 '70.0.3538.7',
1479 '69.0.3497.83',
1480 '71.0.3542.2',
1481 '71.0.3542.1',
1482 '71.0.3542.0',
1483 '70.0.3538.6',
1484 '69.0.3497.82',
1485 '69.0.3497.81',
1486 '71.0.3541.1',
1487 '71.0.3541.0',
1488 '70.0.3538.5',
1489 '69.0.3497.80',
1490 '71.0.3540.1',
1491 '71.0.3540.0',
1492 '70.0.3538.4',
1493 '69.0.3497.79',
1494 '70.0.3538.3',
1495 '71.0.3539.1',
1496 '71.0.3539.0',
1497 '69.0.3497.78',
1498 '68.0.3440.134',
1499 '69.0.3497.77',
1500 '70.0.3538.2',
1501 '70.0.3538.1',
1502 '70.0.3538.0',
1503 '69.0.3497.76',
1504 '68.0.3440.133',
1505 '69.0.3497.75',
1506 '70.0.3537.2',
1507 '70.0.3537.1',
1508 '70.0.3537.0',
1509 '69.0.3497.74',
1510 '68.0.3440.132',
1511 '70.0.3536.0',
1512 '70.0.3535.5',
1513 '70.0.3535.4',
1514 '70.0.3535.3',
1515 '69.0.3497.73',
1516 '68.0.3440.131',
1517 '70.0.3532.8',
1518 '70.0.3532.7',
1519 '69.0.3497.72',
1520 '69.0.3497.71',
1521 '70.0.3535.2',
1522 '70.0.3535.1',
1523 '70.0.3535.0',
1524 '69.0.3497.70',
1525 '68.0.3440.130',
1526 '69.0.3497.69',
1527 '68.0.3440.129',
1528 '70.0.3534.4',
1529 '70.0.3534.3',
1530 '70.0.3534.2',
1531 '70.0.3534.1',
1532 '70.0.3534.0',
1533 '69.0.3497.68',
1534 '68.0.3440.128',
1535 '70.0.3533.2',
1536 '70.0.3533.1',
1537 '70.0.3533.0',
1538 '69.0.3497.67',
1539 '68.0.3440.127',
1540 '70.0.3532.6',
1541 '70.0.3532.5',
1542 '70.0.3532.4',
1543 '69.0.3497.66',
1544 '68.0.3440.126',
1545 '70.0.3532.3',
1546 '70.0.3532.2',
1547 '70.0.3532.1',
1548 '69.0.3497.60',
1549 '69.0.3497.65',
1550 '69.0.3497.64',
1551 '70.0.3532.0',
1552 '70.0.3531.0',
1553 '70.0.3530.4',
1554 '70.0.3530.3',
1555 '70.0.3530.2',
1556 '69.0.3497.58',
1557 '68.0.3440.125',
1558 '69.0.3497.57',
1559 '69.0.3497.56',
1560 '69.0.3497.55',
1561 '69.0.3497.54',
1562 '70.0.3530.1',
1563 '70.0.3530.0',
1564 '69.0.3497.53',
1565 '68.0.3440.124',
1566 '69.0.3497.52',
1567 '70.0.3529.3',
1568 '70.0.3529.2',
1569 '70.0.3529.1',
1570 '70.0.3529.0',
1571 '69.0.3497.51',
1572 '70.0.3528.4',
1573 '68.0.3440.123',
1574 '70.0.3528.3',
1575 '70.0.3528.2',
1576 '70.0.3528.1',
1577 '70.0.3528.0',
1578 '69.0.3497.50',
1579 '68.0.3440.122',
1580 '70.0.3527.1',
1581 '70.0.3527.0',
1582 '69.0.3497.49',
1583 '68.0.3440.121',
1584 '70.0.3526.1',
1585 '70.0.3526.0',
1586 '68.0.3440.120',
1587 '69.0.3497.48',
1588 '69.0.3497.47',
1589 '68.0.3440.119',
1590 '68.0.3440.118',
1591 '70.0.3525.5',
1592 '70.0.3525.4',
1593 '70.0.3525.3',
1594 '68.0.3440.117',
1595 '69.0.3497.46',
1596 '70.0.3525.2',
1597 '70.0.3525.1',
1598 '70.0.3525.0',
1599 '69.0.3497.45',
1600 '68.0.3440.116',
1601 '70.0.3524.4',
1602 '70.0.3524.3',
1603 '69.0.3497.44',
1604 '70.0.3524.2',
1605 '70.0.3524.1',
1606 '70.0.3524.0',
1607 '70.0.3523.2',
1608 '69.0.3497.43',
1609 '68.0.3440.115',
1610 '70.0.3505.9',
1611 '69.0.3497.42',
1612 '70.0.3505.8',
1613 '70.0.3523.1',
1614 '70.0.3523.0',
1615 '69.0.3497.41',
1616 '68.0.3440.114',
1617 '70.0.3505.7',
1618 '69.0.3497.40',
1619 '70.0.3522.1',
1620 '70.0.3522.0',
1621 '70.0.3521.2',
1622 '69.0.3497.39',
1623 '68.0.3440.113',
1624 '70.0.3505.6',
1625 '70.0.3521.1',
1626 '70.0.3521.0',
1627 '69.0.3497.38',
1628 '68.0.3440.112',
1629 '70.0.3520.1',
1630 '70.0.3520.0',
1631 '69.0.3497.37',
1632 '68.0.3440.111',
1633 '70.0.3519.3',
1634 '70.0.3519.2',
1635 '70.0.3519.1',
1636 '70.0.3519.0',
1637 '69.0.3497.36',
1638 '68.0.3440.110',
1639 '70.0.3518.1',
1640 '70.0.3518.0',
1641 '69.0.3497.35',
1642 '69.0.3497.34',
1643 '68.0.3440.109',
1644 '70.0.3517.1',
1645 '70.0.3517.0',
1646 '69.0.3497.33',
1647 '68.0.3440.108',
1648 '69.0.3497.32',
1649 '70.0.3516.3',
1650 '70.0.3516.2',
1651 '70.0.3516.1',
1652 '70.0.3516.0',
1653 '69.0.3497.31',
1654 '68.0.3440.107',
1655 '70.0.3515.4',
1656 '68.0.3440.106',
1657 '70.0.3515.3',
1658 '70.0.3515.2',
1659 '70.0.3515.1',
1660 '70.0.3515.0',
1661 '69.0.3497.30',
1662 '68.0.3440.105',
1663 '68.0.3440.104',
1664 '70.0.3514.2',
1665 '70.0.3514.1',
1666 '70.0.3514.0',
1667 '69.0.3497.29',
1668 '68.0.3440.103',
1669 '70.0.3513.1',
1670 '70.0.3513.0',
1671 '69.0.3497.28',
1672 )
1673 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# HTTP header values shared across the module.  The User-Agent is chosen
# once, at import time, by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named User-Agent strings that can be looked up by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel for "no default supplied", so that None stays usable as a
# real default value (compared with `is` / `is not`).
NO_DEFAULT = object()
1691
7105440c
YCH
# Full English month names, January first.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code; every list is ordered January first.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
a942d6cb 1702
a7aaa398
S
# File extensions (without the leading dot) recognised as media containers,
# audio formats or streaming manifests.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# NOTE(review): presumably the container extensions accepted as remux
# targets -- confirm against the code that consumes this tuple.
REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
1719
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement.  itertools.chain
# mixes plain strings (one replacement character per source character) with
# one-element lists for the multi-character replacements such as 'AE'.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1724
46f59e89
S
# strptime()-style format strings that are unambiguous about day/month order.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# The shared formats plus numeric formats read as day-before-month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# The shared formats plus numeric formats read as month-before-day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
1782
# Captures the four arguments of a "}('...',N,N,'a|b|c'.split('|')" call
# tail.  NOTE(review): presumably the signature of packed JavaScript -- confirm.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of a <script type="application/ld+json"> element in the
# named group 'json_ld' (case-insensitive, dot matches newlines).
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1785
7105440c 1786
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() when that call succeeds and the
    reported codec is able to encode text; falls back to 'UTF-8' otherwise.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable codec name raises here.
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
d77c3dfd 1800
f4bfd65f 1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The data is first written to a temporary file created next to fn and
    then renamed over fn.  If anything fails, the temporary file is removed
    (best effort) and the original exception is re-raised.
    """

    fn = encodeFilename(fn)
    is_py2 = sys.version_info < (3, 0)

    if is_py2 and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile fails on non-ASCII names unless it is given
        # unicode objects.
        tmp_prefix = os.path.basename(fn).decode(fs_encoding) + '.'
        tmp_dir = os.path.dirname(fn).decode(fs_encoding)
    else:
        tmp_prefix = os.path.basename(fn) + '.'
        tmp_dir = os.path.dirname(fn)

    tmp_options = {
        'suffix': '.tmp',
        'prefix': tmp_prefix,
        'dir': tmp_dir,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if is_py2:
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() can only be read by setting it, so set a dummy
            # value and restore the real one immediately.
            current_umask = os.umask(0)
            os.umask(current_umask)
            os.chmod(tf.name, 0o666 & ~current_umask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        With val left as None, any element matching xpath that carries the
        attribute key is accepted.  Returns the first match or None.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1876
d7e66d39
JMF
# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
# the namespaces parameter
5f6a1245
JW
1879
1880
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand the 'prefix:tag' steps of path into '{uri}tag' form.

    path is split on '/'; steps without a colon are kept unchanged, and
    prefixes are looked up in ns_map (KeyError if a prefix is missing).
    """
    def _expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([_expand(step) for step in path.split('/')])
1891
d77c3dfd 1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node that matches xpath.

    xpath is either a single path string or an iterable of path strings
    that are tried in order until one matches.

    When nothing matches: default is returned if one was supplied;
    otherwise ExtractorError is raised if fatal is true (name, or xpath
    itself when name is None, is used in the message); otherwise None.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    # Start from "not found" so that an empty iterable of candidate paths
    # reaches the default/fatal handling below instead of raising
    # UnboundLocalError.
    n = None
    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    Whatever xpath_element() yields for a missing element (None or default)
    is passed through.  When the element exists but has no text: default is
    returned if supplied, else ExtractorError is raised if fatal, else None.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath[@key].

    When no such element exists: default is returned if supplied, else
    ExtractorError is raised if fatal, else None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document

    Delegates to get_element_by_attribute('id', ...); the result is None
    when no tag matches.
    """
    return get_element_by_attribute('id', id, html)
43e8fafd 1947
12ea2f30 1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document

    Returns None when no tag carries the class.
    """
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None

    The arguments are forwarded unchanged to get_elements_by_attribute().
    """
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class_name as a whole word anywhere inside the attribute value;
    # the pattern is already a regex, so it must not be escaped again.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is inserted into the search pattern as a regular expression.
    Each returned content string has its HTML entities unescaped.
    """
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(pattern, html):
        content = match.group('content')
        # Content that opens with a quote has its first and last characters
        # dropped.
        if content.startswith(('"', "'")):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
a921f407 1990
c5229f39 1991
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Remains an empty dict if no start tag is ever parsed.
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Every start tag overwrites the previous result, so only the
        # attributes of the most recently seen start tag are kept.
        self.attrs = dict(attrs)
2001
c5229f39 2002
8bb56eee
BF
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    # (whatever was collected before the error is still returned)
    except compat_HTMLParseError:
        pass
    return parser.attrs
9e6dd238 2027
c5229f39 2028
def clean_html(html):
    """Clean an HTML snippet into a readable string

    Literal newlines become spaces, <br> tags and </p><p> boundaries become
    newlines, all remaining tags are dropped, HTML entities are unescaped
    and the result is stripped.  None is passed through unchanged.
    """

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The filename '-' selects standard output (its binary buffer when one
    exists).  Otherwise the file is opened as given; if that fails for a
    reason other than EACCES, the name is passed through sanitize_path()
    and opened again, provided sanitizing actually changed it.  Errors from
    the second attempt propagate to the caller.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem cannot be fixed by renaming
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when timestr cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2085
5f6a1245 2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped entirely
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores, then trim them from both ends
    result = re.sub(r'_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2126
5f6a1245 2127
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms s is returned untouched unless force is set, in
    which case it is processed the same way with an empty drive prefix.
    """
    on_windows = sys.platform == 'win32'
    if not on_windows and not force:
        return s

    if on_windows:
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    else:
        drive_or_unc = ''

    def _clean(path_part):
        # '.' and '..' are kept; otherwise reserved characters and a
        # trailing whitespace character or dot are replaced with '#'.
        if path_part in ['.', '..']:
            return path_part
        return re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [_clean(path_part) for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2148
2149
def sanitize_url(url):
    """Return url with a missing scheme or a known scheme typo repaired.

    URLs that match none of the known problems are returned unchanged.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # Every pattern is anchored at the start, so a non-zero count
        # means the URL began with that typo.
        fixed_url, hits = re.subn(mistake, fixup, url)
        if hits:
            return fixed_url
    return url
17bcc626
S
2166
2167
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for url after passing it through sanitize_url()

    All other positional and keyword arguments are forwarded unchanged.
    """
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
67dda517
S
2170
2171
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables second.
    return os.path.expandvars(compat_expanduser(s))
2175
2176
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element in its
    original position.  Elements are compared with ==, so unhashable items
    are supported.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2184
912b38b4 2185
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity text without the leading '&' but
    with the trailing ';' (e.g. 'amp;' or '#x41;').  Entities that cannot
    be resolved are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # Code points outside the valid range raise ValueError; numbers too
        # large for a C integer raise OverflowError instead.  Both must fall
        # through to the literal representation rather than crash.
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2215
2216
def unescapeHTML(s):
    """Replace every '&...;' entity in s with the character it denotes

    None is passed through; any other input must be exactly compat_str.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2224
8bf48f23 2225
def process_communicate_or_kill(p, *args, **kwargs):
    """Run p.communicate(*args, **kwargs) and return its result

    If communicate() is interrupted by any exception, the process is killed
    and waited for before the exception is re-raised.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    else:
        return result
2233
2234
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding used for file names and subprocess arguments"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    fs_encoding = sys.getfilesystemencoding()
    return 'utf-8' if fs_encoding is None else fs_encoding
2245
2246
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file

    Returns s unchanged wherever unicode file names are accepted natively,
    otherwise s encoded with get_subprocess_encoding(), ignoring characters
    that cannot be encoded.
    """

    assert type(s) == compat_str

    unicode_is_accepted = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if unicode_is_accepted:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2269
2270
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string file name using get_subprocess_encoding()

    On Python 3, and for any value that is not a bytes object, b is
    returned unchanged.  Undecodable bytes are ignored.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2280
f07b74fc
PH
2281
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(s, True)"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings: decode as ASCII first.  Once all
    # post processors are fixed this should become an internal error:
    # 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2289
2290
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument; shorthand for decodeFilename(b, True)"""
    return decodeFilename(b, True)
2293
2294
8271226a
PH
def decodeOption(optval):
    """Return an option value as text

    None is passed through; bytes are decoded with preferredencoding().
    The result must be a compat_str instance.
    """
    if optval is None:
        return optval

    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 2303
5f6a1245 2304
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'

    The shortest form able to hold the value is used, with delim placed
    between the fields.  Exactly one hour or one minute rolls over into the
    next field ('1:00:00' and '1:00') rather than giving '60:00' or '60'.
    """
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2312
a0ddb8a2 2313
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler configured from params

    Certificate verification is switched off when params has a true
    'nocheckcertificate' entry.  Extra keyword arguments are forwarded to
    the handler.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2337
732ea2f0 2338
08f2a92c
JMF
def bug_reports_message():
    """Return the "please report this issue" text appended to error messages

    The update hint depends on ytdl_is_updateable().
    """
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dlc -U to update'
    else:
        update_cmd = 'see https://github.com/pukkandan/yt-dlp on how to update'
    return ''.join((
        '; please report this issue on https://github.com/pukkandan/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2348
2349
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2353
2354
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.

        The final message is msg prefixed with video_id (when given) and
        followed by the cause (when truthy); unexpected errors also get
        bug_reports_message() appended.
        """

        # An error raised while a network failure or an unavailable video
        # is being handled is never treated as a bug.
        expected = expected or sys.exc_info()[0] in (
            compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none"""
        if self.traceback is not None:
            return ''.join(traceback.format_tb(self.traceback))
        return None
01951dda 2382
1c256f70 2383
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error carrying the message 'Unsupported URL: <url>'"""

    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        # The offending URL, kept for callers
        self.url = url
2389
2390
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2394
2395
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Always raised as an expected error (no bug report text appended)
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # NOTE(review): presumably the countries the video is available
        # from -- confirm against the code that raises this.
        self.countries = countries
2407
2408
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Stored as given; None when no original exception was supplied
        self.exc_info = exc_info
d77c3dfd
FV
2421
2422
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2430
2431
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Also exposed as an attribute, in addition to the exception args
        self.msg = msg
d77c3dfd 2442
5f6a1245 2443
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): the previous docstring was copied from MaxDownloadsReached;
    presumably this is raised to stop processing once a previously
    downloaded video is encountered -- confirm against the raising code.
    """
    pass
2447
2448
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): the previous docstring was copied from MaxDownloadsReached;
    presumably this is raised to stop processing once a video is rejected
    by a filter -- confirm against the raising code.
    """
    pass
2452
2453
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2457
2458
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2466
2467
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
d77c3dfd 2483
5f6a1245 2484
class XAttrMetadataError(YoutubeDLError):
    """Failure while handling extended-attribute metadata

    The error code and message are classified into self.reason, which is
    one of 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        if out_of_space:
            self.reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2499
2500
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when no way of writing extended
    # attributes is available -- confirm against the raising code.
    pass
2503
2504
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate `http_class`, honouring the 'source_address' parameter.

    `ydl_handler._params` is consulted for 'source_address'. When it is
    set, the connection is patched so that the socket is bound to that
    address and only remote addresses of the same IP family are tried.
    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    conn = http_class(*args, **compat_kwargs(kwargs))

    source_address = ydl_handler._params.get('source_address')
    if source_address is None:
        return conn

    # This is to workaround _create_connection() from socket where it will try all
    # address data from getaddrinfo() including IPv6. This filters the result from
    # getaddrinfo() based on the source_address value.
    # This is based on the cpython socket.create_connection() function.
    # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
    def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        host, port = address
        candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        # A dot in the source IP means IPv4, anything else is taken as IPv6
        family = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
        usable = [info for info in candidates if info[0] == family]
        if candidates and not usable:
            ip_version = 'v4' if family == socket.AF_INET else 'v6'
            raise socket.error(
                "No remote IP%s addresses available for connect, can't use '%s' as source address"
                % (ip_version, source_address[0]))

        last_error = None
        for af, socktype, proto, _canonname, remote in usable:
            sock = None
            try:
                sock = socket.socket(af, socktype, proto)
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                sock.bind(source_address)
                sock.connect(remote)
                return sock
            except socket.error as exc:
                # Remember the failure and try the next address
                last_error = exc
                if sock is not None:
                    sock.close()
        if last_error is None:
            raise socket.error('getaddrinfo returns an empty list')
        raise last_error

    if hasattr(conn, '_create_connection'):
        conn._create_connection = _create_connection

    bind_address = (source_address, 0)
    if hasattr(conn, 'source_address'):  # Python 2.7+
        conn.source_address = bind_address
        return conn

    # Python 2.6
    def _hc_connect(self, *args, **kwargs):
        sock = _create_connection(
            (self.host, self.port), self.timeout, bind_address)
        if is_https:
            sock = ssl.wrap_socket(
                sock, self.key_file, self.cert_file,
                ssl_version=ssl.PROTOCOL_TLSv1)
        self.sock = sock

    conn.connect = functools.partial(_hc_connect, conn)
    return conn
2568
2569
def handle_youtubedl_headers(headers):
    """Process the internal 'Youtubedl-no-compression' marker header.

    When the marker is present, a new dict is returned without the marker
    and without any 'Accept-Encoding' header (compared case-insensitively).
    Otherwise `headers` itself is returned untouched.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    filtered_headers = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del filtered_headers[marker]
    return filtered_headers
87f0e62d
YCH
2578
2579
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class automatically adds
    the standard headers to every HTTP request and transparently decodes
    gzipped and deflated responses. To avoid compression for a particular
    request, the calling code only has to include the HTTP header
    "Youtubedl-no-compression", which is removed before the real request
    is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        # The SOCKS proxy is passed in an internal header that must not
        # reach the server
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, raw or zlib-wrapped; empty input is returned as is."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def _gunzip(content):
        """Decompress gzip data, tolerating up to 1023 bytes of trailing junk."""
        def attempt(payload):
            return gzip.GzipFile(fileobj=io.BytesIO(payload), mode='rb').read()

        try:
            return attempt(content)
        except IOError as original_ioerror:
            # There may be junk at the end of the file
            # See http://stackoverflow.com/q/4928560/35070 for details
            for trim in range(1, 1024):
                try:
                    return attempt(content[:-trim])
                except IOError:
                    continue
            raise original_ioerror

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Substitute URL if any change after escaping
        if original_url != escaped_url:
            req = update_Request(req, url=escaped_url)

        for name, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() not in req.headers:
                req.add_header(name, value)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp

        def rewrap(body):
            # Build a replacement response around the decoded body
            new_resp = compat_urllib_request.addinfourl(
                body, old_resp.headers, old_resp.url, old_resp.code)
            new_resp.msg = old_resp.msg
            del new_resp.headers['Content-encoding']
            return new_resp

        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            resp = rewrap(io.BytesIO(self._gunzip(resp.read())))
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            resp = rewrap(io.BytesIO(self.deflate(resp.read())))

        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                on_python3 = sys.version_info >= (3, 0)
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if on_python3:
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if not on_python3:
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
bf50b038 2703
5de90176 2704
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Create a subclass of `base_class` that connects through a SOCKS proxy.

    `base_class` must derive from HTTPConnection or HTTPSConnection and
    `socks_proxy` is the proxy URL. Recognised schemes are 'socks5',
    'socks'/'socks4' and 'socks4a'; the port defaults to 1080 and any
    credentials in the URL are percent-decoded.

    Raises ValueError for any other scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `socks_type`
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS the TLS layer is added on top of the proxied socket
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2746
2747
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens its connections via _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class

        # Forward only what the running Python's HTTPSHandler provides:
        # '_context' exists on python > 2.6, '_check_hostname' on python 3.x
        do_open_kwargs = {}
        for attr, key in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                do_open_kwargs[key] = getattr(self, attr)

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **do_open_kwargs)
be4a824d
PH
2771
2772
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        Session cookies (`expires` is None) are written with `expires`
        set to 0 instead of an empty string. The cookies held in the jar
        are not modified.

        Raises ValueError if no filename is given and the jar has none.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                # Store session cookies with `expires` set to 0 instead of
                # an empty string. This is done on the written value only:
                # assigning 0 to the cookie itself would make the live
                # cookie count as expired.
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = '0'
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Malformed entries are skipped with a warning on stderr. Cookies
        with `expires` 0 are turned into session cookies after loading.

        Raises ValueError if no filename is given and the jar has none.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Drop the '#HttpOnly_' marker so the entry is parsed as a cookie
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2889
2890
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor whose handlers are also registered for HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is currently disabled
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2913
2914
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that passes the redirect URL on as unicode on Python 2."""

    if sys.version_info < (3,):
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            newurl = compat_str(newurl)
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, headers, newurl)
2922
2923
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing 'Z' or numeric UTC offset off `date_str`.

    Returns a tuple (timezone, date_str) where timezone is a
    datetime.timedelta and date_str has the recognised suffix removed.
    If nothing is recognised, the offset is zero and the string is
    returned unchanged.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str

    stripped = date_str[:-len(m.group('tz'))]
    sign = m.group('sign')
    if not sign:
        # A bare 'Z' designates UTC
        return datetime.timedelta(), stripped

    factor = 1 if sign == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(m.group('hours')),
        minutes=factor * int(m.group('minutes')))
    return offset, stripped
2940
2941
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # Fractional seconds are dropped
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    # Unless an offset is given explicitly, take it from the string
    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_dt = datetime.datetime.strptime(date_str, date_format) - timezone
    except ValueError:
        # Not in the expected format
        return None
    else:
        return calendar.timegm(utc_dt.timetuple())
912b38b4
PH
2959
2960
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first or month-first list of date formats."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2963
2964
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    date_str = extract_timezone(date_str)[1]

    upload_date = None
    # Every format is tried; the last one that parses determines the result
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 2991
5f6a1245 2992
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from `date_str`.

    The formats from date_formats(day_first) are tried first, then
    email.utils.parsedate_tz(). A trailing numeric UTC offset is honoured
    on both paths. Returns None if `date_str` is None or cannot be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE: 12 hours are added for any PM time, including 12:xx PM
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # The UTC offset was already stripped from date_str above, so it
        # has to be applied here explicitly (kept as whole seconds so the
        # result stays an int)
        timezone_seconds = timezone.days * 86400 + timezone.seconds
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone_seconds
46f59e89
S
3024
3025
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from `url`.

    The text after the last dot of the part before the query string is
    used when it is purely alphanumeric, or when it is a known extension
    followed by slashes. Otherwise `default_ext` is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    guess = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3037
5f6a1245 3038
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return `filename` with its extension replaced by '<sub_lang>.<sub_format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3041
5f6a1245 3042
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # A bad approximation?
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    delta = datetime.timedelta(**{unit + 's': amount})
    return today + delta
5f6a1245
JW
3070
3071
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
    Strings in any other format are returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
3080
5f6a1245 3081
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str.

        A missing bound defaults to the earliest / latest representable date.
        Raises ValueError if start is after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not a date object is parsed first
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3111
3112
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Decode when the platform module hands back a byte string
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3121
3122
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    wintypes = ctypes.wintypes

    # Maps a file descriptor to the value handed to GetStdHandle
    # (presumably STD_OUTPUT_HANDLE / STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # Either the stream has no fileno at all (it's virtual), or it is
        # some strange Windows pseudo file
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    kernel32 = ctypes.windll.kernel32

    def kernel32_func(name, restype, *argtypes):
        # Bind a kernel32 export with the given prototype
        return compat_ctypes_WINFUNCTYPE(restype, *argtypes)((name, kernel32))

    GetStdHandle = kernel32_func('GetStdHandle', wintypes.HANDLE, wintypes.DWORD)
    handle = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = kernel32_func(
        'WriteConsoleW',
        wintypes.BOOL, wintypes.HANDLE, wintypes.LPWSTR,
        wintypes.DWORD, ctypes.POINTER(wintypes.DWORD),
        wintypes.LPVOID)
    written = wintypes.DWORD(0)

    GetFileType = kernel32_func('GetFileType', wintypes.DWORD, wintypes.DWORD)
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = kernel32_func(
        'GetConsoleMode',
        wintypes.BOOL, wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD))
    INVALID_HANDLE_VALUE = wintypes.DWORD(-1).value

    def not_a_console(h):
        if h == INVALID_HANDLE_VALUE or h is None:
            return True
        if (GetFileType(h) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
            return True
        return GetConsoleMode(h, ctypes.byref(wintypes.DWORD())) == 0

    if not_a_console(handle):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the BMP, or len(text)
        for pos, char in enumerate(text):
            if ord(char) > 0xffff:
                return pos
        return len(text)

    while s:
        # Write at most 1024 characters, stopping before a non-BMP one
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            handle, s, count or 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3196
3197
def write_string(s, out=None, encoding=None):
    """Write the text `s` to `out` (sys.stderr by default) and flush it.

    On Windows consoles the dedicated console writer is tried first.
    Otherwise the text is encoded for byte streams and for streams with
    an underlying buffer, or written as is.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_api and _windows_write_string(s, out):
        return

    wants_bytes = (
        'b' in getattr(out, 'mode', '')
        or sys.version_info[0] < 3)  # Python 2 lies about mode of sys.stderr
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3218
3219
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the items of the byte string `bs` as a list of integers."""
    if not bs:
        return []
    # Indexing yields ints on Python 3 and one-character strings otherwise
    return list(bs) if isinstance(bs[0], int) else [ord(c) for c in bs]
3227
c257baff 3228
def intlist_to_bytes(xs):
    """Pack a sequence of integers (0-255) into a byte string."""
    if not xs:
        return b''
    pack_format = '%dB' % len(xs)
    return compat_struct_pack(pack_format, *xs)
c38b1e77
PH
3233
3234
c1c9a79c
PH
# Cross-platform file locking
#
# Each branch defines _lock_file(f, exclusive) and _unlock_file(f)
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.restype = ctypes.wintypes.BOOL
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]

    # Low and high parts of the byte count that gets locked
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object because unlocking needs it again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0
        locked = LockFileEx(
            handle, flags, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
    else:
        def _lock_file(f, exclusive):
            operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            fcntl.flock(f, operation)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
3308
3309
class locked_file(object):
    """File opened with io.open() that is locked while used as a context manager.

    Mode 'r' takes a shared lock, modes 'a' and 'w' an exclusive one.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leak the open file when locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # The file is closed even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3339
3340
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3344
3345
def shell_quote(args):
    """Quote every argument for the shell and join them with spaces."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            return arg.decode(encoding)
        return arg

    return ' '.join(compat_shlex_quote(as_text(a)) for a in args)
9d4660ca
PH
3355
3356
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    `data` is JSON-encoded into a '__youtubedl_smuggle' fragment appended
    to `url`. If the URL already carries smuggled data, both are merged
    and the values already in the URL take precedence. `data` itself is
    left unmodified.
    """
    url, idata = unsmuggle_url(url, {})
    # Merge on a copy so that the caller's dict is not changed
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3365
3366
def unsmuggle_url(smug_url, default=None):
    """ Split a URL built by smuggle_url into (url, data).

    URLs without the smuggle marker are returned unchanged together with
    default. """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
02dbf93f
PH
3374
3375
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count with a binary (1024-based) unit suffix.

    Returns 'N/A' for None. str values are converted with float() first.
    The result has two decimals followed by the unit, e.g. '1.50KiB'.
    Values below one byte are reported in 'B' and values beyond the largest
    known unit are reported in 'YiB'. """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the index inside the suffix table: a negative exponent would
        # silently pick a suffix from the end of the list, a too large one
        # would raise IndexError
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3388
1c088fa8 3389
fb47597b
S
def lookup_unit_table(unit_table, s):
    """ Parse a leading "<number><unit>" from s using the given unit table.

    The number may use ',' or '.' as decimal separator. Returns the number
    multiplied by unit_table[unit] as an int, or None if s does not start
    with a number followed by one of the units. """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    mobj = re.match(pattern, s)
    if mobj is None:
        return None
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3399
3400
be64b5b0
PH
def parse_filesize(s):
    """ Parse a human readable file size such as '1.5 MiB' into bytes.

    Returns None for None or when lookup_unit_table finds no match. """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    # (prefix letter, decimal unit name, binary unit name), in growing order
    _PREFIXES = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )
    for power, (letter, decimal_name, binary_name) in enumerate(_PREFIXES, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        _UNIT_TABLE[letter + 'iB'] = binary
        _UNIT_TABLE[letter + 'B'] = decimal
        _UNIT_TABLE[lower + 'B'] = binary
        _UNIT_TABLE[letter + 'b'] = decimal
        _UNIT_TABLE[lower + 'b'] = decimal
        _UNIT_TABLE[decimal_name + 'bytes'] = decimal
        _UNIT_TABLE[binary_name + 'bytes'] = binary

    return lookup_unit_table(_UNIT_TABLE, s)
3470
3471
def parse_count(s):
    """ Parse a count such as '1,234' or '1.5M' into an int.

    Returns None for None or when the text cannot be parsed. """
    if s is None:
        return None

    s = s.strip()

    # Only digits and grouping characters: no unit to look up
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    thousand, million = 1000, 1000 ** 2
    _UNIT_TABLE = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3491
2f7ae819 3492
b871d7e9
S
def parse_resolution(s):
    """ Extract resolution information from a string.

    Recognizes, in this order, "<width>x<height>" (also with 'X' or '×'),
    "<height>p" / "<height>i" and "4k" / "8k". Returns a dict with 'width'
    and/or 'height', or an empty dict if nothing was recognized. """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = map(int, dimensions.group('w', 'h'))
        return {
            'width': width,
            'height': height,
        }

    scan_lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    shorthand = re.search(r'\b([48])[kK]\b', s)
    if shorthand:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(shorthand.group(1)) * 540}

    return {}
3513
3514
0dc41787
S
def parse_bitrate(s):
    """ Return the integer in front of 'kbps' in s, or None if s is not a
    text string or contains no such number """
    if isinstance(s, compat_str):
        mobj = re.search(r'\b(\d+)\s*kbps', s)
        if mobj:
            return int(mobj.group(1))
    return None
3521
3522
def month_by_name(name, lang='en'):
    """ Return the 1-based position of name in the month name list for lang
    (the English list is used when MONTH_NAMES has no entry for lang),
    or None if name is not in that list """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    if name not in names:
        return None
    return names.index(name) + 1
3532
3533
def month_by_abbreviation(abbrev):
    """ Return the number of a month given the first three letters of its
    English name, or None if no month matches """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3542
3543
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity or a numeric
    character reference by '&amp;'"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
3550
3551
def setproctitle(title):
    """ Set the process name through libc's prctl, if that is possible.

    title must be a text string. The function returns silently when libc
    cannot be loaded or does not provide prctl. """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Creating the buffer from the bytes themselves makes ctypes allocate one
    # extra item, so that the string handed to prctl is NUL-terminated
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3576
3577
def remove_start(s, start):
    """ Return s without the leading start; s (including None) is returned
    unchanged when it does not begin with start """
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3580
3581
def remove_end(s, end):
    """ Return s without the trailing end; s (including None) is returned
    unchanged when it does not finish with end or when end is empty """
    # An empty end must be special-cased: s[:-0] would be the empty string
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
2b9faf55
PH
3584
3585
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes surrounding s """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3593
3594
b6e0c7d2
U
def get_domain(url):
    """ Return the host part of url without scheme and leading 'www.',
    or None if url contains no dotted host name """
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if mobj is None:
        return None
    return mobj.group('domain')
3598
3599
def url_basename(url):
    """ Return the last component of the path of url """
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3
PH
3603
3604
02dc0a36
S
def base_url(url):
    """ Return url up to and including the last '/' before any '?', '#'
    or '&'. Raises AttributeError if url is not such an http(s) URL. """
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3607
3608
def urljoin(base, path):
    """ Join path onto base.

    bytes arguments are decoded as UTF-8. Returns None when path is empty
    or not a string, or when path is relative and base is not an http(s)
    or protocol-relative URL. A path that already has a scheme or starts
    with '//' is returned as is. """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    # Nothing to join when path is already absolute or protocol-relative
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_is_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_is_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3622
3623
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """ Request whose get_method() always reports 'HEAD' """

    def get_method(self):
        return 'HEAD'
7217e148
PH
3627
3628
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """ Request whose get_method() always reports 'PUT' """

    def get_method(self):
        return 'PUT'
3632
3633
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, or return default if that is not possible.

    If get_attr is given, the attribute of that name is read from v first.
    The converted value is multiplied by invscale and floor-divided by
    scale. None, the empty string and values int() cannot convert (infinite
    floats included) yield default. """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError is what int() raises for infinite floats
        return default
9732d77e 3646
9572013d 3647
40a90862
JMF
def str_or_none(v, default=None):
    """ Return v converted with compat_str, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
3650
9732d77e
PH
3651
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop grouping characters and plus signs before converting
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
608d11f5
PH
3659
3660
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float multiplied by invscale and divided by scale;
    return default if v is None or cannot be converted """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3668
3669
c7e327c4
S
def bool_or_none(v, default=None):
    """ Return v if it is a bool, default otherwise """
    if isinstance(v, bool):
        return v
    return default
3672
3673
53cd37ba
S
def strip_or_none(v, default=None):
    """ Return v stripped of surrounding whitespace if it is a text string,
    default otherwise """
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3676
3677
af03000a
S
def url_or_none(url):
    """ Return url stripped of surrounding whitespace if it is a text string
    starting with '//' or one of the accepted schemes, None otherwise """
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
af03000a
S
3683
3684
def strftime_or_none(timestamp, date_format, default=None):
    """ Format timestamp with date_format, or return default on failure.

    timestamp is either a number (interpreted as a UNIX timestamp in UTC)
    or a text string in YYYYMMDD form. Any other type, an unparsable date
    or a timestamp outside the range supported by the platform yields
    default. """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # datetime_object is still None for unsupported types; the resulting
        # AttributeError is turned into default below
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # OverflowError/OSError: utcfromtimestamp got an out of range value
        return default
3695
3696
def parse_duration(s):
    """ Parse a duration given as text and return it in seconds (float).

    Three notations are tried in turn: colon separated
    ([[[DD:]HH:]MM:]SS[.ms]), unit suffixed ('1h 2m 3s', also with an ISO
    8601 like 'P...T' prefix) and a single fractional '<n> hours' or
    '<n> min(utes)' value. Returns None if s is not a string or matches
    none of them. """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m is None:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
    if m is not None:
        # Both patterns above define the same five groups in the same order
        days, hours, mins, secs, ms = m.groups()
    else:
        m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
        if m is None:
            return None
        hours, mins = m.groups()

    duration = 0
    # (captured text, number of seconds one unit of it is worth)
    for part, seconds_per_unit in (
            (secs, 1),
            (mins, 60),
            (hours, 60 * 60),
            (days, 24 * 60 * 60)):
        if part:
            duration += float(part) * seconds_per_unit
    if ms:
        # ms includes the leading dot, so it already is a fraction of a second
        duration += float(ms)
    return duration
91d7d0b3
JMF
3753
3754
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the extension of filename.

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3761
3762
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the extension of filename by ext.

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3768
3769
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        # The binary could not be started
        return False
    return exe
b7ab0590
PH
3779
3780
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3798
3799
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Return the first group of version_re found in output, or
    unrecognized if the pattern does not match. Without version_re, the
    text following the word 'version' is looked for. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    return mobj.group(1) if mobj else unrecognized
3809
3810
class PagedList(object):
    """ Base class of the paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3815
9c44d242
PH
3816
class OnDemandPagedList(PagedList):
    """ Paged list that calls pagefunc(pagenum) only for the pages a slice
    needs, optionally remembering the pages it already fetched """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            # page number -> list of the entries of that page
            self._cache = {}

    def getslice(self, start=0, end=None):
        """ Return the entries with index in [start, end) as a list;
        end=None means "up to the last entry" """
        res = []
        # Start at the page that contains index `start`
        for pagenum in itertools.count(start // self._pagesize):
            # Indices of the first entry of this page and of the next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of `start` inside this page (0 on every later page)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset just past the last wanted entry if `end` falls on this
            # page, None if the page is wanted up to its end
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
3867
3868
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for sources whose number of pages is known in advance """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with index in [start, end) as a list;
        end=None means "up to the last entry" """
        res = []
        start_page = start // self._pagesize
        # Never ask for pages beyond the announced page count, even when
        # `end` points past the last entry
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop at the beginning of the first page
        skip_elems = start - start_page * self._pagesize
        # Entries still to be collected, None for "no limit"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3896
3897
def uppercase_escape(s):
    r""" Replace every \UXXXXXXXX escape sequence in s by the character it
    denotes """
    decode = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
0fe2ff78
YCH
3904
3905
def lowercase_escape(s):
    r""" Replace every \uXXXX escape sequence in s by the character it
    denotes """
    decode = codecs.getdecoder('unicode_escape')

    def _expand(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
b53466e1 3912
d05cfe06
S
3913
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2 the quoting function is handed UTF-8 encoded bytes
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
3919
3920
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    escaped = parts._replace(
        # The host name is IDNA-encoded, the other components percent-escaped
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3931
62e609ab
PH
3932
def read_batch_urls(batch_fd):
    """ Read the URLs of a batch file, one per line, and close batch_fd.

    Blank lines and lines starting with '#', ';' or ']' are skipped, and
    anything from a whitespace followed by '#' onwards is dropped. """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # The byte order mark, either decoded or as its three raw UTF-8 bytes
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return list(filter(None, map(fixup, fd)))
b74fa8cd
JMF
3950
3951
def urlencode_postdata(*args, **kargs):
    """ URL-encode the given parameters and return them as ASCII bytes """
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3954
3955
def update_url_query(url, query):
    """ Return url with the parameters of the dict query added to its query
    string, replacing parameters of the same name. url is returned
    unchanged when query is empty. """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3964
8e60dc75 3965
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """ Build a new request from req with url, data, headers and query
    parameters updated. The request class follows the method of req (HEAD
    and PUT are preserved) and a timeout attribute is carried over. """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    # Use the request class matching the method of the original request
    req_type = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3984
3985
def _multipart_encode_impl(data, boundary):
    """ Encode the dict data as multipart/form-data using boundary.

    Returns (body as bytes, Content-Type header value). Raises ValueError
    if the boundary occurs inside one of the encoded fields. """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
4006
4007
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value). ValueError is
    raised if a specified boundary occurs in the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A boundary chosen by the caller is never replaced
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary clashed with the data, draw another one
            continue
4036
4037
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up one key, or the first usable of several keys, in d.

    For a list or tuple of keys, the value of the first key that is present
    and not None (and not false, unless skip_false_values is disabled) is
    returned; default if there is none. A single key is looked up with
    d.get(key, default). """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4046
4047
def try_get(src, getter, expected_type=None):
    """ Apply getter (a callable or a list/tuple of callables) to src.

    Returns the first result obtained without AttributeError, KeyError,
    TypeError or IndexError that is an instance of expected_type (any
    result if expected_type is None); None if there is no such result. """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            v = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(v, expected_type):
            return v
    return None
329ca3be
S
4059
4060
6cc62232
S
def merge_dicts(*dicts):
    """ Merge dicts, giving precedence to the earlier ones.

    None values are ignored. A key that is already set is only overwritten
    when its current value is an empty text string and the new value is a
    non-empty text string. """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_blank = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_blank:
                    continue
            merged[k] = v
    return merged
4073
4074
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as a text string, converting it with compat_str and
    the given encoding and error handling if it is not one already """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4077
16392824 4078
a1a530b0
PH
# Age limit that parse_age_limit returns for each of these US rating labels
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4086
4087
# Age limit that parse_age_limit returns for each 'TV-*' rating label
# (it also accepts the labels written as 'TV_*' or without separator)
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4096
4097
def parse_age_limit(s):
    """ Convert an age rating into an age limit in years.

    Accepted are ints from 0 to 21, strings such as '18' or '18+', the
    labels of US_RATINGS and the labels of TV_PARENTAL_GUIDELINES.
    Returns None for anything else. """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None
    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    # Match on the part after 'TV-' so that 'TV_14' and 'TV14' work as well
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4112
4113
def strip_jsonp(code):
    """ Return the argument of a JSONP callback invocation, i.e. code
    without the surrounding 'callback(' ... ');' and trailing // comments.
    code is returned unchanged if it does not have that form. """
    jsonp_re = r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
478c2c61
PH
4122
4123
def js_to_json(code, vars={}):
    """ Rewrite a JavaScript object literal so that it can be parsed as JSON.

    Every token matched by the regular expression at the end of the function
    is passed through fix_kv; the rest of code is left untouched.
    vars maps identifiers to the text they are replaced by. """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for hexadecimal and octal integers, optionally
    # followed by the ':' of an object key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            # Already valid JSON
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, runs of '!' and commas in front of a closing bracket
            # are dropped
            return ""

        if v[0] in ("'", '"'):
            # Quoted string: rewrite its content for use inside double quotes
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    # Emit the number in decimal, quoted if it is a key
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    # Alternatives, in order: double quoted string, single quoted string,
    # comment or comma in front of a closing bracket, identifier, hex/octal
    # integer (possibly used as a key), decimal integer used as a key, '!'s
    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4168
4169
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4178
acd69589 4179
# Output filename template(s) keyed by template type
# NOTE(review): presumably used when the user supplies no template - confirm
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
}
# Further template types, mapped to None or a string
# NOTE(review): the strings look like the filename suffix of that kind of
# file - verify against the code that consumes this table
OUTTMPL_TYPES = {
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
0a871f68 4192
a020a0dc
PH
4193
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    # Make room for the ellipses so that the result is `length` long
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4202
4203
def version_tuple(v):
    """ Split a version string at '.' and '-' into a tuple of ints.
    Raises ValueError if a component is not an integer. """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
4206
4207
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit.

    If version is empty or one of the versions cannot be parsed, the answer
    is `not assume_new`. """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
    return outdated
732ea2f0
PH
4215
4216
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    # Updating with -U is switched off: the function has been returning
    # False unconditionally, and the zipimporter/frozen detection that used
    # to follow the return statement could never run, so it was removed.
    return False
7d4111ed
PH
4224
4225
def args_to_str(args):
    """ Return args shell-quoted and joined with spaces """
    # Get a short string representation for a subprocess command
    quoted = map(compat_shlex_quote, args)
    return ' '.join(quoted)
2ccd1b10
PH
4229
4230
def error_to_compat_str(err):
    """ Return the message of err as a text string """
    err_str = str(err)
    if sys.version_info[0] >= 3:
        return err_str
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return err_str.decode(preferredencoding())
4238
4239
def mimetype2ext(mt):
    """ Return the file extension to use for the MIME type mt.

    Parameters (everything from the first ';') are ignored and the
    comparison is case-insensitive. The complete type is looked up first,
    then its subtype; a subtype without a known mapping is returned as is.
    Returns None for None. """
    if mt is None:
        return None

    # Drop parameters before doing anything else: they may contain '/' and
    # must not take part in either lookup
    mimetype = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mimetype)
    if ext is not None:
        return ext

    _, _, res = mimetype.rpartition('/')
    res = res.strip()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4277
4278
def parse_codecs(codecs_str):
    """ Split a comma separated codecs string into video and audio codec.

    Returns {'vcodec': ..., 'acodec': ...} where a missing codec is 'none'.
    If no codec is recognized but exactly two are given, they are taken as
    video and audio codec in that order. Returns an empty dict otherwise. """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]
    video_codecs = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    audio_codecs = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # The part in front of the first dot identifies the codec family
        codec = full_codec.split('.')[0]
        if codec in video_codecs:
            if not vcodec:
                vcodec = full_codec
        elif codec in audio_codecs:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4308
4309
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension from the headers of a response: the
    filename of an attachment Content-Disposition is preferred, the
    Content-Type is the fallback """
    headers = url_handle.headers

    cd = headers.get('Content-Disposition')
    if cd:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4322
4323
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Return the bytes data as a base64 encoded data: URI of the given
    MIME type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4326
4327
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked when no limit is set (age_limit is None) or when
    # the content is available for everyone (content_limit is None)
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4336
4337
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The 4-byte marks come before the 2-byte marks they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    # Without a byte order mark the data is taken to be UTF-8
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    s = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', s)
a055469f
PH
4356
4357
def determine_protocol(info_dict):
    """ Return the protocol of info_dict: its 'protocol' entry if set,
    otherwise a value derived from its 'url' (scheme prefix, then file
    extension, then URL scheme) """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext == 'm3u8':
        return 'm3u8'
    if ext == 'f4m':
        return 'f4m'

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4378
4379
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Columns are left-aligned and padded to the widest cell (plus extraGap).
    With delim a dashed separator row follows the header; with hideEmpty the
    columns whose data cells are all empty are dropped.
    """

    def column_widths(rows):
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_columns(row, keep_flags):
        return [cell for keep, cell in zip(keep_flags, row) if keep]

    if hideEmpty:
        # A column width of 0 means every data cell in it is empty
        data_widths = column_widths(data)
        header_row = keep_columns(header_row, data_widths)
        data = [keep_columns(row, data_widths) for row in data]

    widths = column_widths([header_row] + data)

    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)

    # The last column is not padded
    cell_formats = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    format_str = ' '.join(cell_formats) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in rows)
347de493
PH
4400
4401
def _match_one(filter_part, dct):
    """ Evaluate a single filter expression against the dictionary dct.

    Two forms are recognised: a comparison "key OP value" (OP being one of
    the COMPARISON_OPERATORS, optionally followed by "?") and a unary test
    "key" / "!key".

    Returns the result of the comparison or unary test. For a comparison
    whose field is missing (None), the matched "?" marker is returned instead
    (None when it was not given).
    Raises ValueError for an ordering operator applied to a string value,
    for a numeric value that cannot be parsed, and for an expression that
    matches neither form.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        # String comparison applies to quoted values, bare words, and to
        # numeric-looking values when the field itself holds a string
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try it as a size with a unit suffix,
                # then once more with 'B' appended
                # NOTE(review): parse_filesize is defined elsewhere; presumably
                # it returns None for input it cannot parse - confirm
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: the expression passes only if "?" was given
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        # Booleans are tested for their value, anything else for presence
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4470
4471
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # The '&'-separated parts are evaluated left to right; the first part
    # that does not match decides the result.
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4477
4478
def match_filter_func(filter_str):
    """ Build a match filter callable for filter_str.

    The returned function takes an info dict and returns None when it passes
    the filter, otherwise a message explaining why the video is skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
4487
4488
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """ Convert a DFXP/TTML time expression to seconds.

    Accepts an offset such as "12.5" or "12.5s" and a clock value such as
    "HH:MM:SS", "HH:MM:SS.fff" or "HH:MM:SS:fff". Returns None for an empty
    or unrecognised expression.
    """
    if not time_expr:
        return None

    offset_mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_mobj is not None:
        return float(offset_mobj.group('time_offset'))

    clock_mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_mobj is None:
        return None

    hours, minutes, seconds = clock_mobj.groups()
    # The fraction may be separated by ':' instead of '.'
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4500
4501
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """ Format a time in seconds as an SRT timecode "HH:MM:SS,mmm".

    The value is rounded to whole milliseconds before it is split into
    fields. Formatting the float fields directly truncates them, so binary
    float error could lose a millisecond (5.84 used to render as ",839").
    """
    total_msec = int(round(seconds * 1000))
    total_secs, msec = divmod(total_msec, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
bf6427d2
YCH
4504
4505
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT, keeping the supported text styling.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph elements
    '''
    # Legacy namespace URIs are rewritten to the current ones before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> resolved style properties
    default_style = {}  # properties inherited from the body/div elements

    class TTMLPElementParser(object):
        """ XMLParser target turning one paragraph into text with SRT markup """

        def __init__(self):
            # All state is per instance: a fresh parser is created for every
            # paragraph and must not see what an earlier paragraph left behind
            self._out = ''
            self._unclosed_elements = []  # per open element: tags to close
            self._applied_styles = []  # stack of effective styles
            self._style_pushed = []  # per open element: pushed a style?

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip what the enclosing element already applies
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                # Remember whether this element pushed a style so that end()
                # pops exactly what start() pushed, even when no tag was opened
                self._style_pushed.append(bool(style))
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if self._style_pushed.pop():
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style may refer to a parent defined later
    # in the document, so the list is walked repeatedly until nothing is left
    # waiting. When a pass makes no progress (the parent is missing or has no
    # supported property) one last pass ignores the missing parents; without
    # it this loop would never terminate.
    style_elements = dfxp.findall(_x('.//ttml:style'))
    previously_unresolved = None
    ignore_missing_parents = False
    while True:
        unresolved = 0
        for style in style_elements:
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id in styles:
                    styles[style_id] = styles[parent_style_id].copy()
                elif not ignore_missing_parents:
                    unresolved += 1
                    continue
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if not unresolved:
            break
        if unresolved == previously_unresolved:
            ignore_missing_parents = True
        previously_unresolved = unresolved

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        # Paragraphs without a usable begin time, or with neither an end
        # time nor a duration, are left out
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4668
4669
66e289ba
S
def cli_option(params, command_option, param):
    """ Build the argument list for an option taking a value.

    Returns [command_option, value] when params holds a value for param and
    an empty list when it is missing or None. Truthy values are converted to
    a string, falsy non-None values are passed through unchanged.
    """
    value = params.get(param)
    if value is None:
        return []
    if value:
        value = compat_str(value)
    return [command_option, value]
4675
4676
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the argument list for a boolean option.

    Returns an empty list when params has no value for param. Otherwise the
    value must be a bool; it is rendered as true_value or false_value, either
    as a separate argument or joined to command_option with separator.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4685
4686
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Return [command_option] when params[param] equals expected_value, else an empty list """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4690
4691
def cli_configuration_args(argdict, key, default=[], exe=None, use_default_arg=True):
    """ Look up the extra command line arguments configured for key/exe.

    @param argdict          dict mapping 'key+exe', key, exe and 'default' to
                            argument sequences; a plain list/tuple is accepted
                            for backward compatibility; None means "not set"
    @param key              name to look up (case-insensitive)
    @param default          value used when nothing is configured; it is
                            only read here, never modified
    @param exe              optional executable name (case-insensitive); its
                            arguments are appended after those of key unless
                            a 'key+exe' entry exists
    @param use_default_arg  True, False, or 'no_compat'
    @returns default or the legacy argdict unchanged on the early exits,
             otherwise a new list of arguments
    """
    # use_default_arg can be True, False, or 'no_compat'
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        # Compared with == on purpose: 'no_compat' is truthy but must not
        # enable the legacy behaviour
        if use_default_arg == True:  # noqa: E712
            return argdict
        else:
            argdict = None

    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    key = key.lower()
    args = exe_args = None
    if exe is not None:
        assert isinstance(exe, compat_str)
        exe = exe.lower()
        args = argdict.get('%s+%s' % (key, exe))
        if args is None:
            exe_args = argdict.get(exe)

    if args is None:
        args = argdict.get(key) if key != exe else None
    if args is None and exe_args is None:
        args = argdict.get('default', default) if use_default_arg else default

    args, exe_args = args or [], exe_args or []
    assert isinstance(args, (list, tuple))
    assert isinstance(exe_args, (list, tuple))
    # Both lists and tuples are accepted above; convert before concatenating
    # because list + tuple (or tuple + list) raises TypeError
    return list(args) + list(exe_args)
66e289ba
S
4722
4723
39672624
YCH
4724class ISO639Utils(object):
4725 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4726 _lang_map = {
4727 'aa': 'aar',
4728 'ab': 'abk',
4729 'ae': 'ave',
4730 'af': 'afr',
4731 'ak': 'aka',
4732 'am': 'amh',
4733 'an': 'arg',
4734 'ar': 'ara',
4735 'as': 'asm',
4736 'av': 'ava',
4737 'ay': 'aym',
4738 'az': 'aze',
4739 'ba': 'bak',
4740 'be': 'bel',
4741 'bg': 'bul',
4742 'bh': 'bih',
4743 'bi': 'bis',
4744 'bm': 'bam',
4745 'bn': 'ben',
4746 'bo': 'bod',
4747 'br': 'bre',
4748 'bs': 'bos',
4749 'ca': 'cat',
4750 'ce': 'che',
4751 'ch': 'cha',
4752 'co': 'cos',
4753 'cr': 'cre',
4754 'cs': 'ces',
4755 'cu': 'chu',
4756 'cv': 'chv',
4757 'cy': 'cym',
4758 'da': 'dan',
4759 'de': 'deu',
4760 'dv': 'div',
4761 'dz': 'dzo',
4762 'ee': 'ewe',
4763 'el': 'ell',
4764 'en': 'eng',
4765 'eo': 'epo',
4766 'es': 'spa',
4767 'et': 'est',
4768 'eu': 'eus',
4769 'fa': 'fas',
4770 'ff': 'ful',
4771 'fi': 'fin',
4772 'fj': 'fij',
4773 'fo': 'fao',
4774 'fr': 'fra',
4775 'fy': 'fry',
4776 'ga': 'gle',
4777 'gd': 'gla',
4778 'gl': 'glg',
4779 'gn': 'grn',
4780 'gu': 'guj',
4781 'gv': 'glv',
4782 'ha': 'hau',
4783 'he': 'heb',
b7acc835 4784 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4785 'hi': 'hin',
4786 'ho': 'hmo',
4787 'hr': 'hrv',
4788 'ht': 'hat',
4789 'hu': 'hun',
4790 'hy': 'hye',
4791 'hz': 'her',
4792 'ia': 'ina',
4793 'id': 'ind',
b7acc835 4794 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4795 'ie': 'ile',
4796 'ig': 'ibo',
4797 'ii': 'iii',
4798 'ik': 'ipk',
4799 'io': 'ido',
4800 'is': 'isl',
4801 'it': 'ita',
4802 'iu': 'iku',
4803 'ja': 'jpn',
4804 'jv': 'jav',
4805 'ka': 'kat',
4806 'kg': 'kon',
4807 'ki': 'kik',
4808 'kj': 'kua',
4809 'kk': 'kaz',
4810 'kl': 'kal',
4811 'km': 'khm',
4812 'kn': 'kan',
4813 'ko': 'kor',
4814 'kr': 'kau',
4815 'ks': 'kas',
4816 'ku': 'kur',
4817 'kv': 'kom',
4818 'kw': 'cor',
4819 'ky': 'kir',
4820 'la': 'lat',
4821 'lb': 'ltz',
4822 'lg': 'lug',
4823 'li': 'lim',
4824 'ln': 'lin',
4825 'lo': 'lao',
4826 'lt': 'lit',
4827 'lu': 'lub',
4828 'lv': 'lav',
4829 'mg': 'mlg',
4830 'mh': 'mah',
4831 'mi': 'mri',
4832 'mk': 'mkd',
4833 'ml': 'mal',
4834 'mn': 'mon',
4835 'mr': 'mar',
4836 'ms': 'msa',
4837 'mt': 'mlt',
4838 'my': 'mya',
4839 'na': 'nau',
4840 'nb': 'nob',
4841 'nd': 'nde',
4842 'ne': 'nep',
4843 'ng': 'ndo',
4844 'nl': 'nld',
4845 'nn': 'nno',
4846 'no': 'nor',
4847 'nr': 'nbl',
4848 'nv': 'nav',
4849 'ny': 'nya',
4850 'oc': 'oci',
4851 'oj': 'oji',
4852 'om': 'orm',
4853 'or': 'ori',
4854 'os': 'oss',
4855 'pa': 'pan',
4856 'pi': 'pli',
4857 'pl': 'pol',
4858 'ps': 'pus',
4859 'pt': 'por',
4860 'qu': 'que',
4861 'rm': 'roh',
4862 'rn': 'run',
4863 'ro': 'ron',
4864 'ru': 'rus',
4865 'rw': 'kin',
4866 'sa': 'san',
4867 'sc': 'srd',
4868 'sd': 'snd',
4869 'se': 'sme',
4870 'sg': 'sag',
4871 'si': 'sin',
4872 'sk': 'slk',
4873 'sl': 'slv',
4874 'sm': 'smo',
4875 'sn': 'sna',
4876 'so': 'som',
4877 'sq': 'sqi',
4878 'sr': 'srp',
4879 'ss': 'ssw',
4880 'st': 'sot',
4881 'su': 'sun',
4882 'sv': 'swe',
4883 'sw': 'swa',
4884 'ta': 'tam',
4885 'te': 'tel',
4886 'tg': 'tgk',
4887 'th': 'tha',
4888 'ti': 'tir',
4889 'tk': 'tuk',
4890 'tl': 'tgl',
4891 'tn': 'tsn',
4892 'to': 'ton',
4893 'tr': 'tur',
4894 'ts': 'tso',
4895 'tt': 'tat',
4896 'tw': 'twi',
4897 'ty': 'tah',
4898 'ug': 'uig',
4899 'uk': 'ukr',
4900 'ur': 'urd',
4901 'uz': 'uzb',
4902 've': 'ven',
4903 'vi': 'vie',
4904 'vo': 'vol',
4905 'wa': 'wln',
4906 'wo': 'wol',
4907 'xh': 'xho',
4908 'yi': 'yid',
e9a50fba 4909 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
4910 'yo': 'yor',
4911 'za': 'zha',
4912 'zh': 'zho',
4913 'zu': 'zul',
4914 }
4915
4916 @classmethod
4917 def short2long(cls, code):
4918 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4919 return cls._lang_map.get(code[:2])
4920
4921 @classmethod
4922 def long2short(cls, code):
4923 """Convert language code from ISO 639-2/T to ISO 639-1"""
4924 for short_name, long_name in cls._lang_map.items():
4925 if long_name == code:
4926 return short_name
4927
4928
4eb10f66
YCH
4929class ISO3166Utils(object):
4930 # From http://data.okfn.org/data/core/country-list
4931 _country_map = {
4932 'AF': 'Afghanistan',
4933 'AX': 'Åland Islands',
4934 'AL': 'Albania',
4935 'DZ': 'Algeria',
4936 'AS': 'American Samoa',
4937 'AD': 'Andorra',
4938 'AO': 'Angola',
4939 'AI': 'Anguilla',
4940 'AQ': 'Antarctica',
4941 'AG': 'Antigua and Barbuda',
4942 'AR': 'Argentina',
4943 'AM': 'Armenia',
4944 'AW': 'Aruba',
4945 'AU': 'Australia',
4946 'AT': 'Austria',
4947 'AZ': 'Azerbaijan',
4948 'BS': 'Bahamas',
4949 'BH': 'Bahrain',
4950 'BD': 'Bangladesh',
4951 'BB': 'Barbados',
4952 'BY': 'Belarus',
4953 'BE': 'Belgium',
4954 'BZ': 'Belize',
4955 'BJ': 'Benin',
4956 'BM': 'Bermuda',
4957 'BT': 'Bhutan',
4958 'BO': 'Bolivia, Plurinational State of',
4959 'BQ': 'Bonaire, Sint Eustatius and Saba',
4960 'BA': 'Bosnia and Herzegovina',
4961 'BW': 'Botswana',
4962 'BV': 'Bouvet Island',
4963 'BR': 'Brazil',
4964 'IO': 'British Indian Ocean Territory',
4965 'BN': 'Brunei Darussalam',
4966 'BG': 'Bulgaria',
4967 'BF': 'Burkina Faso',
4968 'BI': 'Burundi',
4969 'KH': 'Cambodia',
4970 'CM': 'Cameroon',
4971 'CA': 'Canada',
4972 'CV': 'Cape Verde',
4973 'KY': 'Cayman Islands',
4974 'CF': 'Central African Republic',
4975 'TD': 'Chad',
4976 'CL': 'Chile',
4977 'CN': 'China',
4978 'CX': 'Christmas Island',
4979 'CC': 'Cocos (Keeling) Islands',
4980 'CO': 'Colombia',
4981 'KM': 'Comoros',
4982 'CG': 'Congo',
4983 'CD': 'Congo, the Democratic Republic of the',
4984 'CK': 'Cook Islands',
4985 'CR': 'Costa Rica',
4986 'CI': 'Côte d\'Ivoire',
4987 'HR': 'Croatia',
4988 'CU': 'Cuba',
4989 'CW': 'Curaçao',
4990 'CY': 'Cyprus',
4991 'CZ': 'Czech Republic',
4992 'DK': 'Denmark',
4993 'DJ': 'Djibouti',
4994 'DM': 'Dominica',
4995 'DO': 'Dominican Republic',
4996 'EC': 'Ecuador',
4997 'EG': 'Egypt',
4998 'SV': 'El Salvador',
4999 'GQ': 'Equatorial Guinea',
5000 'ER': 'Eritrea',
5001 'EE': 'Estonia',
5002 'ET': 'Ethiopia',
5003 'FK': 'Falkland Islands (Malvinas)',
5004 'FO': 'Faroe Islands',
5005 'FJ': 'Fiji',
5006 'FI': 'Finland',
5007 'FR': 'France',
5008 'GF': 'French Guiana',
5009 'PF': 'French Polynesia',
5010 'TF': 'French Southern Territories',
5011 'GA': 'Gabon',
5012 'GM': 'Gambia',
5013 'GE': 'Georgia',
5014 'DE': 'Germany',
5015 'GH': 'Ghana',
5016 'GI': 'Gibraltar',
5017 'GR': 'Greece',
5018 'GL': 'Greenland',
5019 'GD': 'Grenada',
5020 'GP': 'Guadeloupe',
5021 'GU': 'Guam',
5022 'GT': 'Guatemala',
5023 'GG': 'Guernsey',
5024 'GN': 'Guinea',
5025 'GW': 'Guinea-Bissau',
5026 'GY': 'Guyana',
5027 'HT': 'Haiti',
5028 'HM': 'Heard Island and McDonald Islands',
5029 'VA': 'Holy See (Vatican City State)',
5030 'HN': 'Honduras',
5031 'HK': 'Hong Kong',
5032 'HU': 'Hungary',
5033 'IS': 'Iceland',
5034 'IN': 'India',
5035 'ID': 'Indonesia',
5036 'IR': 'Iran, Islamic Republic of',
5037 'IQ': 'Iraq',
5038 'IE': 'Ireland',
5039 'IM': 'Isle of Man',
5040 'IL': 'Israel',
5041 'IT': 'Italy',
5042 'JM': 'Jamaica',
5043 'JP': 'Japan',
5044 'JE': 'Jersey',
5045 'JO': 'Jordan',
5046 'KZ': 'Kazakhstan',
5047 'KE': 'Kenya',
5048 'KI': 'Kiribati',
5049 'KP': 'Korea, Democratic People\'s Republic of',
5050 'KR': 'Korea, Republic of',
5051 'KW': 'Kuwait',
5052 'KG': 'Kyrgyzstan',
5053 'LA': 'Lao People\'s Democratic Republic',
5054 'LV': 'Latvia',
5055 'LB': 'Lebanon',
5056 'LS': 'Lesotho',
5057 'LR': 'Liberia',
5058 'LY': 'Libya',
5059 'LI': 'Liechtenstein',
5060 'LT': 'Lithuania',
5061 'LU': 'Luxembourg',
5062 'MO': 'Macao',
5063 'MK': 'Macedonia, the Former Yugoslav Republic of',
5064 'MG': 'Madagascar',
5065 'MW': 'Malawi',
5066 'MY': 'Malaysia',
5067 'MV': 'Maldives',
5068 'ML': 'Mali',
5069 'MT': 'Malta',
5070 'MH': 'Marshall Islands',
5071 'MQ': 'Martinique',
5072 'MR': 'Mauritania',
5073 'MU': 'Mauritius',
5074 'YT': 'Mayotte',
5075 'MX': 'Mexico',
5076 'FM': 'Micronesia, Federated States of',
5077 'MD': 'Moldova, Republic of',
5078 'MC': 'Monaco',
5079 'MN': 'Mongolia',
5080 'ME': 'Montenegro',
5081 'MS': 'Montserrat',
5082 'MA': 'Morocco',
5083 'MZ': 'Mozambique',
5084 'MM': 'Myanmar',
5085 'NA': 'Namibia',
5086 'NR': 'Nauru',
5087 'NP': 'Nepal',
5088 'NL': 'Netherlands',
5089 'NC': 'New Caledonia',
5090 'NZ': 'New Zealand',
5091 'NI': 'Nicaragua',
5092 'NE': 'Niger',
5093 'NG': 'Nigeria',
5094 'NU': 'Niue',
5095 'NF': 'Norfolk Island',
5096 'MP': 'Northern Mariana Islands',
5097 'NO': 'Norway',
5098 'OM': 'Oman',
5099 'PK': 'Pakistan',
5100 'PW': 'Palau',
5101 'PS': 'Palestine, State of',
5102 'PA': 'Panama',
5103 'PG': 'Papua New Guinea',
5104 'PY': 'Paraguay',
5105 'PE': 'Peru',
5106 'PH': 'Philippines',
5107 'PN': 'Pitcairn',
5108 'PL': 'Poland',
5109 'PT': 'Portugal',
5110 'PR': 'Puerto Rico',
5111 'QA': 'Qatar',
5112 'RE': 'Réunion',
5113 'RO': 'Romania',
5114 'RU': 'Russian Federation',
5115 'RW': 'Rwanda',
5116 'BL': 'Saint Barthélemy',
5117 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5118 'KN': 'Saint Kitts and Nevis',
5119 'LC': 'Saint Lucia',
5120 'MF': 'Saint Martin (French part)',
5121 'PM': 'Saint Pierre and Miquelon',
5122 'VC': 'Saint Vincent and the Grenadines',
5123 'WS': 'Samoa',
5124 'SM': 'San Marino',
5125 'ST': 'Sao Tome and Principe',
5126 'SA': 'Saudi Arabia',
5127 'SN': 'Senegal',
5128 'RS': 'Serbia',
5129 'SC': 'Seychelles',
5130 'SL': 'Sierra Leone',
5131 'SG': 'Singapore',
5132 'SX': 'Sint Maarten (Dutch part)',
5133 'SK': 'Slovakia',
5134 'SI': 'Slovenia',
5135 'SB': 'Solomon Islands',
5136 'SO': 'Somalia',
5137 'ZA': 'South Africa',
5138 'GS': 'South Georgia and the South Sandwich Islands',
5139 'SS': 'South Sudan',
5140 'ES': 'Spain',
5141 'LK': 'Sri Lanka',
5142 'SD': 'Sudan',
5143 'SR': 'Suriname',
5144 'SJ': 'Svalbard and Jan Mayen',
5145 'SZ': 'Swaziland',
5146 'SE': 'Sweden',
5147 'CH': 'Switzerland',
5148 'SY': 'Syrian Arab Republic',
5149 'TW': 'Taiwan, Province of China',
5150 'TJ': 'Tajikistan',
5151 'TZ': 'Tanzania, United Republic of',
5152 'TH': 'Thailand',
5153 'TL': 'Timor-Leste',
5154 'TG': 'Togo',
5155 'TK': 'Tokelau',
5156 'TO': 'Tonga',
5157 'TT': 'Trinidad and Tobago',
5158 'TN': 'Tunisia',
5159 'TR': 'Turkey',
5160 'TM': 'Turkmenistan',
5161 'TC': 'Turks and Caicos Islands',
5162 'TV': 'Tuvalu',
5163 'UG': 'Uganda',
5164 'UA': 'Ukraine',
5165 'AE': 'United Arab Emirates',
5166 'GB': 'United Kingdom',
5167 'US': 'United States',
5168 'UM': 'United States Minor Outlying Islands',
5169 'UY': 'Uruguay',
5170 'UZ': 'Uzbekistan',
5171 'VU': 'Vanuatu',
5172 'VE': 'Venezuela, Bolivarian Republic of',
5173 'VN': 'Viet Nam',
5174 'VG': 'Virgin Islands, British',
5175 'VI': 'Virgin Islands, U.S.',
5176 'WF': 'Wallis and Futuna',
5177 'EH': 'Western Sahara',
5178 'YE': 'Yemen',
5179 'ZM': 'Zambia',
5180 'ZW': 'Zimbabwe',
5181 }
5182
5183 @classmethod
5184 def short2full(cls, code):
5185 """Convert an ISO 3166-2 country code to the corresponding full name"""
5186 return cls._country_map.get(code.upper())
5187
5188
773f291d
S
5189class GeoUtils(object):
5190 # Major IPv4 address blocks per country
5191 _country_ip_map = {
53896ca5 5192 'AD': '46.172.224.0/19',
773f291d
S
5193 'AE': '94.200.0.0/13',
5194 'AF': '149.54.0.0/17',
5195 'AG': '209.59.64.0/18',
5196 'AI': '204.14.248.0/21',
5197 'AL': '46.99.0.0/16',
5198 'AM': '46.70.0.0/15',
5199 'AO': '105.168.0.0/13',
53896ca5
S
5200 'AP': '182.50.184.0/21',
5201 'AQ': '23.154.160.0/24',
773f291d
S
5202 'AR': '181.0.0.0/12',
5203 'AS': '202.70.112.0/20',
53896ca5 5204 'AT': '77.116.0.0/14',
773f291d
S
5205 'AU': '1.128.0.0/11',
5206 'AW': '181.41.0.0/18',
53896ca5
S
5207 'AX': '185.217.4.0/22',
5208 'AZ': '5.197.0.0/16',
773f291d
S
5209 'BA': '31.176.128.0/17',
5210 'BB': '65.48.128.0/17',
5211 'BD': '114.130.0.0/16',
5212 'BE': '57.0.0.0/8',
53896ca5 5213 'BF': '102.178.0.0/15',
773f291d
S
5214 'BG': '95.42.0.0/15',
5215 'BH': '37.131.0.0/17',
5216 'BI': '154.117.192.0/18',
5217 'BJ': '137.255.0.0/16',
53896ca5 5218 'BL': '185.212.72.0/23',
773f291d
S
5219 'BM': '196.12.64.0/18',
5220 'BN': '156.31.0.0/16',
5221 'BO': '161.56.0.0/16',
5222 'BQ': '161.0.80.0/20',
53896ca5 5223 'BR': '191.128.0.0/12',
773f291d
S
5224 'BS': '24.51.64.0/18',
5225 'BT': '119.2.96.0/19',
5226 'BW': '168.167.0.0/16',
5227 'BY': '178.120.0.0/13',
5228 'BZ': '179.42.192.0/18',
5229 'CA': '99.224.0.0/11',
5230 'CD': '41.243.0.0/16',
53896ca5
S
5231 'CF': '197.242.176.0/21',
5232 'CG': '160.113.0.0/16',
773f291d 5233 'CH': '85.0.0.0/13',
53896ca5 5234 'CI': '102.136.0.0/14',
773f291d
S
5235 'CK': '202.65.32.0/19',
5236 'CL': '152.172.0.0/14',
53896ca5 5237 'CM': '102.244.0.0/14',
773f291d
S
5238 'CN': '36.128.0.0/10',
5239 'CO': '181.240.0.0/12',
5240 'CR': '201.192.0.0/12',
5241 'CU': '152.206.0.0/15',
5242 'CV': '165.90.96.0/19',
5243 'CW': '190.88.128.0/17',
53896ca5 5244 'CY': '31.153.0.0/16',
773f291d
S
5245 'CZ': '88.100.0.0/14',
5246 'DE': '53.0.0.0/8',
5247 'DJ': '197.241.0.0/17',
5248 'DK': '87.48.0.0/12',
5249 'DM': '192.243.48.0/20',
5250 'DO': '152.166.0.0/15',
5251 'DZ': '41.96.0.0/12',
5252 'EC': '186.68.0.0/15',
5253 'EE': '90.190.0.0/15',
5254 'EG': '156.160.0.0/11',
5255 'ER': '196.200.96.0/20',
5256 'ES': '88.0.0.0/11',
5257 'ET': '196.188.0.0/14',
5258 'EU': '2.16.0.0/13',
5259 'FI': '91.152.0.0/13',
5260 'FJ': '144.120.0.0/16',
53896ca5 5261 'FK': '80.73.208.0/21',
773f291d
S
5262 'FM': '119.252.112.0/20',
5263 'FO': '88.85.32.0/19',
5264 'FR': '90.0.0.0/9',
5265 'GA': '41.158.0.0/15',
5266 'GB': '25.0.0.0/8',
5267 'GD': '74.122.88.0/21',
5268 'GE': '31.146.0.0/16',
5269 'GF': '161.22.64.0/18',
5270 'GG': '62.68.160.0/19',
53896ca5
S
5271 'GH': '154.160.0.0/12',
5272 'GI': '95.164.0.0/16',
773f291d
S
5273 'GL': '88.83.0.0/19',
5274 'GM': '160.182.0.0/15',
5275 'GN': '197.149.192.0/18',
5276 'GP': '104.250.0.0/19',
5277 'GQ': '105.235.224.0/20',
5278 'GR': '94.64.0.0/13',
5279 'GT': '168.234.0.0/16',
5280 'GU': '168.123.0.0/16',
5281 'GW': '197.214.80.0/20',
5282 'GY': '181.41.64.0/18',
5283 'HK': '113.252.0.0/14',
5284 'HN': '181.210.0.0/16',
5285 'HR': '93.136.0.0/13',
5286 'HT': '148.102.128.0/17',
5287 'HU': '84.0.0.0/14',
5288 'ID': '39.192.0.0/10',
5289 'IE': '87.32.0.0/12',
5290 'IL': '79.176.0.0/13',
5291 'IM': '5.62.80.0/20',
5292 'IN': '117.192.0.0/10',
5293 'IO': '203.83.48.0/21',
5294 'IQ': '37.236.0.0/14',
5295 'IR': '2.176.0.0/12',
5296 'IS': '82.221.0.0/16',
5297 'IT': '79.0.0.0/10',
5298 'JE': '87.244.64.0/18',
5299 'JM': '72.27.0.0/17',
5300 'JO': '176.29.0.0/16',
53896ca5 5301 'JP': '133.0.0.0/8',
773f291d
S
5302 'KE': '105.48.0.0/12',
5303 'KG': '158.181.128.0/17',
5304 'KH': '36.37.128.0/17',
5305 'KI': '103.25.140.0/22',
5306 'KM': '197.255.224.0/20',
53896ca5 5307 'KN': '198.167.192.0/19',
773f291d
S
5308 'KP': '175.45.176.0/22',
5309 'KR': '175.192.0.0/10',
5310 'KW': '37.36.0.0/14',
5311 'KY': '64.96.0.0/15',
5312 'KZ': '2.72.0.0/13',
5313 'LA': '115.84.64.0/18',
5314 'LB': '178.135.0.0/16',
53896ca5 5315 'LC': '24.92.144.0/20',
773f291d
S
5316 'LI': '82.117.0.0/19',
5317 'LK': '112.134.0.0/15',
53896ca5 5318 'LR': '102.183.0.0/16',
773f291d
S
5319 'LS': '129.232.0.0/17',
5320 'LT': '78.56.0.0/13',
5321 'LU': '188.42.0.0/16',
5322 'LV': '46.109.0.0/16',
5323 'LY': '41.252.0.0/14',
5324 'MA': '105.128.0.0/11',
5325 'MC': '88.209.64.0/18',
5326 'MD': '37.246.0.0/16',
5327 'ME': '178.175.0.0/17',
5328 'MF': '74.112.232.0/21',
5329 'MG': '154.126.0.0/17',
5330 'MH': '117.103.88.0/21',
5331 'MK': '77.28.0.0/15',
5332 'ML': '154.118.128.0/18',
5333 'MM': '37.111.0.0/17',
5334 'MN': '49.0.128.0/17',
5335 'MO': '60.246.0.0/16',
5336 'MP': '202.88.64.0/20',
5337 'MQ': '109.203.224.0/19',
5338 'MR': '41.188.64.0/18',
5339 'MS': '208.90.112.0/22',
5340 'MT': '46.11.0.0/16',
5341 'MU': '105.16.0.0/12',
5342 'MV': '27.114.128.0/18',
53896ca5 5343 'MW': '102.70.0.0/15',
773f291d
S
5344 'MX': '187.192.0.0/11',
5345 'MY': '175.136.0.0/13',
5346 'MZ': '197.218.0.0/15',
5347 'NA': '41.182.0.0/16',
5348 'NC': '101.101.0.0/18',
5349 'NE': '197.214.0.0/18',
5350 'NF': '203.17.240.0/22',
5351 'NG': '105.112.0.0/12',
5352 'NI': '186.76.0.0/15',
5353 'NL': '145.96.0.0/11',
5354 'NO': '84.208.0.0/13',
5355 'NP': '36.252.0.0/15',
5356 'NR': '203.98.224.0/19',
5357 'NU': '49.156.48.0/22',
5358 'NZ': '49.224.0.0/14',
5359 'OM': '5.36.0.0/15',
5360 'PA': '186.72.0.0/15',
5361 'PE': '186.160.0.0/14',
5362 'PF': '123.50.64.0/18',
5363 'PG': '124.240.192.0/19',
5364 'PH': '49.144.0.0/13',
5365 'PK': '39.32.0.0/11',
5366 'PL': '83.0.0.0/11',
5367 'PM': '70.36.0.0/20',
5368 'PR': '66.50.0.0/16',
5369 'PS': '188.161.0.0/16',
5370 'PT': '85.240.0.0/13',
5371 'PW': '202.124.224.0/20',
5372 'PY': '181.120.0.0/14',
5373 'QA': '37.210.0.0/15',
53896ca5 5374 'RE': '102.35.0.0/16',
773f291d 5375 'RO': '79.112.0.0/13',
53896ca5 5376 'RS': '93.86.0.0/15',
773f291d 5377 'RU': '5.136.0.0/13',
53896ca5 5378 'RW': '41.186.0.0/16',
773f291d
S
5379 'SA': '188.48.0.0/13',
5380 'SB': '202.1.160.0/19',
5381 'SC': '154.192.0.0/11',
53896ca5 5382 'SD': '102.120.0.0/13',
773f291d 5383 'SE': '78.64.0.0/12',
53896ca5 5384 'SG': '8.128.0.0/10',
773f291d
S
5385 'SI': '188.196.0.0/14',
5386 'SK': '78.98.0.0/15',
53896ca5 5387 'SL': '102.143.0.0/17',
773f291d
S
5388 'SM': '89.186.32.0/19',
5389 'SN': '41.82.0.0/15',
53896ca5 5390 'SO': '154.115.192.0/18',
773f291d
S
5391 'SR': '186.179.128.0/17',
5392 'SS': '105.235.208.0/21',
5393 'ST': '197.159.160.0/19',
5394 'SV': '168.243.0.0/16',
5395 'SX': '190.102.0.0/20',
5396 'SY': '5.0.0.0/16',
5397 'SZ': '41.84.224.0/19',
5398 'TC': '65.255.48.0/20',
5399 'TD': '154.68.128.0/19',
5400 'TG': '196.168.0.0/14',
5401 'TH': '171.96.0.0/13',
5402 'TJ': '85.9.128.0/18',
5403 'TK': '27.96.24.0/21',
5404 'TL': '180.189.160.0/20',
5405 'TM': '95.85.96.0/19',
5406 'TN': '197.0.0.0/11',
5407 'TO': '175.176.144.0/21',
5408 'TR': '78.160.0.0/11',
5409 'TT': '186.44.0.0/15',
5410 'TV': '202.2.96.0/19',
5411 'TW': '120.96.0.0/11',
5412 'TZ': '156.156.0.0/14',
53896ca5
S
5413 'UA': '37.52.0.0/14',
5414 'UG': '102.80.0.0/13',
5415 'US': '6.0.0.0/8',
773f291d 5416 'UY': '167.56.0.0/13',
53896ca5 5417 'UZ': '84.54.64.0/18',
773f291d 5418 'VA': '212.77.0.0/19',
53896ca5 5419 'VC': '207.191.240.0/21',
773f291d 5420 'VE': '186.88.0.0/13',
53896ca5 5421 'VG': '66.81.192.0/20',
773f291d
S
5422 'VI': '146.226.0.0/16',
5423 'VN': '14.160.0.0/11',
5424 'VU': '202.80.32.0/20',
5425 'WF': '117.20.32.0/21',
5426 'WS': '202.4.32.0/19',
5427 'YE': '134.35.0.0/16',
5428 'YT': '41.242.116.0/22',
5429 'ZA': '41.0.0.0/11',
53896ca5
S
5430 'ZM': '102.144.0.0/13',
5431 'ZW': '102.177.192.0/18',
773f291d
S
5432 }
5433
5434 @classmethod
5f95927a
S
5435 def random_ipv4(cls, code_or_block):
5436 if len(code_or_block) == 2:
5437 block = cls._country_ip_map.get(code_or_block.upper())
5438 if not block:
5439 return None
5440 else:
5441 block = code_or_block
773f291d
S
5442 addr, preflen = block.split('/')
5443 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5444 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5445 return compat_str(socket.inet_ntoa(
4248dad9 5446 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5447
5448
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets a single request override the proxy.

    A request carrying a 'Ytdl-request-proxy' header is sent through the
    proxy named there; the value '__noproxy__' means a direct connection.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        # They are installed before the base class initialiser runs, which
        # is handed the proxies mapping afterwards.
        bound_proxy_open = self.proxy_open
        for scheme in ('http', 'https'):
            setattr(
                self, scheme + '_open',
                lambda r, proxy='__noproxy__', type=scheme, meth=bound_proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy takes precedence; the header is removed from
        # the request once it has been read
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers wrap the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5472
5473
0a5445dd
YCH
5474# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5475# released into Public Domain
5476# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5477
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    The result carries no leading zero bytes; zero (and any value that is not
    positive) is encoded as a single zero byte.  If optional blocksize is
    given and greater than zero, the front of the byte string is padded with
    binary zeros so that the length is a multiple of blocksize.
    """
    n = int(n)
    words = []
    # Peel off 32 bits at a time, least significant word first
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # Most significant word first, without leading zero bytes
    packed = b''.join(reversed(words)).lstrip(b'\000') or b'\000'

    remainder = len(packed) % blocksize if blocksize > 0 else 0
    if remainder:
        packed = b'\000' * (blocksize - remainder) + packed
    return packed
5506
5507
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the input splits into whole 32-bit words
    missing = -len(s) % 4
    if missing:
        s = b'\000' * missing + s

    total = 0
    for offset in range(0, len(s), 4):
        (word,) = compat_struct_unpack('>I', s[offset:offset + 4])
        total = (total << 32) + word
    return total
5523
5524
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The data is read as a little-endian number, hence the byte reversal
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
81bdc8fd
YCH
5540
5541
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is [0, 2] + padding + [0] + data, where every padding octet is
    a nonzero pseudo-random value.  A zero octet inside the padding would be
    taken for the separator by the receiver and corrupt the message.

    @param {int[]} data        input data (any sequence of ints)
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if fewer than 8 padding octets would fit
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # 3 octets are taken by the leading 0, the block type 2 and the separator
    padding = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + padding + [0] + list(data)
5555
5556
def encode_base_n(num, n, table=None):
    """Return the integer num written in base n.

    table supplies the digit characters; when omitted, the first n characters
    of 0-9, a-z, A-Z are used (so at most base 62).  Negative numbers are
    rendered as '-' followed by the encoding of their absolute value.

    Raises ValueError if n exceeds the number of available digits, or if a
    nonzero num is requested in a base smaller than 2.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # A base below 2 can never reduce num to zero
    if n < 2:
        raise ValueError('base %d is too small, at least 2 is required' % n)

    # Floor division never brings a negative number to zero, so work on the
    # magnitude and put the sign back at the end
    sign = '-' if num < 0 else ''
    num = abs(num)

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return sign + ''.join(reversed(digits))
f52354a8
YCH
5573
5574
def decode_packed_codes(code):
    """Expand code that matches PACKED_CODES_RE back into its plain form.

    The match supplies the obfuscated text, a numeric base, a symbol count and
    a '|'-separated symbol list.  Each word of the obfuscated text is replaced
    by the symbol stored under that word; an empty symbol means the word
    stands for itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map the base-n spelling of every index to its replacement symbol
    symbol_table = {}
    while count:
        count -= 1
        encoded_index = encode_base_n(count, base)
        symbol_table[encoded_index] = symbols[count] or encoded_index

    def _lookup(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
e154c651 5591
5592
1ced2221
S
def caesar(s, alphabet, shift):
    """Shift every character of s by shift positions within alphabet.

    The shift wraps around the end of the alphabet; characters that do not
    occur in alphabet are copied unchanged.
    """
    if shift == 0:
        return s

    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))
5600
5601
def rot47(s):
    """Apply caesar() with a shift of 47 over the 94 characters '!' to '~'."""
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
5604
5605
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict.

    Values may be bare or wrapped in double quotes (in which case they may
    contain commas); the surrounding quotes are removed.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    info = {}
    for match in re.finditer(attribute_re, attrib):
        value = match.group('val')
        info[match.group('key')] = value[1:-1] if value.startswith('"') else value
    return info
1143535d
YCH
5613
5614
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as unsigned 32-bit."""
    if val < 0:
        # Reinterpret the negative number as its 32-bit two's complement
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5617
5618
5619# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5620# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into (width, height, pixels).

    pixels is a list with one entry per image row; each row is a flat list of
    width * 3 integer sample values.  Every pixel is treated as three one-byte
    samples; the bit depth and colour type stored in IHDR are not consulted.

    Raises IOError if the signature or the leading IHDR chunk is missing, or
    if the file contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, remaining = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats_by_size = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        return compat_struct_unpack(formats_by_size[len(raw)], raw)[0]

    def paeth_predictor(a, b, c):
        # Choose whichever of left (a), up (b), upper-left (c) is closest to
        # a + b - c, preferring them in that order on ties
        p = a + b - c
        pa, pb, pc = abs(p - a), abs(p - b), abs(p - c)
        if pa <= pb and pa <= pc:
            return a
        return b if pb <= pc else c

    # Chunk layout: 4-byte length, 4-byte type, <length> data bytes, 4-byte CRC
    chunks = []
    while remaining:
        size = read_uint(remaining[:4])
        chunks.append({
            'type': remaining[4:8],
            'length': size,
            'data': remaining[8:8 + size],
        })
        remaining = remaining[8 + size + 4:]  # the CRC is skipped unchecked

    ihdr = chunks[0]['data']
    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(
        chunk['data'] for chunk in chunks if chunk['type'] == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []
    previous_row = None

    for y in range(height):
        # Every scanline is preceded by one byte naming its filter
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]

        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + row_start + x]
            # Neighbours are the same sample of the pixel to the left / above
            left = current_row[x - 3] if x > 2 else 0
            up = previous_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = previous_row[x - 3] if x > 2 and y > 0 else 0
                color = (color + paeth_predictor(left, up, upper_left)) & 0xff

            current_row.append(color)

        previous_row = current_row

    return width, height, pixels
efa97bdc
YCH
5724
5725
def write_xattr(path, key, value):
    """Store value as the extended attribute key of the file at path.

    value is expected to be a byte string: it is written in binary mode on
    Windows and decoded as UTF-8 before being handed to a command-line tool.

    The first available backend is used: the Python 'xattr'/'pyxattr' module,
    an NTFS alternate data stream on Windows, or the 'setfattr' / 'xattr'
    executables.

    Raises XAttrUnavailableError if no usable backend exists (including a
    pyxattr older than 0.5.0) and XAttrMetadataError if writing fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            stream_path = path + ':' + key
            try:
                with open(stream_path, 'wb') as stream:
                    stream.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        user_has_setfattr = check_executable('setfattr', ['--version'])
        user_has_xattr = check_executable('xattr', ['-h'])

        if not (user_has_setfattr or user_has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        if user_has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd += [encodeArgument(o) for o in opts]
        cmd += [encodeFilename(path, True)]

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        stdout, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
0c265486
YCH
5808
5809
def random_birthday(year_field, month_field, day_field):
    """Return a random date between 1950-01-01 and 1995-12-31 as form fields.

    The result maps the three given field names to the year, month and day of
    the chosen date, each rendered with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    day_span = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, day_span))

    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(
        (field, str(part)) for field, part in zip(field_names, date_parts))
732044af 5820
c76eb41b 5821
# Templates for internet shortcut files, which are plain text files.
# Each one is filled in with the %-operator and a mapping: all of them take
# `url`, and the .desktop template additionally takes `filename`.

# Windows .url shortcut
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# macOS .webloc shortcut (an XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# freedesktop.org .desktop link entry
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''
5847
5848
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept, except for port 80 on an `http` IRI, where it is redundant. IRIs without a host (e.g. relative references) are converted too.

    Raises ValueError for IPv6 hosts, which are not supported.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no authority part at all
    if iri_parts.hostname:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is only the default for http; dropping it for any other scheme
    # would silently point the URI at a different port.
    port = iri_parts.port
    if port is not None and (iri_parts.scheme, port) != ('http', 80):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5891
5892
def to_high_limit_path(path):
    r"""Return path as an absolute \\?\-prefixed path on Windows and Cygwin.

    On every other platform the path is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    # (A raw string cannot end in a backslash, hence the space and rstrip.)
    extended_length_prefix = r'\\?\ '.rstrip()
    return extended_length_prefix + os.path.abspath(path)
76d321f6 5899
c76eb41b 5900
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj[field] with template, or return default when it is unusable.

    The value is looked up with obj.get(field, default).  If func is given it
    is applied to the value first.  default is returned whenever the value,
    before or after func, is one of the entries of ignore.
    """
    val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
        if val in ignore:
            return default
    return template % val
00dd0cd5 5906
5907
def clean_podcast_url(url):
    """Remove the tracking/analytics redirect prefixes listed below from url."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
ffcb8191
THD
5923
5924
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version 4 UUID in lower-case 8-4-4-4-12 notation.

    The version digit is fixed to 4 and the variant digit (the first digit of
    the fourth group) is limited to 8, 9, a or b, as required for UUIDs of the
    RFC 4122 variant; all remaining digits are random.
    """
    def _random_digit(placeholder):
        if placeholder.group(0) == 'y':
            # Variant bits must be 10xx, i.e. one of 8, 9, a, b
            return _HEX_TABLE[random.randint(8, 11)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 5930
5931
def make_dir(path, to_screen=None):
    """Create the directory that is to contain path, if it does not exist yet.

    Returns True when the directory exists afterwards (or path has no
    directory part) and False when it could not be created.  On failure the
    error message is passed to to_screen, if a callable was given.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Only report when there is something to report to; the default
        # to_screen=None must not be called.
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
f74980cb 5942
5943
def get_executable_path():
    """Return the absolute directory of sys.argv[0].

    When the base name of sys.argv[0] is '__main__', the parent of that
    directory is returned instead.
    """
    script = sys.argv[0]
    directory = os.path.dirname(script)
    if os.path.basename(script) == '__main__':  # Running from source
        directory = os.path.join(directory, '..')
    return os.path.abspath(directory)
5949
5950
def load_plugins(name, type, namespace):
    """Load the plugin module name and collect the attributes ending in type.

    The module is searched for in the 'ytdlp_plugins' directory next to the
    executable.  Every attribute of the module whose name ends with type is
    stored in namespace under its own name and appended to the returned list.
    An empty list is returned if the module cannot be imported.
    """
    plugin_info = [None]
    classes = []
    try:
        search_path = [os.path.join(get_executable_path(), 'ytdlp_plugins')]
        plugin_info = imp.find_module(name, search_path)
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name.endswith(type):
                klass = getattr(plugins, attr_name)
                classes.append(klass)
                namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # find_module() returns an open file as its first item; close it
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
06167fbb 5970
5971
def traverse_dict(dictn, keys, casesense=True):
    """Follow the sequence keys through nested dicts and return the value.

    None is returned as soon as a level is not a dict or lacks the key.  With
    casesense=False both the dict keys and the looked-up keys are compared
    lower-cased.
    """
    node = dictn
    remaining = keys
    while True:
        if not isinstance(node, dict):
            return None
        wanted = remaining[0]
        if not casesense:
            node = {key.lower(): val for key, val in node.items()}
            wanted = wanted.lower()
        node = node.get(wanted, None)
        if len(remaining) < 2:
            return node
        remaining = remaining[1:]