]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
8f9312c3 43 compat_basestring,
8c25f81b 44 compat_chr,
1bab3437 45 compat_cookiejar,
d7cd9a9e 46 compat_ctypes_WINFUNCTYPE,
36e6f62c 47 compat_etree_fromstring,
51098426 48 compat_expanduser,
8c25f81b 49 compat_html_entities,
55b2f099 50 compat_html_entities_html5,
be4a824d 51 compat_http_client,
42db58ec 52 compat_integer_types,
e29663c6 53 compat_numeric_types,
c86b6142 54 compat_kwargs,
efa97bdc 55 compat_os_name,
8c25f81b 56 compat_parse_qs,
702ccf2d 57 compat_shlex_quote,
8c25f81b 58 compat_str,
edaa23f8 59 compat_struct_pack,
d3f8e038 60 compat_struct_unpack,
8c25f81b
PH
61 compat_urllib_error,
62 compat_urllib_parse,
15707c7e 63 compat_urllib_parse_urlencode,
8c25f81b 64 compat_urllib_parse_urlparse,
732044af 65 compat_urllib_parse_urlunparse,
66 compat_urllib_parse_quote,
67 compat_urllib_parse_quote_plus,
7581bfc9 68 compat_urllib_parse_unquote_plus,
8c25f81b
PH
69 compat_urllib_request,
70 compat_urlparse,
810c10ba 71 compat_xpath,
8c25f81b 72)
4644ac55 73
71aff188
YCH
74from .socks import (
75 ProxyType,
76 sockssocket,
77)
78
4644ac55 79
51fb4995
YCH
def register_socks_protocols():
    """Register the SOCKS URL schemes with the URL parser.

    Appends 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc``, skipping any scheme that is already
    listed, so repeated calls do not add duplicates.
    """
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
87
88
468e2e92
FV
# The type of a compiled regular expression is not clearly defined otherwise,
# so take it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))
91
f7a147e3
S
92
93def random_user_agent():
94 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
95 _CHROME_VERSIONS = (
96 '74.0.3729.129',
97 '76.0.3780.3',
98 '76.0.3780.2',
99 '74.0.3729.128',
100 '76.0.3780.1',
101 '76.0.3780.0',
102 '75.0.3770.15',
103 '74.0.3729.127',
104 '74.0.3729.126',
105 '76.0.3779.1',
106 '76.0.3779.0',
107 '75.0.3770.14',
108 '74.0.3729.125',
109 '76.0.3778.1',
110 '76.0.3778.0',
111 '75.0.3770.13',
112 '74.0.3729.124',
113 '74.0.3729.123',
114 '73.0.3683.121',
115 '76.0.3777.1',
116 '76.0.3777.0',
117 '75.0.3770.12',
118 '74.0.3729.122',
119 '76.0.3776.4',
120 '75.0.3770.11',
121 '74.0.3729.121',
122 '76.0.3776.3',
123 '76.0.3776.2',
124 '73.0.3683.120',
125 '74.0.3729.120',
126 '74.0.3729.119',
127 '74.0.3729.118',
128 '76.0.3776.1',
129 '76.0.3776.0',
130 '76.0.3775.5',
131 '75.0.3770.10',
132 '74.0.3729.117',
133 '76.0.3775.4',
134 '76.0.3775.3',
135 '74.0.3729.116',
136 '75.0.3770.9',
137 '76.0.3775.2',
138 '76.0.3775.1',
139 '76.0.3775.0',
140 '75.0.3770.8',
141 '74.0.3729.115',
142 '74.0.3729.114',
143 '76.0.3774.1',
144 '76.0.3774.0',
145 '75.0.3770.7',
146 '74.0.3729.113',
147 '74.0.3729.112',
148 '74.0.3729.111',
149 '76.0.3773.1',
150 '76.0.3773.0',
151 '75.0.3770.6',
152 '74.0.3729.110',
153 '74.0.3729.109',
154 '76.0.3772.1',
155 '76.0.3772.0',
156 '75.0.3770.5',
157 '74.0.3729.108',
158 '74.0.3729.107',
159 '76.0.3771.1',
160 '76.0.3771.0',
161 '75.0.3770.4',
162 '74.0.3729.106',
163 '74.0.3729.105',
164 '75.0.3770.3',
165 '74.0.3729.104',
166 '74.0.3729.103',
167 '74.0.3729.102',
168 '75.0.3770.2',
169 '74.0.3729.101',
170 '75.0.3770.1',
171 '75.0.3770.0',
172 '74.0.3729.100',
173 '75.0.3769.5',
174 '75.0.3769.4',
175 '74.0.3729.99',
176 '75.0.3769.3',
177 '75.0.3769.2',
178 '75.0.3768.6',
179 '74.0.3729.98',
180 '75.0.3769.1',
181 '75.0.3769.0',
182 '74.0.3729.97',
183 '73.0.3683.119',
184 '73.0.3683.118',
185 '74.0.3729.96',
186 '75.0.3768.5',
187 '75.0.3768.4',
188 '75.0.3768.3',
189 '75.0.3768.2',
190 '74.0.3729.95',
191 '74.0.3729.94',
192 '75.0.3768.1',
193 '75.0.3768.0',
194 '74.0.3729.93',
195 '74.0.3729.92',
196 '73.0.3683.117',
197 '74.0.3729.91',
198 '75.0.3766.3',
199 '74.0.3729.90',
200 '75.0.3767.2',
201 '75.0.3767.1',
202 '75.0.3767.0',
203 '74.0.3729.89',
204 '73.0.3683.116',
205 '75.0.3766.2',
206 '74.0.3729.88',
207 '75.0.3766.1',
208 '75.0.3766.0',
209 '74.0.3729.87',
210 '73.0.3683.115',
211 '74.0.3729.86',
212 '75.0.3765.1',
213 '75.0.3765.0',
214 '74.0.3729.85',
215 '73.0.3683.114',
216 '74.0.3729.84',
217 '75.0.3764.1',
218 '75.0.3764.0',
219 '74.0.3729.83',
220 '73.0.3683.113',
221 '75.0.3763.2',
222 '75.0.3761.4',
223 '74.0.3729.82',
224 '75.0.3763.1',
225 '75.0.3763.0',
226 '74.0.3729.81',
227 '73.0.3683.112',
228 '75.0.3762.1',
229 '75.0.3762.0',
230 '74.0.3729.80',
231 '75.0.3761.3',
232 '74.0.3729.79',
233 '73.0.3683.111',
234 '75.0.3761.2',
235 '74.0.3729.78',
236 '74.0.3729.77',
237 '75.0.3761.1',
238 '75.0.3761.0',
239 '73.0.3683.110',
240 '74.0.3729.76',
241 '74.0.3729.75',
242 '75.0.3760.0',
243 '74.0.3729.74',
244 '75.0.3759.8',
245 '75.0.3759.7',
246 '75.0.3759.6',
247 '74.0.3729.73',
248 '75.0.3759.5',
249 '74.0.3729.72',
250 '73.0.3683.109',
251 '75.0.3759.4',
252 '75.0.3759.3',
253 '74.0.3729.71',
254 '75.0.3759.2',
255 '74.0.3729.70',
256 '73.0.3683.108',
257 '74.0.3729.69',
258 '75.0.3759.1',
259 '75.0.3759.0',
260 '74.0.3729.68',
261 '73.0.3683.107',
262 '74.0.3729.67',
263 '75.0.3758.1',
264 '75.0.3758.0',
265 '74.0.3729.66',
266 '73.0.3683.106',
267 '74.0.3729.65',
268 '75.0.3757.1',
269 '75.0.3757.0',
270 '74.0.3729.64',
271 '73.0.3683.105',
272 '74.0.3729.63',
273 '75.0.3756.1',
274 '75.0.3756.0',
275 '74.0.3729.62',
276 '73.0.3683.104',
277 '75.0.3755.3',
278 '75.0.3755.2',
279 '73.0.3683.103',
280 '75.0.3755.1',
281 '75.0.3755.0',
282 '74.0.3729.61',
283 '73.0.3683.102',
284 '74.0.3729.60',
285 '75.0.3754.2',
286 '74.0.3729.59',
287 '75.0.3753.4',
288 '74.0.3729.58',
289 '75.0.3754.1',
290 '75.0.3754.0',
291 '74.0.3729.57',
292 '73.0.3683.101',
293 '75.0.3753.3',
294 '75.0.3752.2',
295 '75.0.3753.2',
296 '74.0.3729.56',
297 '75.0.3753.1',
298 '75.0.3753.0',
299 '74.0.3729.55',
300 '73.0.3683.100',
301 '74.0.3729.54',
302 '75.0.3752.1',
303 '75.0.3752.0',
304 '74.0.3729.53',
305 '73.0.3683.99',
306 '74.0.3729.52',
307 '75.0.3751.1',
308 '75.0.3751.0',
309 '74.0.3729.51',
310 '73.0.3683.98',
311 '74.0.3729.50',
312 '75.0.3750.0',
313 '74.0.3729.49',
314 '74.0.3729.48',
315 '74.0.3729.47',
316 '75.0.3749.3',
317 '74.0.3729.46',
318 '73.0.3683.97',
319 '75.0.3749.2',
320 '74.0.3729.45',
321 '75.0.3749.1',
322 '75.0.3749.0',
323 '74.0.3729.44',
324 '73.0.3683.96',
325 '74.0.3729.43',
326 '74.0.3729.42',
327 '75.0.3748.1',
328 '75.0.3748.0',
329 '74.0.3729.41',
330 '75.0.3747.1',
331 '73.0.3683.95',
332 '75.0.3746.4',
333 '74.0.3729.40',
334 '74.0.3729.39',
335 '75.0.3747.0',
336 '75.0.3746.3',
337 '75.0.3746.2',
338 '74.0.3729.38',
339 '75.0.3746.1',
340 '75.0.3746.0',
341 '74.0.3729.37',
342 '73.0.3683.94',
343 '75.0.3745.5',
344 '75.0.3745.4',
345 '75.0.3745.3',
346 '75.0.3745.2',
347 '74.0.3729.36',
348 '75.0.3745.1',
349 '75.0.3745.0',
350 '75.0.3744.2',
351 '74.0.3729.35',
352 '73.0.3683.93',
353 '74.0.3729.34',
354 '75.0.3744.1',
355 '75.0.3744.0',
356 '74.0.3729.33',
357 '73.0.3683.92',
358 '74.0.3729.32',
359 '74.0.3729.31',
360 '73.0.3683.91',
361 '75.0.3741.2',
362 '75.0.3740.5',
363 '74.0.3729.30',
364 '75.0.3741.1',
365 '75.0.3741.0',
366 '74.0.3729.29',
367 '75.0.3740.4',
368 '73.0.3683.90',
369 '74.0.3729.28',
370 '75.0.3740.3',
371 '73.0.3683.89',
372 '75.0.3740.2',
373 '74.0.3729.27',
374 '75.0.3740.1',
375 '75.0.3740.0',
376 '74.0.3729.26',
377 '73.0.3683.88',
378 '73.0.3683.87',
379 '74.0.3729.25',
380 '75.0.3739.1',
381 '75.0.3739.0',
382 '73.0.3683.86',
383 '74.0.3729.24',
384 '73.0.3683.85',
385 '75.0.3738.4',
386 '75.0.3738.3',
387 '75.0.3738.2',
388 '75.0.3738.1',
389 '75.0.3738.0',
390 '74.0.3729.23',
391 '73.0.3683.84',
392 '74.0.3729.22',
393 '74.0.3729.21',
394 '75.0.3737.1',
395 '75.0.3737.0',
396 '74.0.3729.20',
397 '73.0.3683.83',
398 '74.0.3729.19',
399 '75.0.3736.1',
400 '75.0.3736.0',
401 '74.0.3729.18',
402 '73.0.3683.82',
403 '74.0.3729.17',
404 '75.0.3735.1',
405 '75.0.3735.0',
406 '74.0.3729.16',
407 '73.0.3683.81',
408 '75.0.3734.1',
409 '75.0.3734.0',
410 '74.0.3729.15',
411 '73.0.3683.80',
412 '74.0.3729.14',
413 '75.0.3733.1',
414 '75.0.3733.0',
415 '75.0.3732.1',
416 '74.0.3729.13',
417 '74.0.3729.12',
418 '73.0.3683.79',
419 '74.0.3729.11',
420 '75.0.3732.0',
421 '74.0.3729.10',
422 '73.0.3683.78',
423 '74.0.3729.9',
424 '74.0.3729.8',
425 '74.0.3729.7',
426 '75.0.3731.3',
427 '75.0.3731.2',
428 '75.0.3731.0',
429 '74.0.3729.6',
430 '73.0.3683.77',
431 '73.0.3683.76',
432 '75.0.3730.5',
433 '75.0.3730.4',
434 '73.0.3683.75',
435 '74.0.3729.5',
436 '73.0.3683.74',
437 '75.0.3730.3',
438 '75.0.3730.2',
439 '74.0.3729.4',
440 '73.0.3683.73',
441 '73.0.3683.72',
442 '75.0.3730.1',
443 '75.0.3730.0',
444 '74.0.3729.3',
445 '73.0.3683.71',
446 '74.0.3729.2',
447 '73.0.3683.70',
448 '74.0.3729.1',
449 '74.0.3729.0',
450 '74.0.3726.4',
451 '73.0.3683.69',
452 '74.0.3726.3',
453 '74.0.3728.0',
454 '74.0.3726.2',
455 '73.0.3683.68',
456 '74.0.3726.1',
457 '74.0.3726.0',
458 '74.0.3725.4',
459 '73.0.3683.67',
460 '73.0.3683.66',
461 '74.0.3725.3',
462 '74.0.3725.2',
463 '74.0.3725.1',
464 '74.0.3724.8',
465 '74.0.3725.0',
466 '73.0.3683.65',
467 '74.0.3724.7',
468 '74.0.3724.6',
469 '74.0.3724.5',
470 '74.0.3724.4',
471 '74.0.3724.3',
472 '74.0.3724.2',
473 '74.0.3724.1',
474 '74.0.3724.0',
475 '73.0.3683.64',
476 '74.0.3723.1',
477 '74.0.3723.0',
478 '73.0.3683.63',
479 '74.0.3722.1',
480 '74.0.3722.0',
481 '73.0.3683.62',
482 '74.0.3718.9',
483 '74.0.3702.3',
484 '74.0.3721.3',
485 '74.0.3721.2',
486 '74.0.3721.1',
487 '74.0.3721.0',
488 '74.0.3720.6',
489 '73.0.3683.61',
490 '72.0.3626.122',
491 '73.0.3683.60',
492 '74.0.3720.5',
493 '72.0.3626.121',
494 '74.0.3718.8',
495 '74.0.3720.4',
496 '74.0.3720.3',
497 '74.0.3718.7',
498 '74.0.3720.2',
499 '74.0.3720.1',
500 '74.0.3720.0',
501 '74.0.3718.6',
502 '74.0.3719.5',
503 '73.0.3683.59',
504 '74.0.3718.5',
505 '74.0.3718.4',
506 '74.0.3719.4',
507 '74.0.3719.3',
508 '74.0.3719.2',
509 '74.0.3719.1',
510 '73.0.3683.58',
511 '74.0.3719.0',
512 '73.0.3683.57',
513 '73.0.3683.56',
514 '74.0.3718.3',
515 '73.0.3683.55',
516 '74.0.3718.2',
517 '74.0.3718.1',
518 '74.0.3718.0',
519 '73.0.3683.54',
520 '74.0.3717.2',
521 '73.0.3683.53',
522 '74.0.3717.1',
523 '74.0.3717.0',
524 '73.0.3683.52',
525 '74.0.3716.1',
526 '74.0.3716.0',
527 '73.0.3683.51',
528 '74.0.3715.1',
529 '74.0.3715.0',
530 '73.0.3683.50',
531 '74.0.3711.2',
532 '74.0.3714.2',
533 '74.0.3713.3',
534 '74.0.3714.1',
535 '74.0.3714.0',
536 '73.0.3683.49',
537 '74.0.3713.1',
538 '74.0.3713.0',
539 '72.0.3626.120',
540 '73.0.3683.48',
541 '74.0.3712.2',
542 '74.0.3712.1',
543 '74.0.3712.0',
544 '73.0.3683.47',
545 '72.0.3626.119',
546 '73.0.3683.46',
547 '74.0.3710.2',
548 '72.0.3626.118',
549 '74.0.3711.1',
550 '74.0.3711.0',
551 '73.0.3683.45',
552 '72.0.3626.117',
553 '74.0.3710.1',
554 '74.0.3710.0',
555 '73.0.3683.44',
556 '72.0.3626.116',
557 '74.0.3709.1',
558 '74.0.3709.0',
559 '74.0.3704.9',
560 '73.0.3683.43',
561 '72.0.3626.115',
562 '74.0.3704.8',
563 '74.0.3704.7',
564 '74.0.3708.0',
565 '74.0.3706.7',
566 '74.0.3704.6',
567 '73.0.3683.42',
568 '72.0.3626.114',
569 '74.0.3706.6',
570 '72.0.3626.113',
571 '74.0.3704.5',
572 '74.0.3706.5',
573 '74.0.3706.4',
574 '74.0.3706.3',
575 '74.0.3706.2',
576 '74.0.3706.1',
577 '74.0.3706.0',
578 '73.0.3683.41',
579 '72.0.3626.112',
580 '74.0.3705.1',
581 '74.0.3705.0',
582 '73.0.3683.40',
583 '72.0.3626.111',
584 '73.0.3683.39',
585 '74.0.3704.4',
586 '73.0.3683.38',
587 '74.0.3704.3',
588 '74.0.3704.2',
589 '74.0.3704.1',
590 '74.0.3704.0',
591 '73.0.3683.37',
592 '72.0.3626.110',
593 '72.0.3626.109',
594 '74.0.3703.3',
595 '74.0.3703.2',
596 '73.0.3683.36',
597 '74.0.3703.1',
598 '74.0.3703.0',
599 '73.0.3683.35',
600 '72.0.3626.108',
601 '74.0.3702.2',
602 '74.0.3699.3',
603 '74.0.3702.1',
604 '74.0.3702.0',
605 '73.0.3683.34',
606 '72.0.3626.107',
607 '73.0.3683.33',
608 '74.0.3701.1',
609 '74.0.3701.0',
610 '73.0.3683.32',
611 '73.0.3683.31',
612 '72.0.3626.105',
613 '74.0.3700.1',
614 '74.0.3700.0',
615 '73.0.3683.29',
616 '72.0.3626.103',
617 '74.0.3699.2',
618 '74.0.3699.1',
619 '74.0.3699.0',
620 '73.0.3683.28',
621 '72.0.3626.102',
622 '73.0.3683.27',
623 '73.0.3683.26',
624 '74.0.3698.0',
625 '74.0.3696.2',
626 '72.0.3626.101',
627 '73.0.3683.25',
628 '74.0.3696.1',
629 '74.0.3696.0',
630 '74.0.3694.8',
631 '72.0.3626.100',
632 '74.0.3694.7',
633 '74.0.3694.6',
634 '74.0.3694.5',
635 '74.0.3694.4',
636 '72.0.3626.99',
637 '72.0.3626.98',
638 '74.0.3694.3',
639 '73.0.3683.24',
640 '72.0.3626.97',
641 '72.0.3626.96',
642 '72.0.3626.95',
643 '73.0.3683.23',
644 '72.0.3626.94',
645 '73.0.3683.22',
646 '73.0.3683.21',
647 '72.0.3626.93',
648 '74.0.3694.2',
649 '72.0.3626.92',
650 '74.0.3694.1',
651 '74.0.3694.0',
652 '74.0.3693.6',
653 '73.0.3683.20',
654 '72.0.3626.91',
655 '74.0.3693.5',
656 '74.0.3693.4',
657 '74.0.3693.3',
658 '74.0.3693.2',
659 '73.0.3683.19',
660 '74.0.3693.1',
661 '74.0.3693.0',
662 '73.0.3683.18',
663 '72.0.3626.90',
664 '74.0.3692.1',
665 '74.0.3692.0',
666 '73.0.3683.17',
667 '72.0.3626.89',
668 '74.0.3687.3',
669 '74.0.3691.1',
670 '74.0.3691.0',
671 '73.0.3683.16',
672 '72.0.3626.88',
673 '72.0.3626.87',
674 '73.0.3683.15',
675 '74.0.3690.1',
676 '74.0.3690.0',
677 '73.0.3683.14',
678 '72.0.3626.86',
679 '73.0.3683.13',
680 '73.0.3683.12',
681 '74.0.3689.1',
682 '74.0.3689.0',
683 '73.0.3683.11',
684 '72.0.3626.85',
685 '73.0.3683.10',
686 '72.0.3626.84',
687 '73.0.3683.9',
688 '74.0.3688.1',
689 '74.0.3688.0',
690 '73.0.3683.8',
691 '72.0.3626.83',
692 '74.0.3687.2',
693 '74.0.3687.1',
694 '74.0.3687.0',
695 '73.0.3683.7',
696 '72.0.3626.82',
697 '74.0.3686.4',
698 '72.0.3626.81',
699 '74.0.3686.3',
700 '74.0.3686.2',
701 '74.0.3686.1',
702 '74.0.3686.0',
703 '73.0.3683.6',
704 '72.0.3626.80',
705 '74.0.3685.1',
706 '74.0.3685.0',
707 '73.0.3683.5',
708 '72.0.3626.79',
709 '74.0.3684.1',
710 '74.0.3684.0',
711 '73.0.3683.4',
712 '72.0.3626.78',
713 '72.0.3626.77',
714 '73.0.3683.3',
715 '73.0.3683.2',
716 '72.0.3626.76',
717 '73.0.3683.1',
718 '73.0.3683.0',
719 '72.0.3626.75',
720 '71.0.3578.141',
721 '73.0.3682.1',
722 '73.0.3682.0',
723 '72.0.3626.74',
724 '71.0.3578.140',
725 '73.0.3681.4',
726 '73.0.3681.3',
727 '73.0.3681.2',
728 '73.0.3681.1',
729 '73.0.3681.0',
730 '72.0.3626.73',
731 '71.0.3578.139',
732 '72.0.3626.72',
733 '72.0.3626.71',
734 '73.0.3680.1',
735 '73.0.3680.0',
736 '72.0.3626.70',
737 '71.0.3578.138',
738 '73.0.3678.2',
739 '73.0.3679.1',
740 '73.0.3679.0',
741 '72.0.3626.69',
742 '71.0.3578.137',
743 '73.0.3678.1',
744 '73.0.3678.0',
745 '71.0.3578.136',
746 '73.0.3677.1',
747 '73.0.3677.0',
748 '72.0.3626.68',
749 '72.0.3626.67',
750 '71.0.3578.135',
751 '73.0.3676.1',
752 '73.0.3676.0',
753 '73.0.3674.2',
754 '72.0.3626.66',
755 '71.0.3578.134',
756 '73.0.3674.1',
757 '73.0.3674.0',
758 '72.0.3626.65',
759 '71.0.3578.133',
760 '73.0.3673.2',
761 '73.0.3673.1',
762 '73.0.3673.0',
763 '72.0.3626.64',
764 '71.0.3578.132',
765 '72.0.3626.63',
766 '72.0.3626.62',
767 '72.0.3626.61',
768 '72.0.3626.60',
769 '73.0.3672.1',
770 '73.0.3672.0',
771 '72.0.3626.59',
772 '71.0.3578.131',
773 '73.0.3671.3',
774 '73.0.3671.2',
775 '73.0.3671.1',
776 '73.0.3671.0',
777 '72.0.3626.58',
778 '71.0.3578.130',
779 '73.0.3670.1',
780 '73.0.3670.0',
781 '72.0.3626.57',
782 '71.0.3578.129',
783 '73.0.3669.1',
784 '73.0.3669.0',
785 '72.0.3626.56',
786 '71.0.3578.128',
787 '73.0.3668.2',
788 '73.0.3668.1',
789 '73.0.3668.0',
790 '72.0.3626.55',
791 '71.0.3578.127',
792 '73.0.3667.2',
793 '73.0.3667.1',
794 '73.0.3667.0',
795 '72.0.3626.54',
796 '71.0.3578.126',
797 '73.0.3666.1',
798 '73.0.3666.0',
799 '72.0.3626.53',
800 '71.0.3578.125',
801 '73.0.3665.4',
802 '73.0.3665.3',
803 '72.0.3626.52',
804 '73.0.3665.2',
805 '73.0.3664.4',
806 '73.0.3665.1',
807 '73.0.3665.0',
808 '72.0.3626.51',
809 '71.0.3578.124',
810 '72.0.3626.50',
811 '73.0.3664.3',
812 '73.0.3664.2',
813 '73.0.3664.1',
814 '73.0.3664.0',
815 '73.0.3663.2',
816 '72.0.3626.49',
817 '71.0.3578.123',
818 '73.0.3663.1',
819 '73.0.3663.0',
820 '72.0.3626.48',
821 '71.0.3578.122',
822 '73.0.3662.1',
823 '73.0.3662.0',
824 '72.0.3626.47',
825 '71.0.3578.121',
826 '73.0.3661.1',
827 '72.0.3626.46',
828 '73.0.3661.0',
829 '72.0.3626.45',
830 '71.0.3578.120',
831 '73.0.3660.2',
832 '73.0.3660.1',
833 '73.0.3660.0',
834 '72.0.3626.44',
835 '71.0.3578.119',
836 '73.0.3659.1',
837 '73.0.3659.0',
838 '72.0.3626.43',
839 '71.0.3578.118',
840 '73.0.3658.1',
841 '73.0.3658.0',
842 '72.0.3626.42',
843 '71.0.3578.117',
844 '73.0.3657.1',
845 '73.0.3657.0',
846 '72.0.3626.41',
847 '71.0.3578.116',
848 '73.0.3656.1',
849 '73.0.3656.0',
850 '72.0.3626.40',
851 '71.0.3578.115',
852 '73.0.3655.1',
853 '73.0.3655.0',
854 '72.0.3626.39',
855 '71.0.3578.114',
856 '73.0.3654.1',
857 '73.0.3654.0',
858 '72.0.3626.38',
859 '71.0.3578.113',
860 '73.0.3653.1',
861 '73.0.3653.0',
862 '72.0.3626.37',
863 '71.0.3578.112',
864 '73.0.3652.1',
865 '73.0.3652.0',
866 '72.0.3626.36',
867 '71.0.3578.111',
868 '73.0.3651.1',
869 '73.0.3651.0',
870 '72.0.3626.35',
871 '71.0.3578.110',
872 '73.0.3650.1',
873 '73.0.3650.0',
874 '72.0.3626.34',
875 '71.0.3578.109',
876 '73.0.3649.1',
877 '73.0.3649.0',
878 '72.0.3626.33',
879 '71.0.3578.108',
880 '73.0.3648.2',
881 '73.0.3648.1',
882 '73.0.3648.0',
883 '72.0.3626.32',
884 '71.0.3578.107',
885 '73.0.3647.2',
886 '73.0.3647.1',
887 '73.0.3647.0',
888 '72.0.3626.31',
889 '71.0.3578.106',
890 '73.0.3635.3',
891 '73.0.3646.2',
892 '73.0.3646.1',
893 '73.0.3646.0',
894 '72.0.3626.30',
895 '71.0.3578.105',
896 '72.0.3626.29',
897 '73.0.3645.2',
898 '73.0.3645.1',
899 '73.0.3645.0',
900 '72.0.3626.28',
901 '71.0.3578.104',
902 '72.0.3626.27',
903 '72.0.3626.26',
904 '72.0.3626.25',
905 '72.0.3626.24',
906 '73.0.3644.0',
907 '73.0.3643.2',
908 '72.0.3626.23',
909 '71.0.3578.103',
910 '73.0.3643.1',
911 '73.0.3643.0',
912 '72.0.3626.22',
913 '71.0.3578.102',
914 '73.0.3642.1',
915 '73.0.3642.0',
916 '72.0.3626.21',
917 '71.0.3578.101',
918 '73.0.3641.1',
919 '73.0.3641.0',
920 '72.0.3626.20',
921 '71.0.3578.100',
922 '72.0.3626.19',
923 '73.0.3640.1',
924 '73.0.3640.0',
925 '72.0.3626.18',
926 '73.0.3639.1',
927 '71.0.3578.99',
928 '73.0.3639.0',
929 '72.0.3626.17',
930 '73.0.3638.2',
931 '72.0.3626.16',
932 '73.0.3638.1',
933 '73.0.3638.0',
934 '72.0.3626.15',
935 '71.0.3578.98',
936 '73.0.3635.2',
937 '71.0.3578.97',
938 '73.0.3637.1',
939 '73.0.3637.0',
940 '72.0.3626.14',
941 '71.0.3578.96',
942 '71.0.3578.95',
943 '72.0.3626.13',
944 '71.0.3578.94',
945 '73.0.3636.2',
946 '71.0.3578.93',
947 '73.0.3636.1',
948 '73.0.3636.0',
949 '72.0.3626.12',
950 '71.0.3578.92',
951 '73.0.3635.1',
952 '73.0.3635.0',
953 '72.0.3626.11',
954 '71.0.3578.91',
955 '73.0.3634.2',
956 '73.0.3634.1',
957 '73.0.3634.0',
958 '72.0.3626.10',
959 '71.0.3578.90',
960 '71.0.3578.89',
961 '73.0.3633.2',
962 '73.0.3633.1',
963 '73.0.3633.0',
964 '72.0.3610.4',
965 '72.0.3626.9',
966 '71.0.3578.88',
967 '73.0.3632.5',
968 '73.0.3632.4',
969 '73.0.3632.3',
970 '73.0.3632.2',
971 '73.0.3632.1',
972 '73.0.3632.0',
973 '72.0.3626.8',
974 '71.0.3578.87',
975 '73.0.3631.2',
976 '73.0.3631.1',
977 '73.0.3631.0',
978 '72.0.3626.7',
979 '71.0.3578.86',
980 '72.0.3626.6',
981 '73.0.3630.1',
982 '73.0.3630.0',
983 '72.0.3626.5',
984 '71.0.3578.85',
985 '72.0.3626.4',
986 '73.0.3628.3',
987 '73.0.3628.2',
988 '73.0.3629.1',
989 '73.0.3629.0',
990 '72.0.3626.3',
991 '71.0.3578.84',
992 '73.0.3628.1',
993 '73.0.3628.0',
994 '71.0.3578.83',
995 '73.0.3627.1',
996 '73.0.3627.0',
997 '72.0.3626.2',
998 '71.0.3578.82',
999 '71.0.3578.81',
1000 '71.0.3578.80',
1001 '72.0.3626.1',
1002 '72.0.3626.0',
1003 '71.0.3578.79',
1004 '70.0.3538.124',
1005 '71.0.3578.78',
1006 '72.0.3623.4',
1007 '72.0.3625.2',
1008 '72.0.3625.1',
1009 '72.0.3625.0',
1010 '71.0.3578.77',
1011 '70.0.3538.123',
1012 '72.0.3624.4',
1013 '72.0.3624.3',
1014 '72.0.3624.2',
1015 '71.0.3578.76',
1016 '72.0.3624.1',
1017 '72.0.3624.0',
1018 '72.0.3623.3',
1019 '71.0.3578.75',
1020 '70.0.3538.122',
1021 '71.0.3578.74',
1022 '72.0.3623.2',
1023 '72.0.3610.3',
1024 '72.0.3623.1',
1025 '72.0.3623.0',
1026 '72.0.3622.3',
1027 '72.0.3622.2',
1028 '71.0.3578.73',
1029 '70.0.3538.121',
1030 '72.0.3622.1',
1031 '72.0.3622.0',
1032 '71.0.3578.72',
1033 '70.0.3538.120',
1034 '72.0.3621.1',
1035 '72.0.3621.0',
1036 '71.0.3578.71',
1037 '70.0.3538.119',
1038 '72.0.3620.1',
1039 '72.0.3620.0',
1040 '71.0.3578.70',
1041 '70.0.3538.118',
1042 '71.0.3578.69',
1043 '72.0.3619.1',
1044 '72.0.3619.0',
1045 '71.0.3578.68',
1046 '70.0.3538.117',
1047 '71.0.3578.67',
1048 '72.0.3618.1',
1049 '72.0.3618.0',
1050 '71.0.3578.66',
1051 '70.0.3538.116',
1052 '72.0.3617.1',
1053 '72.0.3617.0',
1054 '71.0.3578.65',
1055 '70.0.3538.115',
1056 '72.0.3602.3',
1057 '71.0.3578.64',
1058 '72.0.3616.1',
1059 '72.0.3616.0',
1060 '71.0.3578.63',
1061 '70.0.3538.114',
1062 '71.0.3578.62',
1063 '72.0.3615.1',
1064 '72.0.3615.0',
1065 '71.0.3578.61',
1066 '70.0.3538.113',
1067 '72.0.3614.1',
1068 '72.0.3614.0',
1069 '71.0.3578.60',
1070 '70.0.3538.112',
1071 '72.0.3613.1',
1072 '72.0.3613.0',
1073 '71.0.3578.59',
1074 '70.0.3538.111',
1075 '72.0.3612.2',
1076 '72.0.3612.1',
1077 '72.0.3612.0',
1078 '70.0.3538.110',
1079 '71.0.3578.58',
1080 '70.0.3538.109',
1081 '72.0.3611.2',
1082 '72.0.3611.1',
1083 '72.0.3611.0',
1084 '71.0.3578.57',
1085 '70.0.3538.108',
1086 '72.0.3610.2',
1087 '71.0.3578.56',
1088 '71.0.3578.55',
1089 '72.0.3610.1',
1090 '72.0.3610.0',
1091 '71.0.3578.54',
1092 '70.0.3538.107',
1093 '71.0.3578.53',
1094 '72.0.3609.3',
1095 '71.0.3578.52',
1096 '72.0.3609.2',
1097 '71.0.3578.51',
1098 '72.0.3608.5',
1099 '72.0.3609.1',
1100 '72.0.3609.0',
1101 '71.0.3578.50',
1102 '70.0.3538.106',
1103 '72.0.3608.4',
1104 '72.0.3608.3',
1105 '72.0.3608.2',
1106 '71.0.3578.49',
1107 '72.0.3608.1',
1108 '72.0.3608.0',
1109 '70.0.3538.105',
1110 '71.0.3578.48',
1111 '72.0.3607.1',
1112 '72.0.3607.0',
1113 '71.0.3578.47',
1114 '70.0.3538.104',
1115 '72.0.3606.2',
1116 '72.0.3606.1',
1117 '72.0.3606.0',
1118 '71.0.3578.46',
1119 '70.0.3538.103',
1120 '70.0.3538.102',
1121 '72.0.3605.3',
1122 '72.0.3605.2',
1123 '72.0.3605.1',
1124 '72.0.3605.0',
1125 '71.0.3578.45',
1126 '70.0.3538.101',
1127 '71.0.3578.44',
1128 '71.0.3578.43',
1129 '70.0.3538.100',
1130 '70.0.3538.99',
1131 '71.0.3578.42',
1132 '72.0.3604.1',
1133 '72.0.3604.0',
1134 '71.0.3578.41',
1135 '70.0.3538.98',
1136 '71.0.3578.40',
1137 '72.0.3603.2',
1138 '72.0.3603.1',
1139 '72.0.3603.0',
1140 '71.0.3578.39',
1141 '70.0.3538.97',
1142 '72.0.3602.2',
1143 '71.0.3578.38',
1144 '71.0.3578.37',
1145 '72.0.3602.1',
1146 '72.0.3602.0',
1147 '71.0.3578.36',
1148 '70.0.3538.96',
1149 '72.0.3601.1',
1150 '72.0.3601.0',
1151 '71.0.3578.35',
1152 '70.0.3538.95',
1153 '72.0.3600.1',
1154 '72.0.3600.0',
1155 '71.0.3578.34',
1156 '70.0.3538.94',
1157 '72.0.3599.3',
1158 '72.0.3599.2',
1159 '72.0.3599.1',
1160 '72.0.3599.0',
1161 '71.0.3578.33',
1162 '70.0.3538.93',
1163 '72.0.3598.1',
1164 '72.0.3598.0',
1165 '71.0.3578.32',
1166 '70.0.3538.87',
1167 '72.0.3597.1',
1168 '72.0.3597.0',
1169 '72.0.3596.2',
1170 '71.0.3578.31',
1171 '70.0.3538.86',
1172 '71.0.3578.30',
1173 '71.0.3578.29',
1174 '72.0.3596.1',
1175 '72.0.3596.0',
1176 '71.0.3578.28',
1177 '70.0.3538.85',
1178 '72.0.3595.2',
1179 '72.0.3591.3',
1180 '72.0.3595.1',
1181 '72.0.3595.0',
1182 '71.0.3578.27',
1183 '70.0.3538.84',
1184 '72.0.3594.1',
1185 '72.0.3594.0',
1186 '71.0.3578.26',
1187 '70.0.3538.83',
1188 '72.0.3593.2',
1189 '72.0.3593.1',
1190 '72.0.3593.0',
1191 '71.0.3578.25',
1192 '70.0.3538.82',
1193 '72.0.3589.3',
1194 '72.0.3592.2',
1195 '72.0.3592.1',
1196 '72.0.3592.0',
1197 '71.0.3578.24',
1198 '72.0.3589.2',
1199 '70.0.3538.81',
1200 '70.0.3538.80',
1201 '72.0.3591.2',
1202 '72.0.3591.1',
1203 '72.0.3591.0',
1204 '71.0.3578.23',
1205 '70.0.3538.79',
1206 '71.0.3578.22',
1207 '72.0.3590.1',
1208 '72.0.3590.0',
1209 '71.0.3578.21',
1210 '70.0.3538.78',
1211 '70.0.3538.77',
1212 '72.0.3589.1',
1213 '72.0.3589.0',
1214 '71.0.3578.20',
1215 '70.0.3538.76',
1216 '71.0.3578.19',
1217 '70.0.3538.75',
1218 '72.0.3588.1',
1219 '72.0.3588.0',
1220 '71.0.3578.18',
1221 '70.0.3538.74',
1222 '72.0.3586.2',
1223 '72.0.3587.0',
1224 '71.0.3578.17',
1225 '70.0.3538.73',
1226 '72.0.3586.1',
1227 '72.0.3586.0',
1228 '71.0.3578.16',
1229 '70.0.3538.72',
1230 '72.0.3585.1',
1231 '72.0.3585.0',
1232 '71.0.3578.15',
1233 '70.0.3538.71',
1234 '71.0.3578.14',
1235 '72.0.3584.1',
1236 '72.0.3584.0',
1237 '71.0.3578.13',
1238 '70.0.3538.70',
1239 '72.0.3583.2',
1240 '71.0.3578.12',
1241 '72.0.3583.1',
1242 '72.0.3583.0',
1243 '71.0.3578.11',
1244 '70.0.3538.69',
1245 '71.0.3578.10',
1246 '72.0.3582.0',
1247 '72.0.3581.4',
1248 '71.0.3578.9',
1249 '70.0.3538.67',
1250 '72.0.3581.3',
1251 '72.0.3581.2',
1252 '72.0.3581.1',
1253 '72.0.3581.0',
1254 '71.0.3578.8',
1255 '70.0.3538.66',
1256 '72.0.3580.1',
1257 '72.0.3580.0',
1258 '71.0.3578.7',
1259 '70.0.3538.65',
1260 '71.0.3578.6',
1261 '72.0.3579.1',
1262 '72.0.3579.0',
1263 '71.0.3578.5',
1264 '70.0.3538.64',
1265 '71.0.3578.4',
1266 '71.0.3578.3',
1267 '71.0.3578.2',
1268 '71.0.3578.1',
1269 '71.0.3578.0',
1270 '70.0.3538.63',
1271 '69.0.3497.128',
1272 '70.0.3538.62',
1273 '70.0.3538.61',
1274 '70.0.3538.60',
1275 '70.0.3538.59',
1276 '71.0.3577.1',
1277 '71.0.3577.0',
1278 '70.0.3538.58',
1279 '69.0.3497.127',
1280 '71.0.3576.2',
1281 '71.0.3576.1',
1282 '71.0.3576.0',
1283 '70.0.3538.57',
1284 '70.0.3538.56',
1285 '71.0.3575.2',
1286 '70.0.3538.55',
1287 '69.0.3497.126',
1288 '70.0.3538.54',
1289 '71.0.3575.1',
1290 '71.0.3575.0',
1291 '71.0.3574.1',
1292 '71.0.3574.0',
1293 '70.0.3538.53',
1294 '69.0.3497.125',
1295 '70.0.3538.52',
1296 '71.0.3573.1',
1297 '71.0.3573.0',
1298 '70.0.3538.51',
1299 '69.0.3497.124',
1300 '71.0.3572.1',
1301 '71.0.3572.0',
1302 '70.0.3538.50',
1303 '69.0.3497.123',
1304 '71.0.3571.2',
1305 '70.0.3538.49',
1306 '69.0.3497.122',
1307 '71.0.3571.1',
1308 '71.0.3571.0',
1309 '70.0.3538.48',
1310 '69.0.3497.121',
1311 '71.0.3570.1',
1312 '71.0.3570.0',
1313 '70.0.3538.47',
1314 '69.0.3497.120',
1315 '71.0.3568.2',
1316 '71.0.3569.1',
1317 '71.0.3569.0',
1318 '70.0.3538.46',
1319 '69.0.3497.119',
1320 '70.0.3538.45',
1321 '71.0.3568.1',
1322 '71.0.3568.0',
1323 '70.0.3538.44',
1324 '69.0.3497.118',
1325 '70.0.3538.43',
1326 '70.0.3538.42',
1327 '71.0.3567.1',
1328 '71.0.3567.0',
1329 '70.0.3538.41',
1330 '69.0.3497.117',
1331 '71.0.3566.1',
1332 '71.0.3566.0',
1333 '70.0.3538.40',
1334 '69.0.3497.116',
1335 '71.0.3565.1',
1336 '71.0.3565.0',
1337 '70.0.3538.39',
1338 '69.0.3497.115',
1339 '71.0.3564.1',
1340 '71.0.3564.0',
1341 '70.0.3538.38',
1342 '69.0.3497.114',
1343 '71.0.3563.0',
1344 '71.0.3562.2',
1345 '70.0.3538.37',
1346 '69.0.3497.113',
1347 '70.0.3538.36',
1348 '70.0.3538.35',
1349 '71.0.3562.1',
1350 '71.0.3562.0',
1351 '70.0.3538.34',
1352 '69.0.3497.112',
1353 '70.0.3538.33',
1354 '71.0.3561.1',
1355 '71.0.3561.0',
1356 '70.0.3538.32',
1357 '69.0.3497.111',
1358 '71.0.3559.6',
1359 '71.0.3560.1',
1360 '71.0.3560.0',
1361 '71.0.3559.5',
1362 '71.0.3559.4',
1363 '70.0.3538.31',
1364 '69.0.3497.110',
1365 '71.0.3559.3',
1366 '70.0.3538.30',
1367 '69.0.3497.109',
1368 '71.0.3559.2',
1369 '71.0.3559.1',
1370 '71.0.3559.0',
1371 '70.0.3538.29',
1372 '69.0.3497.108',
1373 '71.0.3558.2',
1374 '71.0.3558.1',
1375 '71.0.3558.0',
1376 '70.0.3538.28',
1377 '69.0.3497.107',
1378 '71.0.3557.2',
1379 '71.0.3557.1',
1380 '71.0.3557.0',
1381 '70.0.3538.27',
1382 '69.0.3497.106',
1383 '71.0.3554.4',
1384 '70.0.3538.26',
1385 '71.0.3556.1',
1386 '71.0.3556.0',
1387 '70.0.3538.25',
1388 '71.0.3554.3',
1389 '69.0.3497.105',
1390 '71.0.3554.2',
1391 '70.0.3538.24',
1392 '69.0.3497.104',
1393 '71.0.3555.2',
1394 '70.0.3538.23',
1395 '71.0.3555.1',
1396 '71.0.3555.0',
1397 '70.0.3538.22',
1398 '69.0.3497.103',
1399 '71.0.3554.1',
1400 '71.0.3554.0',
1401 '70.0.3538.21',
1402 '69.0.3497.102',
1403 '71.0.3553.3',
1404 '70.0.3538.20',
1405 '69.0.3497.101',
1406 '71.0.3553.2',
1407 '69.0.3497.100',
1408 '71.0.3553.1',
1409 '71.0.3553.0',
1410 '70.0.3538.19',
1411 '69.0.3497.99',
1412 '69.0.3497.98',
1413 '69.0.3497.97',
1414 '71.0.3552.6',
1415 '71.0.3552.5',
1416 '71.0.3552.4',
1417 '71.0.3552.3',
1418 '71.0.3552.2',
1419 '71.0.3552.1',
1420 '71.0.3552.0',
1421 '70.0.3538.18',
1422 '69.0.3497.96',
1423 '71.0.3551.3',
1424 '71.0.3551.2',
1425 '71.0.3551.1',
1426 '71.0.3551.0',
1427 '70.0.3538.17',
1428 '69.0.3497.95',
1429 '71.0.3550.3',
1430 '71.0.3550.2',
1431 '71.0.3550.1',
1432 '71.0.3550.0',
1433 '70.0.3538.16',
1434 '69.0.3497.94',
1435 '71.0.3549.1',
1436 '71.0.3549.0',
1437 '70.0.3538.15',
1438 '69.0.3497.93',
1439 '69.0.3497.92',
1440 '71.0.3548.1',
1441 '71.0.3548.0',
1442 '70.0.3538.14',
1443 '69.0.3497.91',
1444 '71.0.3547.1',
1445 '71.0.3547.0',
1446 '70.0.3538.13',
1447 '69.0.3497.90',
1448 '71.0.3546.2',
1449 '69.0.3497.89',
1450 '71.0.3546.1',
1451 '71.0.3546.0',
1452 '70.0.3538.12',
1453 '69.0.3497.88',
1454 '71.0.3545.4',
1455 '71.0.3545.3',
1456 '71.0.3545.2',
1457 '71.0.3545.1',
1458 '71.0.3545.0',
1459 '70.0.3538.11',
1460 '69.0.3497.87',
1461 '71.0.3544.5',
1462 '71.0.3544.4',
1463 '71.0.3544.3',
1464 '71.0.3544.2',
1465 '71.0.3544.1',
1466 '71.0.3544.0',
1467 '69.0.3497.86',
1468 '70.0.3538.10',
1469 '69.0.3497.85',
1470 '70.0.3538.9',
1471 '69.0.3497.84',
1472 '71.0.3543.4',
1473 '70.0.3538.8',
1474 '71.0.3543.3',
1475 '71.0.3543.2',
1476 '71.0.3543.1',
1477 '71.0.3543.0',
1478 '70.0.3538.7',
1479 '69.0.3497.83',
1480 '71.0.3542.2',
1481 '71.0.3542.1',
1482 '71.0.3542.0',
1483 '70.0.3538.6',
1484 '69.0.3497.82',
1485 '69.0.3497.81',
1486 '71.0.3541.1',
1487 '71.0.3541.0',
1488 '70.0.3538.5',
1489 '69.0.3497.80',
1490 '71.0.3540.1',
1491 '71.0.3540.0',
1492 '70.0.3538.4',
1493 '69.0.3497.79',
1494 '70.0.3538.3',
1495 '71.0.3539.1',
1496 '71.0.3539.0',
1497 '69.0.3497.78',
1498 '68.0.3440.134',
1499 '69.0.3497.77',
1500 '70.0.3538.2',
1501 '70.0.3538.1',
1502 '70.0.3538.0',
1503 '69.0.3497.76',
1504 '68.0.3440.133',
1505 '69.0.3497.75',
1506 '70.0.3537.2',
1507 '70.0.3537.1',
1508 '70.0.3537.0',
1509 '69.0.3497.74',
1510 '68.0.3440.132',
1511 '70.0.3536.0',
1512 '70.0.3535.5',
1513 '70.0.3535.4',
1514 '70.0.3535.3',
1515 '69.0.3497.73',
1516 '68.0.3440.131',
1517 '70.0.3532.8',
1518 '70.0.3532.7',
1519 '69.0.3497.72',
1520 '69.0.3497.71',
1521 '70.0.3535.2',
1522 '70.0.3535.1',
1523 '70.0.3535.0',
1524 '69.0.3497.70',
1525 '68.0.3440.130',
1526 '69.0.3497.69',
1527 '68.0.3440.129',
1528 '70.0.3534.4',
1529 '70.0.3534.3',
1530 '70.0.3534.2',
1531 '70.0.3534.1',
1532 '70.0.3534.0',
1533 '69.0.3497.68',
1534 '68.0.3440.128',
1535 '70.0.3533.2',
1536 '70.0.3533.1',
1537 '70.0.3533.0',
1538 '69.0.3497.67',
1539 '68.0.3440.127',
1540 '70.0.3532.6',
1541 '70.0.3532.5',
1542 '70.0.3532.4',
1543 '69.0.3497.66',
1544 '68.0.3440.126',
1545 '70.0.3532.3',
1546 '70.0.3532.2',
1547 '70.0.3532.1',
1548 '69.0.3497.60',
1549 '69.0.3497.65',
1550 '69.0.3497.64',
1551 '70.0.3532.0',
1552 '70.0.3531.0',
1553 '70.0.3530.4',
1554 '70.0.3530.3',
1555 '70.0.3530.2',
1556 '69.0.3497.58',
1557 '68.0.3440.125',
1558 '69.0.3497.57',
1559 '69.0.3497.56',
1560 '69.0.3497.55',
1561 '69.0.3497.54',
1562 '70.0.3530.1',
1563 '70.0.3530.0',
1564 '69.0.3497.53',
1565 '68.0.3440.124',
1566 '69.0.3497.52',
1567 '70.0.3529.3',
1568 '70.0.3529.2',
1569 '70.0.3529.1',
1570 '70.0.3529.0',
1571 '69.0.3497.51',
1572 '70.0.3528.4',
1573 '68.0.3440.123',
1574 '70.0.3528.3',
1575 '70.0.3528.2',
1576 '70.0.3528.1',
1577 '70.0.3528.0',
1578 '69.0.3497.50',
1579 '68.0.3440.122',
1580 '70.0.3527.1',
1581 '70.0.3527.0',
1582 '69.0.3497.49',
1583 '68.0.3440.121',
1584 '70.0.3526.1',
1585 '70.0.3526.0',
1586 '68.0.3440.120',
1587 '69.0.3497.48',
1588 '69.0.3497.47',
1589 '68.0.3440.119',
1590 '68.0.3440.118',
1591 '70.0.3525.5',
1592 '70.0.3525.4',
1593 '70.0.3525.3',
1594 '68.0.3440.117',
1595 '69.0.3497.46',
1596 '70.0.3525.2',
1597 '70.0.3525.1',
1598 '70.0.3525.0',
1599 '69.0.3497.45',
1600 '68.0.3440.116',
1601 '70.0.3524.4',
1602 '70.0.3524.3',
1603 '69.0.3497.44',
1604 '70.0.3524.2',
1605 '70.0.3524.1',
1606 '70.0.3524.0',
1607 '70.0.3523.2',
1608 '69.0.3497.43',
1609 '68.0.3440.115',
1610 '70.0.3505.9',
1611 '69.0.3497.42',
1612 '70.0.3505.8',
1613 '70.0.3523.1',
1614 '70.0.3523.0',
1615 '69.0.3497.41',
1616 '68.0.3440.114',
1617 '70.0.3505.7',
1618 '69.0.3497.40',
1619 '70.0.3522.1',
1620 '70.0.3522.0',
1621 '70.0.3521.2',
1622 '69.0.3497.39',
1623 '68.0.3440.113',
1624 '70.0.3505.6',
1625 '70.0.3521.1',
1626 '70.0.3521.0',
1627 '69.0.3497.38',
1628 '68.0.3440.112',
1629 '70.0.3520.1',
1630 '70.0.3520.0',
1631 '69.0.3497.37',
1632 '68.0.3440.111',
1633 '70.0.3519.3',
1634 '70.0.3519.2',
1635 '70.0.3519.1',
1636 '70.0.3519.0',
1637 '69.0.3497.36',
1638 '68.0.3440.110',
1639 '70.0.3518.1',
1640 '70.0.3518.0',
1641 '69.0.3497.35',
1642 '69.0.3497.34',
1643 '68.0.3440.109',
1644 '70.0.3517.1',
1645 '70.0.3517.0',
1646 '69.0.3497.33',
1647 '68.0.3440.108',
1648 '69.0.3497.32',
1649 '70.0.3516.3',
1650 '70.0.3516.2',
1651 '70.0.3516.1',
1652 '70.0.3516.0',
1653 '69.0.3497.31',
1654 '68.0.3440.107',
1655 '70.0.3515.4',
1656 '68.0.3440.106',
1657 '70.0.3515.3',
1658 '70.0.3515.2',
1659 '70.0.3515.1',
1660 '70.0.3515.0',
1661 '69.0.3497.30',
1662 '68.0.3440.105',
1663 '68.0.3440.104',
1664 '70.0.3514.2',
1665 '70.0.3514.1',
1666 '70.0.3514.0',
1667 '69.0.3497.29',
1668 '68.0.3440.103',
1669 '70.0.3513.1',
1670 '70.0.3513.0',
1671 '69.0.3497.28',
1672 )
1673 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# Standard HTTP header set. The User-Agent value is generated once, when this
# module is imported, by random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1683
5f6a1245 1684
fb37eb25
S
# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': (
        'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 '
        '(KHTML, like Gecko) Version/5.0.4 Safari/533.20.27'),
}


# Unique sentinel used as the default of `default=` parameters, so that
# None can still be passed as a real default value
NO_DEFAULT = object()
1691
7105440c
YCH
# Month names in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]

# Month names per language code, each list in calendar order
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier',
        'février',
        'mars',
        'avril',
        'mai',
        'juin',
        'juillet',
        'août',
        'septembre',
        'octobre',
        'novembre',
        'décembre',
    ],
}
a942d6cb 1702
a7aaa398
S
# File extensions recognised as media or manifest formats,
# grouped one container family per line
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil',
)

REMUX_EXTENSIONS = (
    'mp4', 'mkv', 'flv', 'webm', 'mov', 'avi',
    'mp3', 'mka', 'm4a', 'ogg', 'opus',
)
1719
# needed for sanitizing filenames in restricted mode:
# maps each accented character to its ASCII replacement (one or two letters)
ACCENT_CHARS = {
    accented: plain
    for accented, plain in zip(
        'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
        itertools.chain(
            'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
            'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y'))
}
c587cbb7 1724
46f59e89
S
# strptime-style patterns that are unambiguous about day/month order
DATE_FORMATS = (
    # textual month, date only
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    # abbreviated month with ordinal day and 12-hour time
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    # numeric, year first
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    # ISO 8601 style with a literal "T" separator
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    # textual month followed by "at <time>"
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# The common formats plus numeric formats read as day-before-month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# The common formats plus numeric formats read as month-before-day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1782
# Matches the argument list of a packed-JS call of the form
# }('<payload>',<int>,<int>,'<a|b|c>'.split('|')  (four capture groups)
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; the script body is
# captured in the named group `json_ld`
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1785
7105440c 1786
def preferredencoding():
    """Return the encoding to use for text on this system.

    The value of locale.getpreferredencoding() is used when it can actually
    encode a sample string; on any failure 'UTF-8' is returned instead.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or broken encoding name raises here
        'TEST'.encode(encoding)
        return encoding
    except Exception:
        return 'UTF-8'
d77c3dfd 1800
f4bfd65f 1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn, which
    is then renamed over fn. If anything fails, the temporary file is removed
    and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        # the same for os.path.dirname
        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        # The file must survive close() so that it can be renamed afterwards
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Read the current umask (os.umask only returns it while setting
            # a new one, hence the set-and-restore) and apply it by hand
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file; the original error wins
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first element matching xpath that has the attribute key
        (with value val, unless val is None), or None if there is none.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            return node.find(xpath + '[@%s]' % key)
        val_str = '%s' % (val,)
        if "'" in val_str:
            # A single quote cannot be embedded in the quoted predicate
            # below, so filter the candidates by hand instead
            for candidate in node.findall(xpath):
                if candidate.attrib.get(key) == val_str:
                    return candidate
            return None
        return node.find(xpath + "[@%s='%s']" % (key, val_str))
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] without attribute predicates """
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1876
d7e66d39
JMF
1877# On python2.6 the xml.etree.ElementTree.Element methods don't support
1878# the namespace parameter
5f6a1245
JW
1879
1880
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand every `prefix:tag` step of path into `{namespace-uri}tag`.

    ns_map maps prefixes to namespace URIs. Steps without a colon are kept
    unchanged.
    """
    def _expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))
1891
d77c3dfd 1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element under node that matches xpath.

    xpath is either a single path or an iterable of paths that are tried in
    order. When nothing matches: default is returned if one was given,
    otherwise ExtractorError is raised if fatal is set, otherwise None is
    returned. name is only used in the error message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable counts as "not found"
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element or missing text yields default if one was given,
    raises ExtractorError if fatal is set, and yields None otherwise.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # xpath_element() already resolved the "not found" case for us
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = xpath
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When there is no such element: default is returned if one was given,
    otherwise ExtractorError is raised if fatal is set, otherwise None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1942
1943
def get_element_by_id(id, html):
    """Return the content of the first tag in html whose id attribute equals id"""
    return get_element_by_attribute('id', id, html)
43e8fafd 1947
12ea2f30 1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag in html carrying class_name, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag in html with attribute=value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return a list with the content of every tag in html carrying class_name"""
    # class_name must appear as a whole word anywhere inside the class value
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute(
        'class', class_value_re, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return a list with the content of every tag in html with attribute=value

    value is matched literally unless escape_value is False, in which case it
    is inserted into the pattern as a regular expression. Each content string
    is passed through unescapeHTML().
    """
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(pattern, html):
        content = match.group('content')
        # Drop one leading and one trailing character of quoted content
        if content.startswith(('"', "'")):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
a921f407 1990
c5229f39 1991
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Minimal HTML parser that records the attributes of a start tag

    After feeding, `attrs` holds the attributes of the most recently seen
    start tag as a dict (empty if no start tag was seen).
    """

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs arrives as (name, value) pairs
        self.attrs = dict(attrs)
2001
c5229f39 2002
8bb56eee
BF
def extract_attributes(html_element):
    """Parse the string of a single HTML element into a dict of attributes.

    For example, given
        <el
            a="foo" B="bar" c="&#98;az" d=boz
            empty= noval entity="&amp;"
            sq='"' dq="'"
        >
    the decoded attributes are returned as
        {
            'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
            'empty': '', 'noval': None, 'entity': '&',
            'sq': '"', 'dq': '\''
        }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered up to that point is still returned
        pass
    return attr_parser.attrs
9e6dd238 2027
c5229f39 2028
def clean_html(html):
    """Turn an HTML snippet into readable plain text (None is passed through)"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines carry no meaning in HTML; <br> and </p><p> do
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop the remaining tags, then decode the entities
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
9e6dd238
FV
2044
2045
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    The name '-' stands for standard output. If opening fails for a reason
    other than EACCES, the name is passed through sanitize_path() and, when
    that changes it, the open is attempted again; otherwise the original
    error is re-raised, like the standard open() function would.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Prefer the underlying binary buffer where stdout has one
            return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2076
2077
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a timestamp (None if unparsable)"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2085
5f6a1245 2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Make a string usable as part of a filename.

    With restricted set, only a stricter subset of characters is kept.
    With is_id set, the string is treated as an ID that should be preserved
    where possible, so the cosmetic clean-up pass is skipped.
    """
    def _sanitize_char(ch):
        code = ord(ch)
        if restricted and ch in ACCENT_CHARS:
            return ACCENT_CHARS[ch]
        if ch == '?' or code < 32 or code == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '_-' if restricted else ' -'
        if ch in '\\/|*<>':
            return '_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace() or code > 127):
            return '_'
        return ch

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    cleaned = ''.join(_sanitize_char(ch) for ch in s)
    if is_id:
        return cleaned

    # Collapse runs of underscores and trim them from both ends
    cleaned = re.sub(r'_{2,}', '_', cleaned).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    if cleaned.startswith('-'):
        cleaned = '_' + cleaned[1:]
    cleaned = cleaned.lstrip('.')
    return cleaned or '_'
d77c3dfd 2126
5f6a1245 2127
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms the path is returned unchanged unless force is set,
    in which case it is processed the same way with an empty drive prefix.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace forbidden characters, as well as a trailing space or dot,
    # with '#' in every component except '.' and '..'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # startswith() rather than s[0] so that an empty path does not raise
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2151
2152
def sanitize_url(url):
    """Repair a protocol-less URL or one with a commonly mistyped scheme"""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, replacement in COMMON_TYPOS:
        # Every pattern is anchored at the start, so at most one substitution
        fixed, hits = re.subn(typo_re, replacement, url)
        if hits:
            return fixed
    return url
17bcc626
S
2169
2170
def sanitized_Request(url, *args, **kwargs):
    """Build a Request for url after passing it through sanitize_url()"""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2173
2174
51098426
S
def expand_path(s):
    """Expand ~ first and then shell variables in the path s"""
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2178
2179
def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates, in first-seen order """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2187
912b38b4 2188
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with the
    trailing ';' (e.g. 'amp;' or '#x26;'). Unknown or invalid entities are
    returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Out-of-range code points raise ValueError; numbers too large
            # for a C integer raise OverflowError instead
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2218
2219
def unescapeHTML(s):
    """Replace the HTML entities in s by their characters (None is passed through)"""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode_entity(match):
        # Group 1 is the entity without '&' but including the ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode_entity, s)
d77c3dfd 2227
8bf48f23 2228
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate(*args, **kwargs) and return its result.

    If anything is raised meanwhile, the process is killed and waited for
    before the exception is re-raised.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return result
2236
2237
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and filenames"""
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2248
2249
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form expected by file system (or subprocess) APIs

    @param s The name of the file
    """

    assert type(s) == compat_str

    keep_unicode = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if keep_unicode:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2272
2273
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string filename on Python 2; return anything else unchanged"""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2283
f07b74fc
PH
2284
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(s, True)"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2292
2293
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument via decodeFilename(b, True)"""
    return decodeFilename(b, for_subprocess=True)
2296
2297
8271226a
PH
def decodeOption(optval):
    """Return an option value as text, decoding bytes with preferredencoding()

    None is passed through unchanged.
    """
    if optval is None:
        return optval
    value = optval
    if isinstance(value, bytes):
        value = value.decode(preferredencoding())

    assert isinstance(value, compat_str)
    return value
1c256f70 2306
5f6a1245 2307
def formatSeconds(secs, delim=':'):
    """Format a number of seconds as H<delim>MM<delim>SS, M<delim>SS or S

    The shortest form that fits is used: hours are shown from 3600 seconds
    on and minutes from 60 seconds on.
    """
    # `>=` so that exactly one hour / one minute roll over to the next unit
    # instead of being rendered as '60:00' / '60'
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2315
a0ddb8a2 2316
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with an SSL context suited to this Python

    Certificate verification is disabled when params['nocheckcertificate']
    is set.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        ctx = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=ctx, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        ctx.verify_mode = ssl.CERT_NONE
    else:
        ctx.verify_mode = ssl.CERT_REQUIRED
    ctx.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=ctx, **kwargs)
ea6d901e 2340
732ea2f0 2341
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to unexpected errors asking for a bug report"""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    return ''.join((
        '; please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))
2351
2352
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions."""
2356
2357
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """

        # Errors raised while handling one of these exceptions are always
        # treated as expected
        expected_while_handling = (
            compat_urllib_error.URLError, socket.timeout, UnavailableVideoError)
        if sys.exc_info()[0] in expected_while_handling:
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none"""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2385
1c256f70 2386
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported; the URL is kept in `url`"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2392
2393
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Raised when a regular expression did not match"""
2397
2398
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    It is always an expected error; the original message is kept in `msg`
    and the `countries` argument in `countries`.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2410
2411
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Stored as given so that callers can inspect the original error
        self.exc_info = exc_info
d77c3dfd
FV
2424
2425
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2433
2434
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task. The message is also
    available as `msg`.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd 2445
5f6a1245 2446
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): presumably raised to stop processing (e.g. for
    --break-on-existing); confirm against the callers. The previous
    docstring was a copy of MaxDownloadsReached's.
    """
2450
2451
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): presumably raised to stop processing (e.g. for
    --break-on-reject); confirm against the callers. The previous
    docstring was a copy of MaxDownloadsReached's.
    """
2455
2456
class MaxDownloadsReached(YoutubeDLError):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
2460
2461
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2469
2470
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2486
5f6a1245 2487
class XAttrMetadataError(YoutubeDLError):
    """Error while handling extended-attribute metadata

    `reason` classifies the failure from the errno code and the message as
    'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg
            or 'Disk quota exceeded' in msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2502
2503
class XAttrUnavailableError(YoutubeDLError):
    # No behaviour of its own; only the distinct type matters
    pass
2506
2507
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if requested, pin it to a source address.

    ydl_handler -- handler object whose `_params` mapping is consulted for
                   the 'source_address' option
    http_class  -- connection class to instantiate with *args/**kwargs
    is_https    -- only used by the Python 2.6 fallback to decide whether the
                   raw socket gets wrapped with SSL

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the `source_address` parameter shadows the outer variable; here
            # it is indexed and passed to bind(), i.e. it is used as a
            # (host, port) pair rather than as a bare host string
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host is taken to mean IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try every remaining candidate; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and make sure the socket is not leaked
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                # Re-raise the error of the last failed attempt
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is passed as the local port of the bind address
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() altogether
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2571
2572
def handle_youtubedl_headers(headers):
    """Process the internal 'Youtubedl-no-compression' marker header.

    Without the marker the given mapping is returned as is. With it, a new
    dict is returned that lacks both the marker and any Accept-Encoding
    header (matched case-insensitively); the input is not modified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    kept = {}
    for name, value in headers.items():
        if name.lower() != 'accept-encoding':
            kept[name] = value
    del kept['Youtubedl-no-compression']
    return kept
87f0e62d
YCH
2581
2582
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Keep `params` as self._params; remaining arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req` through _create_http_connection, via SOCKS if requested.

        The internal 'Ytdl-socks-proxy' header selects a SOCKS proxy and is
        removed from the request before it is sent.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate `data`; empty input is returned unchanged."""
        if not data:
            return data
        try:
            # First attempt: raw deflate stream (negative wbits = no zlib header)
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            # Fallback: let zlib expect its default header
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL, add the standard headers and return the request."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Honour and strip the internal "Youtubedl-no-compression" marker
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decode gzip/deflate bodies and percent-encode redirect locations."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes cut off; if no attempt
                # succeeds, the very first error is re-raised
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # The body is decoded now, so the header no longer applies
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic goes through exactly the same hooks
    https_request = http_request
    https_response = http_response
bf50b038 2706
5de90176 2707
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Build a subclass of base_class that connects through a SOCKS proxy.

    base_class  -- compat_http_client.HTTPConnection or HTTPSConnection
                   (or a subclass of either)
    socks_proxy -- proxy URL; its scheme must be one of socks, socks4,
                   socks4a or socks5 (case-insensitive). Port defaults to
                   1080; username and password are unquoted when present.

    Returns the new connection class.
    Raises ValueError for an unsupported proxy scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Without this branch socks_type would stay unbound and the failure
        # would only surface later as an obscure UnboundLocalError
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2749
2750
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections through _create_http_connection.

    `params` is kept as self._params; `https_conn_class` overrides the
    connection class (default: compat_http_client.HTTPSConnection).
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        if not https_conn_class:
            https_conn_class = compat_http_client.HTTPSConnection
        self._https_conn_class = https_conn_class
        self._params = params

    def https_open(self, req):
        """Open `req`, via SOCKS when the 'Ytdl-socks-proxy' header is set."""
        connection_class = self._https_conn_class

        # Forward SSL settings only when this Python version provides them
        open_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            open_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            open_kwargs['check_hostname'] = self._check_hostname

        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            # Internal header, must not be sent to the server
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2774
2775
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix that marks an entry as HttpOnly; stripped by load()
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid entry
    _ENTRY_LEN = 7
    # Written verbatim at the top of every saved cookie file
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    # Field layout of one entry, used by load() to validate lines
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        Raises ValueError if neither `filename` nor self.filename is set.
        Note that the cookies held by this jar are modified: every cookie
        without an expiry gets `expires` set to 0.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Malformed entries are skipped with a warning on stderr.
        Raises ValueError if neither `filename` nor self.filename is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line to hand to the parser; raises LoadError when
            # the entry is malformed
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect the sanitized lines in memory and parse that buffer
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2892
2893
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that registers its hooks for HTTPS as well as HTTP.

    http_response currently just delegates to the base class; the
    Set-Cookie escaping workaround inside it is commented out.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests use the base class hook, HTTPS responses the override above
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2916
2917
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that coerces the redirect URL to compat_str on Python 2.

    On Python 3 the class body is empty apart from this docstring, so the
    base class behaviour is used unchanged.
    """
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2925
2926
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC designator off a date string.

    Recognizes a final 'Z' or a final (optionally space-prefixed) +HHMM,
    +HH:MM, -HHMM or -HH:MM, preceded by at least 8 characters.

    Returns a tuple (offset as datetime.timedelta, date_str without the
    designator). Without a designator the offset is zero and the string is
    returned unchanged.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    stripped = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # Plain 'Z': UTC
        return datetime.timedelta(), stripped

    factor = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=factor * int(tz_match.group('hours')),
        minutes=factor * int(tz_match.group('minutes')))
    return offset, stripped
2943
2944
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str  -- 'YYYY-MM-DD<delimiter>HH:MM:SS', optionally with fractional
                 seconds (discarded); None yields None
    delimiter -- separator between date and time part
    timezone  -- datetime.timedelta UTC offset; when None it is extracted
                 from date_str itself

    Returns None if the string does not match the expected format.
    """
    if date_str is None:
        return None

    # Fractional seconds are dropped
    date_str = re.sub(r'\.[0-9]+', '', date_str)
    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        parsed = datetime.datetime.strptime(
            date_str, '%Y-%m-%d{0}%H:%M:%S'.format(delimiter))
        return calendar.timegm((parsed - timezone).timetuple())
    except ValueError:
        return None
912b38b4
PH
2962
2963
46f59e89
S
def date_formats(day_first=True):
    """Return DATE_FORMATS_DAY_FIRST if day_first is truthy, else DATE_FORMATS_MONTH_FIRST."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2966
2967
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Commas become spaces, AM/PM (plus a following timezone name) is
    # removed, then a trailing numeric UTC offset is stripped
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; if several match, the last one wins
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Last resort: RFC 2822 style dates
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
bf50b038 2994
5f6a1245 2995
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for a date string in one of the known formats.

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # 12 hours are added whenever "PM" occurs anywhere in the string
    pm_hours = 12 if re.search(r'(?i)PM', text) else 0
    tz_offset, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    named_tz = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if named_tz:
        text = text[:-len(named_tz.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    too_precise = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if too_precise:
        text = too_precise.group(1)

    for fmt in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(text, fmt) - tz_offset + datetime.timedelta(hours=pm_hours)
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    # Last resort: RFC 2822 style dates
    rfc_tuple = email.utils.parsedate_tz(text)
    if rfc_tuple:
        return calendar.timegm(rfc_tuple) + pm_hours * 3600
    return None
46f59e89
S
3027
3028
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    The part after the last dot (query string excluded) is returned if it
    is purely alphanumeric, or, with trailing slashes stripped, if it is
    one of KNOWN_EXTENSIONS. Otherwise default_ext is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3040
5f6a1245 3041
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'.

    The actual replacement, including the meaning of expected_real_ext, is
    delegated to replace_extension().
    """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3044
5f6a1245 3045
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD,
    'yesterday' or (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    Raises ValueError if the string matches none of the formats."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # A bad approximation? timedelta knows neither months nor years
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
3073
3074
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3083
5f6a1245 3084
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest/latest representable date.
        Raises ValueError if start lies after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not already a date is parsed first
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
c496ca96
PH
3114
3115
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string result is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3124
3125
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors 1 and 2 to the identifiers passed to
    # GetStdHandle (STD_OUTPUT_HANDLE / STD_ERROR_HANDLE in the Win32 API)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the count reported back by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for an invalid handle, a non-character file type, or when
        # GetConsoleMode fails on the handle
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters that stop before the next
    # non-BMP character; such a character is written on its own with a
    # count of 2
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3199
3200
def write_string(s, out=None, encoding=None):
    """Write the text s to out (default: sys.stderr) and flush it.

    s must be a compat_str. On Windows, without an explicit encoding, the
    console-specific writer is tried first. Otherwise the text is encoded
    (ignoring unencodable characters) for binary streams, Python 2 and
    streams exposing a `buffer`; other streams receive the text as is.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    wants_console_writer = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if wants_console_writer and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    is_byte_stream = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if is_byte_stream:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        stream_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(stream_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
3221
3222
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the byte values of bs as a list of ints ([] for empty input)."""
    if not bs:
        return []
    # Indexing yields ints on Python 3 and 1-char strings on Python 2
    return list(bs) if isinstance(bs[0], int) else [ord(c) for c in bs]
3230
c257baff 3231
def intlist_to_bytes(xs):
    """Pack a sequence of ints (unsigned byte values) into a byte string."""
    if xs:
        return compat_struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
3236
3237
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs raising IOError where fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure passed to LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count to lock, starting at offset 0
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock f via LockFileEx; raises OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so that _unlock_file can pass the same
        # structure again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 is LOCKFILE_EXCLUSIVE_LOCK in the Win32 API
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raises OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock on f."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock on f."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3311
3312
class locked_file(object):
    """File wrapper that holds a lock while used as a context manager.

    The file is opened on construction. Entering the context takes a shared
    lock for mode 'r' and an exclusive one for 'a' and 'w'; leaving it
    unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        wants_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, wants_exclusive)
        except IOError:
            # Do not leak the file handle if locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3342
3343
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3347
3348
def shell_quote(args):
    """Quote each argument for the shell and join them with single spaces.

    Byte string arguments are decoded with the filesystem encoding first.
    """
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
9d4660ca
PH
3358
3359
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    Data that is already smuggled into url is kept and takes precedence
    over equal keys in data. The dict passed by the caller is left
    untouched. Returns the URL with the JSON-encoded data appended as
    fragment. """

    url, idata = unsmuggle_url(url, {})
    # Work on a copy so that the caller's dict is not modified
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3368
3369
def unsmuggle_url(smug_url, default=None):
    """ Split a smuggled URL into (url, data)

    Returns (smug_url, default) when the URL carries no smuggled data. """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
02dbf93f
PH
3377
3378
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count as a string with a binary unit suffix

    bytes may be a number, a numeric string or None. Returns 'N/A' for
    None, otherwise the value with two decimals followed by the unit,
    e.g. '1.50KiB'. Values below one byte are shown in 'B' and values
    beyond the largest unit are shown in 'YiB'.
    Negative values raise ValueError (from math.log). """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # Clamp to the table: a negative exponent (value < 1) would index
        # the list from the end, a too large one would raise IndexError
        exponent = int(math.log(bytes, 1024.0))
        exponent = min(max(exponent, 0), len(suffixes) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3391
1c088fa8 3392
fb47597b
S
def lookup_unit_table(unit_table, s):
    """ Parse '<number><unit>' at the start of s using a unit -> multiplier table

    Both ',' and '.' are accepted as decimal separator. Returns the
    number times the multiplier as int, or None if s does not match. """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    mobj = re.match(pattern, s)
    if mobj is None:
        return None
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3402
3403
be64b5b0
PH
def parse_filesize(s):
    """ Parse a file size such as '5 MiB' or '1.2GB' into a number of bytes

    Returns None if s is None or cannot be parsed. """
    if s is None:
        return None

    # (symbol letter, decimal unit name, binary unit name), in rising order
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        small = letter.lower()
        unit_table[letter + 'iB'] = binary
        unit_table[letter + 'B'] = decimal
        unit_table[small + 'B'] = binary
        unit_table[letter + 'b'] = decimal
        unit_table[small + 'b'] = decimal
        unit_table[decimal_name + 'bytes'] = decimal
        unit_table[binary_name + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
3473
3474
def parse_count(s):
    """ Parse a count such as '1,234' or '1.5M' into an int

    Returns None if s is None or cannot be parsed. """
    if s is None:
        return None

    text = s.strip()

    # Plain numbers, possibly with thousands separators
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand = 1000
    million = 1000 ** 2
    multipliers = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(multipliers, text)
be64b5b0 3494
2f7ae819 3495
b871d7e9
S
def parse_resolution(s):
    """ Extract width/height from a string such as '1920x1080', '720p' or '4k'

    Returns a dict with 'width' and/or 'height', or {} if nothing is found. """
    if s is None:
        return {}

    dims = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dims:
        return {
            'width': int(dims.group('w')),
            'height': int(dims.group('h')),
        }

    # (pattern, factor from the captured number to the height in pixels)
    height_patterns = (
        (r'\b(\d+)[pPiI]\b', 1),
        (r'\b([48])[kK]\b', 540),
    )
    for pattern, factor in height_patterns:
        found = re.search(pattern, s)
        if found:
            return {'height': int(found.group(1)) * factor}

    return {}
3516
3517
0dc41787
S
def parse_bitrate(s):
    """ Return the number in front of 'kbps' in s as int, or None """
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    return int(found.group(1)) if found else None
3524
3525
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of the month with the given full name

    The names are looked up in MONTH_NAMES for lang, with the English
    names as fallback for unknown languages. Returns None if the name is
    not found. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    if name in names:
        return names.index(name) + 1
    return None
3535
3536
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of the month whose English name starts
    with the three-letter abbreviation abbrev, or None """

    for number, month in enumerate(ENGLISH_MONTH_NAMES, 1):
        if month[:3] == abbrev:
            return number
    return None
18258362
JMF
3545
3546
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity by '&amp;' in XML"""
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
3553
3554
def setproctitle(title):
    """ Set the name of the current process via prctl() of libc.so.6

    This is best-effort: nothing happens on Jython, when the library
    cannot be loaded or when it has no prctl function. """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initialising the buffer from the bytes makes it one byte larger than
    # the title, so it is always NUL-terminated. A buffer of exactly
    # len(title) bytes has no terminator and prctl could read past its end.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3579
3580
def remove_start(s, start):
    """ Return s without the prefix start; s is returned as is if it is
    None or does not begin with start """
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3583
3584
def remove_end(s, end):
    """ Return s without the suffix end; s is returned as is if it is
    None, does not finish with end, or end is empty """
    # An empty suffix must be skipped: s[:-0] is s[:0], i.e. ''
    if s is not None and s.endswith(end) and end:
        return s[:-len(end)]
    return s
2b9faf55
PH
3587
3588
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes around s """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3596
3597
b6e0c7d2
U
def get_domain(url):
    """ Return the host part of url without scheme and leading 'www.', or None """
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not mobj:
        return None
    return mobj.group('domain')
3601
3602
def url_basename(url):
    """ Return the last segment of the path of url """
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
3606
3607
02dc0a36
S
def base_url(url):
    """ Return url up to and including the last '/' before any query or fragment

    Raises AttributeError if url is not an http(s) URL of that shape. """
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3610
3611
def urljoin(base, path):
    """ Join path onto base, returning None for unusable input

    path is returned unchanged if it already is absolute or
    protocol-relative. base must be an http(s) or protocol-relative URL.
    Bytes arguments are decoded as UTF-8. """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3625
3626
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """ Request that is sent with the HTTP method HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
3630
3631
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """ Request that is sent with the HTTP method PUT """

    def get_method(self):
        return 'PUT'
3635
3636
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to int, returning default if that is not possible

    If get_attr is given, the attribute of that name is read from v first.
    The result is int(v) * invscale // scale. None, '' and values that
    int() rejects (including infinite floats) yield default. """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
9732d77e 3649
9572013d 3650
40a90862
JMF
def str_or_none(v, default=None):
    """ Return v converted to text, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
3653
9732d77e
PH
3654
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and plus signs before converting
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
608d11f5
PH
3662
3663
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Return float(v) * invscale / scale, or default if v is None or
    cannot be converted """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3671
3672
c7e327c4
S
def bool_or_none(v, default=None):
    """ Return v if it is a bool, else default """
    if isinstance(v, bool):
        return v
    return default
3675
3676
53cd37ba
S
def strip_or_none(v, default=None):
    """ Return v stripped of surrounding whitespace if it is text, else default """
    if not isinstance(v, compat_str):
        return default
    return v.strip()
b72b4431
S
3679
3680
af03000a
S
def url_or_none(url):
    """ Return the stripped url if it is text that starts with a supported
    scheme or with '//', else None """
    if not (url and isinstance(url, compat_str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
af03000a
S
3686
3687
def strftime_or_none(timestamp, date_format, default=None):
    """ Format timestamp with date_format, returning default on failure

    timestamp is either a number (unix timestamp, read as UTC) or a
    string of the form YYYYMMDD. """
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            moment = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            moment = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default
        return moment.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3698
3699
def parse_duration(s):
    """ Parse a duration string into a number of seconds

    Three notations are tried in turn: colon-separated fields
    ('1:02:03.5'), fields with unit names incl. the ISO 8601 style
    ('PT1H2M3S', '2 hours 3 min') and a single decimal amount of hours or
    minutes ('1.5 hours'). Returns None if s is not a string or has none
    of these forms. """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days = hours = mins = secs = ms = None
    fields = re.match(
        r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$',
        s)
    if not fields:
        # Years, months and weeks are accepted but not counted
        fields = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
    if fields:
        days, hours, mins, secs, ms = fields.groups()
    else:
        decimal = re.match(
            r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$',
            s)
        if not decimal:
            return None
        hours, mins = decimal.groups()

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms holds the fractional part including the leading dot
        duration += float(ms)
    return duration
91d7d0b3
JMF
3756
3757
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the extension of filename

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3764
3765
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the extension of filename by ext

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead. """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3771
3772
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
b7ab0590
PH
3782
3783
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3801
3802
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Return the version found in the output of an executable

    version_re must have the version as its first group; by default the
    word following 'version' is taken. Returns unrecognized if the
    pattern is not found. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3812
3813
class PagedList(object):
    """ Base class of lists that are fetched page by page

    Subclasses have to provide getslice(start=0, end=None). """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3818
9c44d242
PH
3819
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages one after another until a short page
    or the requested end is reached

    pagefunc(pagenum) must return an iterable with the entries of the
    zero-based page pagenum; pagesize is the number of entries of a full
    page. Fetched pages are kept in memory if use_cache is true. """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            # Maps page number -> list of the entries of that page
            self._cache = {}

    def getslice(self, start=0, end=None):
        """ Return the entries with index start (inclusive) to end
        (exclusive, None meaning "to the end") as a list """
        res = []
        # Start with the page that contains the entry at index start
        for pagenum in itertools.count(start // self._pagesize):
            # Index of the first entry of this page and of the next page
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of the first wanted entry within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset after the last wanted entry within this page,
            # None if the slice does not end in this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
3870
3871
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known in advance

    pagefunc(pagenum) must return an iterable with the entries of the
    zero-based page pagenum, pagecount is the total number of pages and
    pagesize the number of entries of a full page. """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with index start (inclusive) to end
        (exclusive, None meaning "to the end") as a list """
        res = []
        start_page = start // self._pagesize
        # Never go beyond the last existing page, even if end lies behind it
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Entries to drop from the front of the first page
        skip_elems = start - start_page * self._pagesize
        # Entries still to be collected, None for "no limit"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the slice
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3899
3900
def uppercase_escape(s):
    """ Decode all \\UXXXXXXXX escape sequences found in s """
    decode = codecs.getdecoder('unicode_escape')

    def expand(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
0fe2ff78
YCH
3907
3908
def lowercase_escape(s):
    """ Decode all \\uXXXX escape sequences found in s """
    decode = codecs.getdecoder('unicode_escape')

    def expand(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
b53466e1 3915
d05cfe06
S
3916
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Characters that are passed through unquoted
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, safe_chars)
d05cfe06
S
3922
3923
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    escaped = parts._replace(
        # The host name is IDNA-encoded, the other parts are percent-escaped
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3934
62e609ab
PH
3935
def read_batch_urls(batch_fd):
    """ Read the URLs from a batch file object, one per line, and close it

    Blank lines and lines starting with '#', ';' or ']' are skipped. """
    def fixup(line):
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if line.startswith(bom):
                line = line[len(bom):]
        line = line.lstrip()
        if not line or line.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', line, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return list(filter(None, map(fixup, fd)))
b74fa8cd
JMF
3953
3954
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments and return the result as ASCII bytes """
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3957
3958
def update_url_query(url, query):
    """ Return url with the parameters of the dict query added to its
    query string; existing parameters of the same name are replaced """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3967
8e60dc75 3968
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """ Return a copy of the request req with some parts replaced

    headers are merged into the headers of req, query into the query
    string of the URL; url and data replace the values of req if given.
    HEAD and PUT requests keep their method. """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    request_classes = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }
    req_type = request_classes.get(
        req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3987
3988
def _multipart_encode_impl(data, boundary):
    """ Encode the dict data as multipart/form-data using boundary

    Returns (body as bytes, value for the Content-Type header). Raises
    ValueError if the boundary occurs in one of the fields. """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    raw_boundary = boundary.encode('ascii')
    delimiter = b'--' + raw_boundary
    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if raw_boundary in content:
            raise ValueError('Boundary overlaps with data')
        chunks.extend((delimiter, b'\r\n', content))

    chunks.extend((delimiter, b'--\r\n'))

    return b''.join(chunks), content_type
4009
4010
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type header value). ValueError
    is raised if a specified boundary occurs in the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A boundary chosen by the caller is used as is, clash or not
        out, content_type = _multipart_encode_impl(data, boundary)
        return out, content_type

    # Draw random boundaries until one does not clash with the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            out, content_type = _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
        return out, content_type
4039
4040
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up a key or the first usable one of several keys in d

    With a list or tuple of keys, the value of the first key that is
    present and not None (nor falsy, if skip_false_values) is returned.
    A single key is looked up with d.get(). """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
4049
4050
def try_get(src, getter, expected_type=None):
    """ Return the result of the first getter that succeeds on src

    getter is a callable or a list/tuple of callables. A getter that
    raises AttributeError, KeyError, TypeError or IndexError, or whose
    result is not of expected_type, is skipped. Returns None if none
    is usable. """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4062
4063
6cc62232
S
def merge_dicts(*dicts):
    """ Merge dicts, giving precedence to the earlier ones

    None values are ignored. An empty string is replaced by a later
    non-empty string for the same key. """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_blank = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str) and not current)
                if not fills_blank:
                    continue
            merged[key] = value
    return merged
4076
4077
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as text, decoding it with encoding if it is not text yet """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4080
16392824 4081
a1a530b0
PH
# Maps US film rating labels to the age limit that parse_age_limit()
# returns for them
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4089
4090
# Maps TV Parental Guidelines labels to the age limit that
# parse_age_limit() returns for them. The keys must start with 'TV-'
# since parse_age_limit() builds its pattern from the part after it.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4099
4100
def parse_age_limit(s):
    """ Parse an age limit from an int or a rating string

    Accepted are ints from 0 to 21, strings such as '18' or '18+', the
    labels of US_RATINGS and the labels of TV_PARENTAL_GUIDELINES (with
    '-', '_' or nothing after 'TV'). Returns None for anything else. """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_labels = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_labels, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4115
4116
def strip_jsonp(code):
    """ Return the argument of a JSONP callback, i.e. the bare JSON text

    Handles an optional 'window.' prefix, the 'cb && cb(...)' form and
    trailing '//' comments. Text that is not JSONP is returned as is. """
    jsonp_call = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_call.sub(r'\g<callback_data>', code)
478c2c61
PH
4125
4126
def js_to_json(code, vars={}):
    """ Rewrite JavaScript literal code so that it can be read as JSON

    Strings are converted to double-quoted strings, bare identifiers and
    numeric keys are quoted, comments, trailing commas and '!' are
    removed, and hexadecimal and octal integers are written in decimal.

    code: the JavaScript source text
    vars: dict of identifier -> replacement text; the replacement is
          inserted verbatim for a matching bare identifier
    Returns the converted text; it is not checked to be valid JSON. """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    # Optional whitespace and comment between two tokens
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) for integers that are not written in decimal;
    # each pattern also matches a trailing ':' (integer used as key)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Called for every token matched by the pattern below,
        # returns the replacement text
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, negations and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            # String: strip the quotes and redo the escaping for double quotes
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Keys have to be strings in JSON
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4171
4172
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function that maps a quality id to its position in
    quality_ids, or to -1 if it is not contained. """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4181
acd69589 4182
# Output filename templates in %-format with named fields
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
}
# Kinds of output files besides 'default'. The values look like the
# filename extension used for each kind, with None where there is no
# fixed one -- TODO confirm against the code that reads this table
OUTTMPL_TYPES = {
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
0a871f68 4195
a020a0dc
PH
4196
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    # The result, ellipses included, is length characters long
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4205
4206
def version_tuple(v):
    """ Split a version string at '.' and '-' into a tuple of ints

    Raises ValueError if a component is not an integer. """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
4209
4210
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit

    A missing or unparsable version counts as outdated only if
    assume_new is false. """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
732ea2f0
PH
4218
4219
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U

    Always returns False here. The check for a zipimport loader or a
    frozen executable that used to follow the return statement was
    unreachable and has been removed. """
    return False
7d4111ed
PH
4227
4228
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    return ' '.join(map(compat_shlex_quote, args))
2ccd1b10
PH
4232
4233
def error_to_compat_str(err):
    """ Return the message of the exception err as text """
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4241
4242
def mimetype2ext(mt):
    """ Return the filename extension for the MIME type mt

    Unknown subtypes are returned as they are, lower-cased and without
    parameters. Returns None if mt is None. """
    if mt is None:
        return None

    # Types that need the part in front of the slash to be told apart
    full_type_map = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    if mt in full_type_map:
        return full_type_map[mt]

    subtype_map = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    # Keep what follows the last slash and drop parameters like charset
    subtype = mt.rpartition('/')[2]
    subtype = subtype.split(';')[0].strip().lower()
    return subtype_map.get(subtype, subtype)
4280
4281
def parse_codecs(codecs_str):
    """ Split a codecs string as of RFC 6381 into video and audio codec

    Returns a dict with 'vcodec' and 'acodec' ('none' for a missing one).
    If no codec is recognized, two codecs are taken as video and audio
    in this order; otherwise {} is returned. """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    video_codecs = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    audio_codecs = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in video_codecs:
            # The first codec of each kind wins
            vcodec = vcodec or full_codec
        elif codec in audio_codecs:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4311
4312
def urlhandle_detect_ext(url_handle):
    """ Guess the filename extension from the headers of a response

    The filename of an attachment Content-Disposition is preferred, the
    Content-Type is used otherwise. """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4325
4326
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Return a base64 data: URI for the bytes data of type mime_type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4329
4330
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked without a limit set by the user,
    # and content without a limit is available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4339
4340
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    # The UTF-32 marks have to precede the UTF-16 ones they start with
    BOMS = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    # Without byte order mark the data is read as UTF-8
    bom, encoding = next(
        (entry for entry in BOMS if first_bytes.startswith(entry[0])),
        (b'', 'utf-8'))
    text = first_bytes[len(bom):].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4359
4360
def determine_protocol(info_dict):
    """ Return the download protocol for info_dict

    An explicit 'protocol' entry wins. Otherwise it is derived from the
    start of the URL (rtmp, mms, rtsp), from its extension (m3u8, f4m) or
    finally from the URL scheme. """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    for manifest_ext in ('m3u8', 'f4m'):
        if ext == manifest_ext:
            return manifest_ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4381
4382
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Every column except the last is left-aligned and padded to its widest
    cell plus extraGap. With delim, a row of dashes is inserted below the
    header. With hideEmpty, columns whose data cells are all empty are
    dropped (header included).
    """

    def column_widths(rows):
        return [max(len(compat_str(cell)) for cell in column) for column in zip(*rows)]

    def keep_columns(row, mask):
        return [cell for (keep, cell) in zip(mask, row) if keep]

    if hideEmpty:
        # A width of 0 is falsy, so it doubles as the "drop this column" flag
        mask = column_widths(data)
        header_row = keep_columns(header_row, mask)
        data = [keep_columns(row, mask) for row in data]

    widths = column_widths([header_row] + data)
    if delim:
        rows = [header_row] + [['-' * width for width in widths]] + data
    else:
        rows = [header_row] + data

    padded = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    line_format = ' '.join(padded) + ' %s'
    return '\n'.join(line_format % tuple(row) for row in rows)
347de493
PH
4403
4404
4405def _match_one(filter_part, dct):
4406 COMPARISON_OPERATORS = {
4407 '<': operator.lt,
4408 '<=': operator.le,
4409 '>': operator.gt,
4410 '>=': operator.ge,
4411 '=': operator.eq,
4412 '!=': operator.ne,
4413 }
4414 operator_rex = re.compile(r'''(?x)\s*
4415 (?P<key>[a-z_]+)
4416 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
4417 (?:
4418 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
db13c16e 4419 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
347de493
PH
4420 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
4421 )
4422 \s*$
4423 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4424 m = operator_rex.search(filter_part)
4425 if m:
4426 op = COMPARISON_OPERATORS[m.group('op')]
e5a088dc 4427 actual_value = dct.get(m.group('key'))
3089bc74
S
4428 if (m.group('quotedstrval') is not None
4429 or m.group('strval') is not None
e5a088dc
S
4430 # If the original field is a string and matching comparisonvalue is
4431 # a number we should respect the origin of the original field
4432 # and process comparison value as a string (see
067aa17e 4433 # https://github.com/ytdl-org/youtube-dl/issues/11082).
3089bc74
S
4434 or actual_value is not None and m.group('intval') is not None
4435 and isinstance(actual_value, compat_str)):
347de493
PH
4436 if m.group('op') not in ('=', '!='):
4437 raise ValueError(
4438 'Operator %s does not support string values!' % m.group('op'))
db13c16e
S
4439 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
4440 quote = m.group('quote')
4441 if quote is not None:
4442 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
347de493
PH
4443 else:
4444 try:
4445 comparison_value = int(m.group('intval'))
4446 except ValueError:
4447 comparison_value = parse_filesize(m.group('intval'))
4448 if comparison_value is None:
4449 comparison_value = parse_filesize(m.group('intval') + 'B')
4450 if comparison_value is None:
4451 raise ValueError(
4452 'Invalid integer value %r in filter part %r' % (
4453 m.group('intval'), filter_part))
347de493
PH
4454 if actual_value is None:
4455 return m.group('none_inclusive')
4456 return op(actual_value, comparison_value)
4457
4458 UNARY_OPERATORS = {
1cc47c66
S
4459 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4460 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4461 }
4462 operator_rex = re.compile(r'''(?x)\s*
4463 (?P<op>%s)\s*(?P<key>[a-z_]+)
4464 \s*$
4465 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4466 m = operator_rex.search(filter_part)
4467 if m:
4468 op = UNARY_OPERATORS[m.group('op')]
4469 actual_value = dct.get(m.group('key'))
4470 return op(actual_value)
4471
4472 raise ValueError('Invalid filter part %r' % filter_part)
4473
4474
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false

    filter_str is split on '&'; every part must be accepted by _match_one().
    """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4480
4481
def match_filter_func(filter_str):
    """Build a match-filter callback for *filter_str*.

    The returned function takes an info dict and returns None when it passes
    the filter, or a message explaining why the video is skipped.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)

    return _match_func
91410c9b
PH
4490
4491
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds as a float.

    Accepts an offset in seconds ('12', '12.5', '12.5s') or a clock value
    'H:MM:SS' with an optional fraction introduced by '.' or ':'.
    Returns None for empty input and for anything else.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock:
        hours, minutes, seconds = clock.groups()
        # A ':' before the last field is read as a decimal separator
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
bf6427d2
YCH
4503
4504
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format *seconds* (int or float) as an SRT timecode 'HH:MM:SS,mmm'.

    The value is rounded to the nearest millisecond before it is split into
    fields. Formatting the raw float fields with %d truncates instead, so a
    value such as 5.84 (stored as 5.8399999...) came out as ',839'.
    Intended for non-negative values.
    """
    total_msecs = int(round(seconds * 1000))
    total_secs, msecs = divmod(total_msecs, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msecs)
bf6427d2
YCH
4507
4508
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # (current namespace, [older namespace URIs]); the older URIs are
    # replaced in the raw bytes before parsing so that a single set of
    # namespaces has to be handled below
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* attributes that are translated into SRT markup
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> {property: value}, parent properties included
    default_style = {}  # style taken from <body>/<div>, applied to every element

    class TTMLPElementParser(object):
        # Target object for xml.etree.ElementTree.XMLParser: receives the
        # start/end/data events of one <p> and accumulates SRT text in _out.
        #
        # NOTE(review): the two lists below are class attributes that are
        # mutated in place, so they are shared by every parser instance that
        # parse_node() creates. An entry left on _applied_styles (pushed in
        # start() for a styled element that emits no tag, which end() then
        # does not pop) is still there for later paragraphs. Left as is,
        # because making the state per-instance would change the emitted
        # markup - confirm the intended behaviour before touching it.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: default style, then the referenced style,
                # then inline tts:* attributes
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the most
                        # recently applied style
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                # Always pushed (possibly empty) so that end() can pop
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                # Close the tags opened for this element, innermost first
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and replay it through the event parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve the <style> definitions. A style may reference a parent that
    # is defined later in the document, so passes are repeated while some
    # parent is still unknown.
    while True:
        unresolved = False
        known_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    unresolved = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Stop when everything is resolved. Also stop when a whole pass
        # registered no new style: the missing parents (undefined, or without
        # any supported property) will never appear, and repeating the pass
        # would loop forever.
        if not unresolved or len(styles) == known_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        # Paragraphs without usable timing are skipped (their index is not reused)
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4671
4672
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argv fragment for an option that takes a value.

    Looks up *param* in *params*. Returns [] when it is missing or None,
    otherwise [command_option, <value as text>]. Every non-None value is
    converted to text, including falsy ones such as 0 or False, which were
    previously passed through unconverted.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, compat_str(value)]
4678
4679
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argv fragment for an option that takes a boolean value.

    Returns [] when *param* is missing or None in *params*. Otherwise the
    value must be a bool; it is rendered as true_value/false_value, either
    as a separate argument or joined to the option with *separator*.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4688
4689
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4693
4694
def cli_configuration_args(argdict, key, default=[], exe=None, use_default_arg=True):
    """Select the extra command-line arguments configured for *key* / *exe*.

    argdict maps lower-case names to argument lists. Lookup order:
    '<key>+<exe>', then <key> combined with <exe>, then (when
    use_default_arg is truthy) the 'default' entry, else *default*.
    A plain list/tuple argdict is the legacy format: it is returned as is
    when use_default_arg is True and ignored otherwise.

    When *default* is a list, a copy is returned instead of the object
    itself, so that a caller mutating the result cannot alter the shared
    default-argument list.
    """
    # use_default_arg can be True, False, or 'no_compat'
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_default_arg is True:
            return argdict
        argdict = None

    if argdict is None:
        return list(default) if isinstance(default, list) else default
    assert isinstance(argdict, dict)

    key = key.lower()
    args = exe_args = None
    if exe is not None:
        assert isinstance(exe, compat_str)
        exe = exe.lower()
        args = argdict.get('%s+%s' % (key, exe))
        if args is None:
            exe_args = argdict.get(exe)

    if args is None:
        # When key and exe are the same name, its entry was already
        # fetched as exe_args
        args = argdict.get(key) if key != exe else None
    if args is None and exe_args is None:
        args = argdict.get('default', default) if use_default_arg else default

    args, exe_args = args or [], exe_args or []
    assert isinstance(args, (list, tuple))
    assert isinstance(exe_args, (list, tuple))
    return args + exe_args
66e289ba
S
4725
4726
39672624
YCH
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of *code* are looked up; returns None
        for an unknown code.
        """
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code in the table that maps to *code*,
        or None when there is none.
        """
        matches = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
4930
4931
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the full country name

        The lookup is case-insensitive; returns None for an unknown code.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
5190
5191
773f291d
S
class GeoUtils(object):
    """Generation of random IPv4 addresses from per-country address blocks."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (dotted-quad text) from a block.

        A two-character argument is treated as a country code and looked up
        (case-insensitively) in the table above; None is returned when the
        code is unknown. Any other argument is used directly as an
        'address/prefix-length' block.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None

        network, prefix_len = block.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting all host bits gives the highest address of the block
        highest = lowest | (0xffffffff >> int(prefix_len))
        packed = compat_struct_pack('!L', random.randint(lowest, highest))
        return compat_str(socket.inet_ntoa(packed))
773f291d
S
5450
5451
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets a single request choose its own proxy.

    A request carrying a 'Ytdl-request-proxy' header uses that value as its
    proxy instead of the handler-wide one; the header is removed before the
    request goes on.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            # The defaults bind the current scheme and the bound method at
            # definition time
            setattr(
                self, '%s_open' % scheme,
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # yt-dlp's http/https handlers take care of wrapping the socket with socks
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5475
5476
0a5445dd
YCH
5477# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5478# released into Public Domain
5479# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5480
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Zero (and any value that is not greater than zero) yields a single
    zero byte.  If optional blocksize is given and greater than zero,
    the front of the byte string is padded with binary zeros so that
    its length is a multiple of blocksize.
    """
    n = int(n)

    # Emit 32-bit words, least significant first, then put them in order
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    packed = b''.join(reversed(words))

    # Drop leading zero bytes; nothing is left only when no word was packed
    packed = packed.lstrip(b'\000') or b'\000'

    if blocksize > 0:
        overhang = len(packed) % blocksize
        if overhang:
            packed = (blocksize - overhang) * b'\000' + packed
    return packed
5509
5510
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the input splits into whole 32-bit words
    overhang = len(s) % 4
    if overhang:
        s = b'\000' * (4 - overhang) + s

    result = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        result = (result << 32) + word
    return result
5526
5527
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The input bytes are reversed before being read as one hexadecimal
    number, so the first byte of `data` is the least significant.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return '%x' % ciphertext
81bdc8fd
YCH
5543
5544
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data, where the
    padding consists of random non-zero bytes.  A zero byte inside the
    padding would be taken for the separator when the block is
    unpadded, so zero is never generated.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves no room for the 11 bytes of
                               overhead the scheme requires
    """
    data = list(data)
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    padding = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + padding + [0] + data
5558
5559
def encode_base_n(num, n, table=None):
    """Convert the non-negative integer `num` to a base-`n` string.

    @param num    integer to encode, must not be negative
    @param n      base to encode in
    @param table  digit symbols to use; defaults to the first `n`
                  characters of 0-9, a-z, A-Z
    @returns      the encoded string, `table[0]` for zero
    @raises ValueError  if `n` exceeds the table length, if `num` is
                        negative, or if `n` is below 2 for a non-zero
                        `num` (the latter two would never terminate)
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never reaches zero for these inputs
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small' % n)

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
f52354a8
YCH
5576
5577
def decode_packed_codes(code):
    """Expand packed code by substituting its symbol table.

    PACKED_CODES_RE (defined elsewhere in this module) must match `code`
    and capture four groups: the obfuscated code, the base, the symbol
    count and the '|'-separated symbols.  Every word in the obfuscated
    code is replaced by the symbol whose base-n index it spells; an
    empty symbol means the word stands for itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in reversed(range(count)):
        encoded_index = encode_base_n(index, base)
        symbol_table[encoded_index] = symbols[index] or encoded_index

    def substitute(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', substitute, obfuscated_code)
e154c651 5594
5595
1ced2221
S
def caesar(s, alphabet, shift):
    """Shift every character of `s` found in `alphabet` by `shift` places.

    The shift wraps around the end of the alphabet; characters that are
    not part of the alphabet are left untouched.
    """
    if shift == 0:
        return s

    size = len(alphabet)

    def rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(rotate, s))
5603
5604
def rot47(s):
    """Apply ROT47 to `s`: a Caesar shift of 47 over the 94 printable
    ASCII characters from '!' to '~'."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5607
5608
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list (KEY=value,KEY="quoted value",...)
    into a dict, stripping the surrounding double quotes from quoted
    values."""
    pairs = re.findall(
        r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in pairs
    }
1143535d
YCH
5616
5617
def urshift(val, n):
    """Unsigned right shift: a negative `val` is first offset by 2**32,
    then shifted right by `n` bits."""
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5620
5621
5622# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5623# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into rows of unfiltered byte values.

    Returns a (width, height, pixels) tuple where `pixels` is a list of
    `height` rows, each a list of `width * 3` integers.  The decoder
    always works with three bytes per pixel and reads only the width
    and height from the IHDR chunk.

    Raises IOError if the signature or the leading IHDR chunk is missing,
    or if the file contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    body = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or body[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    int_map = {1: '>B', 2: '>H', 4: '>I'}

    def unpack_integer(raw):
        return compat_struct_unpack(int_map[len(raw)], raw)[0]

    # Each chunk is: 4-byte length, 4-byte type, data, 4-byte CRC
    chunks = []
    pos = 0
    while pos < len(body):
        length = unpack_integer(body[pos:pos + 4])
        chunk_type = body[pos + 4:pos + 8]
        chunk_data = body[pos + 8:pos + 8 + length]
        pos += 8 + length + 4  # the trailing 4 bytes skip the CRC
        chunks.append((chunk_type, chunk_data))

    ihdr = chunks[0][1]
    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''.join(
        chunk_data for chunk_type, chunk_data in chunks
        if chunk_type == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is preceded by one byte naming its filter
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]
        previous_row = pixels[y - 1] if y > 0 else None

        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + row_start + x]
            # Neighbours are the same channel of the pixel to the left
            # (3 bytes back) and of the pixel above
            left = current_row[x - 3] if x > 2 else 0
            up = previous_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = previous_row[x - 3] if x > 2 and y > 0 else 0

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    color = (color + left) & 0xff
                elif dist_up <= dist_upper_left:
                    color = (color + up) & 0xff
                else:
                    color = (color + upper_left) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5727
5728
def write_xattr(path, key, value):
    """Set the extended attribute `key` to `value` on the file `path`.

    Backends are tried in this order: the Python `xattr` module (either
    the pyxattr or the xattr flavour), NTFS Alternate Data Streams when
    running on Windows, and finally the `setfattr` or `xattr` command
    line tools.  `value` is written as-is by the module and ADS
    backends and decoded as UTF-8 for the command line tools.

    Raises XAttrUnavailableError when no usable backend exists (or
    pyxattr is too old) and XAttrMetadataError when the backend fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        has_setfattr = check_executable('setfattr', ['--version'])
        has_xattr = check_executable('xattr', ['-h'])

        if not (has_setfattr or has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are present
        if has_setfattr:
            executable = 'setfattr'
            opts = ['-n', key, '-v', value]
        else:
            executable = 'xattr'
            opts = ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd += [encodeArgument(o) for o in opts]
        cmd += [encodeFilename(path, True)]

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        stdout, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
0c265486
YCH
5811
5812
def random_birthday(year_field, month_field, day_field):
    """Pick a random date between 1950-01-01 and 1995-12-31 (inclusive).

    Returns a dict mapping the three given field names to the year,
    month and day of that date as strings.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_in_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_in_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
732044af 5823
c76eb41b 5824
# Templates for internet shortcut files, which are plain text files.
# Each template is filled with the % operator and ends with a newline.

# Windows '.url' shortcut; expects the key 'url'.
DOT_URL_LINK_TEMPLATE = '\n'.join((
    '[InternetShortcut]',
    'URL=%(url)s',
    '',
))

# macOS '.webloc' property list; expects the key 'url'.
DOT_WEBLOC_LINK_TEMPLATE = '\n'.join((
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">',
    '<plist version="1.0">',
    '<dict>',
    '\t<key>URL</key>',
    '\t<string>%(url)s</string>',
    '</dict>',
    '</plist>',
    '',
))

# Freedesktop '.desktop' link entry; expects the keys 'filename' and 'url'.
DOT_DESKTOP_LINK_TEMPLATE = '\n'.join((
    '[Desktop Entry]',
    'Encoding=UTF-8',
    'Name=%(filename)s',
    'Type=Link',
    'URL=%(url)s',
    'Icon=text-html',
    '',
))
5850
5851
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept, except for port 80 on the `http` scheme, where it is the default and therefore redundant. IRIs without a host part (e.g. `mailto:` addresses or relative references) are converted with an empty network location.

    Raises ValueError for IPv6 literals, which are not supported.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no network location at all
    if iri_parts.hostname:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is only implied by http; dropping it for any other scheme
    # would make the URI point at a different port.
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5894
5895
def to_high_limit_path(path):
    r"""Return `path` in a form that is not subject to MAX_PATH.

    On Windows (and Cygwin) the path is made absolute and prefixed with
    `\\?\`; on every other platform it is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    return '\\\\?\\' + os.path.abspath(path)
76d321f6 5902
c76eb41b 5903
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format `obj[field]` with `template`, or return `default`.

    The value is looked up with `obj.get(field, default)`.  If `func`
    is given it is applied to any value that is not in `ignore`.  When
    the resulting value is in `ignore`, `default` is returned instead
    of the formatted string.
    """
    val = obj.get(field, default)
    if func and val not in ignore:
        val = func(val)
    if val in ignore:
        return default
    return template % val
00dd0cd5 5909
5910
def clean_podcast_url(url):
    """Remove known podcast tracking/analytics redirect prefixes from
    `url`, leaving the address of the media itself."""
    tracking_prefix = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix, '', url)
ffcb8191
THD
5926
5927
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version 4 UUID as a lower-case string.

    In the template, 'x' becomes any hexadecimal digit while 'y' is
    limited to 8, 9, a or b, the only values that give the variant
    field the bit pattern required for this UUID layout.
    """
    def _random_digit(mobj):
        if mobj.group(0) == 'y':
            return _HEX_TABLE[8 + random.randint(0, 3)]
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 5933
5934
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` if it does not exist yet.

    @param path       path of a file whose directory should exist
    @param to_screen  optional callable that receives an error message
                      when the directory cannot be created
    @returns          True on success (including when there is nothing
                      to create), False if creation failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Only report when a reporter was actually supplied
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
f74980cb 5945
5946
def get_executable_path():
    """Return the absolute directory the program is considered to live in.

    This is normally the directory of `sys.argv[0]`.  When the program
    was started through a `__main__` module (i.e. it is running from
    source, where `sys.argv[0]` names the package's `__main__.py`), the
    parent of that directory is returned instead.
    """
    script = sys.argv[0]
    path = os.path.dirname(script)
    # Compare without the extension so that '__main__.py' is recognised too
    if os.path.splitext(os.path.basename(script))[0] == '__main__':  # Running from source
        path = os.path.join(path, '..')
    return os.path.abspath(path)
5952
5953
def load_plugins(name, type, namespace):
    """Load the plugin module `name` and collect its plugin classes.

    The module is searched for in the 'ytdlp_plugins' directory next to
    the executable.  Every attribute of the module whose name ends with
    `type` is added to `namespace` under that name.

    Returns the list of collected attributes; the list is empty when
    the module cannot be imported.
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        plugin_info = imp.find_module(name, [plugin_dir])
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name.endswith(type):
                klass = getattr(plugins, attr_name)
                classes.append(klass)
                namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first element is the open module file, when there is one
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
06167fbb 5973
5974
def traverse_dict(dictn, keys, casesense=True):
    """Follow `keys` one after another through nested dicts.

    Returns the value found at the end of the path, or None as soon as
    a level that has to be indexed is not a dict or lacks the key.
    With `casesense` False, the dict keys and the looked-up keys are
    lower-cased before they are compared.
    """
    node = dictn
    remaining = keys
    while True:
        if not isinstance(node, dict):
            return None
        key = remaining[0]
        if not casesense:
            node = {k.lower(): v for k, v in node.items()}
            key = key.lower()
        node = node.get(key, None)
        if len(remaining) < 2:
            return node
        remaining = remaining[1:]