]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
Split video by chapters (#158)
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
8f9312c3 43 compat_basestring,
8c25f81b 44 compat_chr,
1bab3437 45 compat_cookiejar,
d7cd9a9e 46 compat_ctypes_WINFUNCTYPE,
36e6f62c 47 compat_etree_fromstring,
51098426 48 compat_expanduser,
8c25f81b 49 compat_html_entities,
55b2f099 50 compat_html_entities_html5,
be4a824d 51 compat_http_client,
42db58ec 52 compat_integer_types,
e29663c6 53 compat_numeric_types,
c86b6142 54 compat_kwargs,
efa97bdc 55 compat_os_name,
8c25f81b 56 compat_parse_qs,
702ccf2d 57 compat_shlex_quote,
8c25f81b 58 compat_str,
edaa23f8 59 compat_struct_pack,
d3f8e038 60 compat_struct_unpack,
8c25f81b
PH
61 compat_urllib_error,
62 compat_urllib_parse,
15707c7e 63 compat_urllib_parse_urlencode,
8c25f81b 64 compat_urllib_parse_urlparse,
732044af 65 compat_urllib_parse_urlunparse,
66 compat_urllib_parse_quote,
67 compat_urllib_parse_quote_plus,
7581bfc9 68 compat_urllib_parse_unquote_plus,
8c25f81b
PH
69 compat_urllib_request,
70 compat_urlparse,
810c10ba 71 compat_xpath,
8c25f81b 72)
4644ac55 73
71aff188
YCH
74from .socks import (
75 ProxyType,
76 sockssocket,
77)
78
4644ac55 79
51fb4995
YCH
def register_socks_protocols():
    """Add the SOCKS URL schemes to ``compat_urlparse.uses_netloc``.

    Each of 'socks', 'socks4', 'socks4a' and 'socks5' is appended to the
    list only when it is not already present, so calling this function
    repeatedly does not create duplicate entries.
    """
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    netloc_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in netloc_schemes:
            continue
        netloc_schemes.append(socks_scheme)
87
88
468e2e92
FV
# The class of compiled regular expressions is not clearly defined otherwise,
# so it is taken from an actual compiled pattern
compiled_regex_type = re.compile('').__class__
91
f7a147e3
S
92
93def random_user_agent():
94 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
95 _CHROME_VERSIONS = (
96 '74.0.3729.129',
97 '76.0.3780.3',
98 '76.0.3780.2',
99 '74.0.3729.128',
100 '76.0.3780.1',
101 '76.0.3780.0',
102 '75.0.3770.15',
103 '74.0.3729.127',
104 '74.0.3729.126',
105 '76.0.3779.1',
106 '76.0.3779.0',
107 '75.0.3770.14',
108 '74.0.3729.125',
109 '76.0.3778.1',
110 '76.0.3778.0',
111 '75.0.3770.13',
112 '74.0.3729.124',
113 '74.0.3729.123',
114 '73.0.3683.121',
115 '76.0.3777.1',
116 '76.0.3777.0',
117 '75.0.3770.12',
118 '74.0.3729.122',
119 '76.0.3776.4',
120 '75.0.3770.11',
121 '74.0.3729.121',
122 '76.0.3776.3',
123 '76.0.3776.2',
124 '73.0.3683.120',
125 '74.0.3729.120',
126 '74.0.3729.119',
127 '74.0.3729.118',
128 '76.0.3776.1',
129 '76.0.3776.0',
130 '76.0.3775.5',
131 '75.0.3770.10',
132 '74.0.3729.117',
133 '76.0.3775.4',
134 '76.0.3775.3',
135 '74.0.3729.116',
136 '75.0.3770.9',
137 '76.0.3775.2',
138 '76.0.3775.1',
139 '76.0.3775.0',
140 '75.0.3770.8',
141 '74.0.3729.115',
142 '74.0.3729.114',
143 '76.0.3774.1',
144 '76.0.3774.0',
145 '75.0.3770.7',
146 '74.0.3729.113',
147 '74.0.3729.112',
148 '74.0.3729.111',
149 '76.0.3773.1',
150 '76.0.3773.0',
151 '75.0.3770.6',
152 '74.0.3729.110',
153 '74.0.3729.109',
154 '76.0.3772.1',
155 '76.0.3772.0',
156 '75.0.3770.5',
157 '74.0.3729.108',
158 '74.0.3729.107',
159 '76.0.3771.1',
160 '76.0.3771.0',
161 '75.0.3770.4',
162 '74.0.3729.106',
163 '74.0.3729.105',
164 '75.0.3770.3',
165 '74.0.3729.104',
166 '74.0.3729.103',
167 '74.0.3729.102',
168 '75.0.3770.2',
169 '74.0.3729.101',
170 '75.0.3770.1',
171 '75.0.3770.0',
172 '74.0.3729.100',
173 '75.0.3769.5',
174 '75.0.3769.4',
175 '74.0.3729.99',
176 '75.0.3769.3',
177 '75.0.3769.2',
178 '75.0.3768.6',
179 '74.0.3729.98',
180 '75.0.3769.1',
181 '75.0.3769.0',
182 '74.0.3729.97',
183 '73.0.3683.119',
184 '73.0.3683.118',
185 '74.0.3729.96',
186 '75.0.3768.5',
187 '75.0.3768.4',
188 '75.0.3768.3',
189 '75.0.3768.2',
190 '74.0.3729.95',
191 '74.0.3729.94',
192 '75.0.3768.1',
193 '75.0.3768.0',
194 '74.0.3729.93',
195 '74.0.3729.92',
196 '73.0.3683.117',
197 '74.0.3729.91',
198 '75.0.3766.3',
199 '74.0.3729.90',
200 '75.0.3767.2',
201 '75.0.3767.1',
202 '75.0.3767.0',
203 '74.0.3729.89',
204 '73.0.3683.116',
205 '75.0.3766.2',
206 '74.0.3729.88',
207 '75.0.3766.1',
208 '75.0.3766.0',
209 '74.0.3729.87',
210 '73.0.3683.115',
211 '74.0.3729.86',
212 '75.0.3765.1',
213 '75.0.3765.0',
214 '74.0.3729.85',
215 '73.0.3683.114',
216 '74.0.3729.84',
217 '75.0.3764.1',
218 '75.0.3764.0',
219 '74.0.3729.83',
220 '73.0.3683.113',
221 '75.0.3763.2',
222 '75.0.3761.4',
223 '74.0.3729.82',
224 '75.0.3763.1',
225 '75.0.3763.0',
226 '74.0.3729.81',
227 '73.0.3683.112',
228 '75.0.3762.1',
229 '75.0.3762.0',
230 '74.0.3729.80',
231 '75.0.3761.3',
232 '74.0.3729.79',
233 '73.0.3683.111',
234 '75.0.3761.2',
235 '74.0.3729.78',
236 '74.0.3729.77',
237 '75.0.3761.1',
238 '75.0.3761.0',
239 '73.0.3683.110',
240 '74.0.3729.76',
241 '74.0.3729.75',
242 '75.0.3760.0',
243 '74.0.3729.74',
244 '75.0.3759.8',
245 '75.0.3759.7',
246 '75.0.3759.6',
247 '74.0.3729.73',
248 '75.0.3759.5',
249 '74.0.3729.72',
250 '73.0.3683.109',
251 '75.0.3759.4',
252 '75.0.3759.3',
253 '74.0.3729.71',
254 '75.0.3759.2',
255 '74.0.3729.70',
256 '73.0.3683.108',
257 '74.0.3729.69',
258 '75.0.3759.1',
259 '75.0.3759.0',
260 '74.0.3729.68',
261 '73.0.3683.107',
262 '74.0.3729.67',
263 '75.0.3758.1',
264 '75.0.3758.0',
265 '74.0.3729.66',
266 '73.0.3683.106',
267 '74.0.3729.65',
268 '75.0.3757.1',
269 '75.0.3757.0',
270 '74.0.3729.64',
271 '73.0.3683.105',
272 '74.0.3729.63',
273 '75.0.3756.1',
274 '75.0.3756.0',
275 '74.0.3729.62',
276 '73.0.3683.104',
277 '75.0.3755.3',
278 '75.0.3755.2',
279 '73.0.3683.103',
280 '75.0.3755.1',
281 '75.0.3755.0',
282 '74.0.3729.61',
283 '73.0.3683.102',
284 '74.0.3729.60',
285 '75.0.3754.2',
286 '74.0.3729.59',
287 '75.0.3753.4',
288 '74.0.3729.58',
289 '75.0.3754.1',
290 '75.0.3754.0',
291 '74.0.3729.57',
292 '73.0.3683.101',
293 '75.0.3753.3',
294 '75.0.3752.2',
295 '75.0.3753.2',
296 '74.0.3729.56',
297 '75.0.3753.1',
298 '75.0.3753.0',
299 '74.0.3729.55',
300 '73.0.3683.100',
301 '74.0.3729.54',
302 '75.0.3752.1',
303 '75.0.3752.0',
304 '74.0.3729.53',
305 '73.0.3683.99',
306 '74.0.3729.52',
307 '75.0.3751.1',
308 '75.0.3751.0',
309 '74.0.3729.51',
310 '73.0.3683.98',
311 '74.0.3729.50',
312 '75.0.3750.0',
313 '74.0.3729.49',
314 '74.0.3729.48',
315 '74.0.3729.47',
316 '75.0.3749.3',
317 '74.0.3729.46',
318 '73.0.3683.97',
319 '75.0.3749.2',
320 '74.0.3729.45',
321 '75.0.3749.1',
322 '75.0.3749.0',
323 '74.0.3729.44',
324 '73.0.3683.96',
325 '74.0.3729.43',
326 '74.0.3729.42',
327 '75.0.3748.1',
328 '75.0.3748.0',
329 '74.0.3729.41',
330 '75.0.3747.1',
331 '73.0.3683.95',
332 '75.0.3746.4',
333 '74.0.3729.40',
334 '74.0.3729.39',
335 '75.0.3747.0',
336 '75.0.3746.3',
337 '75.0.3746.2',
338 '74.0.3729.38',
339 '75.0.3746.1',
340 '75.0.3746.0',
341 '74.0.3729.37',
342 '73.0.3683.94',
343 '75.0.3745.5',
344 '75.0.3745.4',
345 '75.0.3745.3',
346 '75.0.3745.2',
347 '74.0.3729.36',
348 '75.0.3745.1',
349 '75.0.3745.0',
350 '75.0.3744.2',
351 '74.0.3729.35',
352 '73.0.3683.93',
353 '74.0.3729.34',
354 '75.0.3744.1',
355 '75.0.3744.0',
356 '74.0.3729.33',
357 '73.0.3683.92',
358 '74.0.3729.32',
359 '74.0.3729.31',
360 '73.0.3683.91',
361 '75.0.3741.2',
362 '75.0.3740.5',
363 '74.0.3729.30',
364 '75.0.3741.1',
365 '75.0.3741.0',
366 '74.0.3729.29',
367 '75.0.3740.4',
368 '73.0.3683.90',
369 '74.0.3729.28',
370 '75.0.3740.3',
371 '73.0.3683.89',
372 '75.0.3740.2',
373 '74.0.3729.27',
374 '75.0.3740.1',
375 '75.0.3740.0',
376 '74.0.3729.26',
377 '73.0.3683.88',
378 '73.0.3683.87',
379 '74.0.3729.25',
380 '75.0.3739.1',
381 '75.0.3739.0',
382 '73.0.3683.86',
383 '74.0.3729.24',
384 '73.0.3683.85',
385 '75.0.3738.4',
386 '75.0.3738.3',
387 '75.0.3738.2',
388 '75.0.3738.1',
389 '75.0.3738.0',
390 '74.0.3729.23',
391 '73.0.3683.84',
392 '74.0.3729.22',
393 '74.0.3729.21',
394 '75.0.3737.1',
395 '75.0.3737.0',
396 '74.0.3729.20',
397 '73.0.3683.83',
398 '74.0.3729.19',
399 '75.0.3736.1',
400 '75.0.3736.0',
401 '74.0.3729.18',
402 '73.0.3683.82',
403 '74.0.3729.17',
404 '75.0.3735.1',
405 '75.0.3735.0',
406 '74.0.3729.16',
407 '73.0.3683.81',
408 '75.0.3734.1',
409 '75.0.3734.0',
410 '74.0.3729.15',
411 '73.0.3683.80',
412 '74.0.3729.14',
413 '75.0.3733.1',
414 '75.0.3733.0',
415 '75.0.3732.1',
416 '74.0.3729.13',
417 '74.0.3729.12',
418 '73.0.3683.79',
419 '74.0.3729.11',
420 '75.0.3732.0',
421 '74.0.3729.10',
422 '73.0.3683.78',
423 '74.0.3729.9',
424 '74.0.3729.8',
425 '74.0.3729.7',
426 '75.0.3731.3',
427 '75.0.3731.2',
428 '75.0.3731.0',
429 '74.0.3729.6',
430 '73.0.3683.77',
431 '73.0.3683.76',
432 '75.0.3730.5',
433 '75.0.3730.4',
434 '73.0.3683.75',
435 '74.0.3729.5',
436 '73.0.3683.74',
437 '75.0.3730.3',
438 '75.0.3730.2',
439 '74.0.3729.4',
440 '73.0.3683.73',
441 '73.0.3683.72',
442 '75.0.3730.1',
443 '75.0.3730.0',
444 '74.0.3729.3',
445 '73.0.3683.71',
446 '74.0.3729.2',
447 '73.0.3683.70',
448 '74.0.3729.1',
449 '74.0.3729.0',
450 '74.0.3726.4',
451 '73.0.3683.69',
452 '74.0.3726.3',
453 '74.0.3728.0',
454 '74.0.3726.2',
455 '73.0.3683.68',
456 '74.0.3726.1',
457 '74.0.3726.0',
458 '74.0.3725.4',
459 '73.0.3683.67',
460 '73.0.3683.66',
461 '74.0.3725.3',
462 '74.0.3725.2',
463 '74.0.3725.1',
464 '74.0.3724.8',
465 '74.0.3725.0',
466 '73.0.3683.65',
467 '74.0.3724.7',
468 '74.0.3724.6',
469 '74.0.3724.5',
470 '74.0.3724.4',
471 '74.0.3724.3',
472 '74.0.3724.2',
473 '74.0.3724.1',
474 '74.0.3724.0',
475 '73.0.3683.64',
476 '74.0.3723.1',
477 '74.0.3723.0',
478 '73.0.3683.63',
479 '74.0.3722.1',
480 '74.0.3722.0',
481 '73.0.3683.62',
482 '74.0.3718.9',
483 '74.0.3702.3',
484 '74.0.3721.3',
485 '74.0.3721.2',
486 '74.0.3721.1',
487 '74.0.3721.0',
488 '74.0.3720.6',
489 '73.0.3683.61',
490 '72.0.3626.122',
491 '73.0.3683.60',
492 '74.0.3720.5',
493 '72.0.3626.121',
494 '74.0.3718.8',
495 '74.0.3720.4',
496 '74.0.3720.3',
497 '74.0.3718.7',
498 '74.0.3720.2',
499 '74.0.3720.1',
500 '74.0.3720.0',
501 '74.0.3718.6',
502 '74.0.3719.5',
503 '73.0.3683.59',
504 '74.0.3718.5',
505 '74.0.3718.4',
506 '74.0.3719.4',
507 '74.0.3719.3',
508 '74.0.3719.2',
509 '74.0.3719.1',
510 '73.0.3683.58',
511 '74.0.3719.0',
512 '73.0.3683.57',
513 '73.0.3683.56',
514 '74.0.3718.3',
515 '73.0.3683.55',
516 '74.0.3718.2',
517 '74.0.3718.1',
518 '74.0.3718.0',
519 '73.0.3683.54',
520 '74.0.3717.2',
521 '73.0.3683.53',
522 '74.0.3717.1',
523 '74.0.3717.0',
524 '73.0.3683.52',
525 '74.0.3716.1',
526 '74.0.3716.0',
527 '73.0.3683.51',
528 '74.0.3715.1',
529 '74.0.3715.0',
530 '73.0.3683.50',
531 '74.0.3711.2',
532 '74.0.3714.2',
533 '74.0.3713.3',
534 '74.0.3714.1',
535 '74.0.3714.0',
536 '73.0.3683.49',
537 '74.0.3713.1',
538 '74.0.3713.0',
539 '72.0.3626.120',
540 '73.0.3683.48',
541 '74.0.3712.2',
542 '74.0.3712.1',
543 '74.0.3712.0',
544 '73.0.3683.47',
545 '72.0.3626.119',
546 '73.0.3683.46',
547 '74.0.3710.2',
548 '72.0.3626.118',
549 '74.0.3711.1',
550 '74.0.3711.0',
551 '73.0.3683.45',
552 '72.0.3626.117',
553 '74.0.3710.1',
554 '74.0.3710.0',
555 '73.0.3683.44',
556 '72.0.3626.116',
557 '74.0.3709.1',
558 '74.0.3709.0',
559 '74.0.3704.9',
560 '73.0.3683.43',
561 '72.0.3626.115',
562 '74.0.3704.8',
563 '74.0.3704.7',
564 '74.0.3708.0',
565 '74.0.3706.7',
566 '74.0.3704.6',
567 '73.0.3683.42',
568 '72.0.3626.114',
569 '74.0.3706.6',
570 '72.0.3626.113',
571 '74.0.3704.5',
572 '74.0.3706.5',
573 '74.0.3706.4',
574 '74.0.3706.3',
575 '74.0.3706.2',
576 '74.0.3706.1',
577 '74.0.3706.0',
578 '73.0.3683.41',
579 '72.0.3626.112',
580 '74.0.3705.1',
581 '74.0.3705.0',
582 '73.0.3683.40',
583 '72.0.3626.111',
584 '73.0.3683.39',
585 '74.0.3704.4',
586 '73.0.3683.38',
587 '74.0.3704.3',
588 '74.0.3704.2',
589 '74.0.3704.1',
590 '74.0.3704.0',
591 '73.0.3683.37',
592 '72.0.3626.110',
593 '72.0.3626.109',
594 '74.0.3703.3',
595 '74.0.3703.2',
596 '73.0.3683.36',
597 '74.0.3703.1',
598 '74.0.3703.0',
599 '73.0.3683.35',
600 '72.0.3626.108',
601 '74.0.3702.2',
602 '74.0.3699.3',
603 '74.0.3702.1',
604 '74.0.3702.0',
605 '73.0.3683.34',
606 '72.0.3626.107',
607 '73.0.3683.33',
608 '74.0.3701.1',
609 '74.0.3701.0',
610 '73.0.3683.32',
611 '73.0.3683.31',
612 '72.0.3626.105',
613 '74.0.3700.1',
614 '74.0.3700.0',
615 '73.0.3683.29',
616 '72.0.3626.103',
617 '74.0.3699.2',
618 '74.0.3699.1',
619 '74.0.3699.0',
620 '73.0.3683.28',
621 '72.0.3626.102',
622 '73.0.3683.27',
623 '73.0.3683.26',
624 '74.0.3698.0',
625 '74.0.3696.2',
626 '72.0.3626.101',
627 '73.0.3683.25',
628 '74.0.3696.1',
629 '74.0.3696.0',
630 '74.0.3694.8',
631 '72.0.3626.100',
632 '74.0.3694.7',
633 '74.0.3694.6',
634 '74.0.3694.5',
635 '74.0.3694.4',
636 '72.0.3626.99',
637 '72.0.3626.98',
638 '74.0.3694.3',
639 '73.0.3683.24',
640 '72.0.3626.97',
641 '72.0.3626.96',
642 '72.0.3626.95',
643 '73.0.3683.23',
644 '72.0.3626.94',
645 '73.0.3683.22',
646 '73.0.3683.21',
647 '72.0.3626.93',
648 '74.0.3694.2',
649 '72.0.3626.92',
650 '74.0.3694.1',
651 '74.0.3694.0',
652 '74.0.3693.6',
653 '73.0.3683.20',
654 '72.0.3626.91',
655 '74.0.3693.5',
656 '74.0.3693.4',
657 '74.0.3693.3',
658 '74.0.3693.2',
659 '73.0.3683.19',
660 '74.0.3693.1',
661 '74.0.3693.0',
662 '73.0.3683.18',
663 '72.0.3626.90',
664 '74.0.3692.1',
665 '74.0.3692.0',
666 '73.0.3683.17',
667 '72.0.3626.89',
668 '74.0.3687.3',
669 '74.0.3691.1',
670 '74.0.3691.0',
671 '73.0.3683.16',
672 '72.0.3626.88',
673 '72.0.3626.87',
674 '73.0.3683.15',
675 '74.0.3690.1',
676 '74.0.3690.0',
677 '73.0.3683.14',
678 '72.0.3626.86',
679 '73.0.3683.13',
680 '73.0.3683.12',
681 '74.0.3689.1',
682 '74.0.3689.0',
683 '73.0.3683.11',
684 '72.0.3626.85',
685 '73.0.3683.10',
686 '72.0.3626.84',
687 '73.0.3683.9',
688 '74.0.3688.1',
689 '74.0.3688.0',
690 '73.0.3683.8',
691 '72.0.3626.83',
692 '74.0.3687.2',
693 '74.0.3687.1',
694 '74.0.3687.0',
695 '73.0.3683.7',
696 '72.0.3626.82',
697 '74.0.3686.4',
698 '72.0.3626.81',
699 '74.0.3686.3',
700 '74.0.3686.2',
701 '74.0.3686.1',
702 '74.0.3686.0',
703 '73.0.3683.6',
704 '72.0.3626.80',
705 '74.0.3685.1',
706 '74.0.3685.0',
707 '73.0.3683.5',
708 '72.0.3626.79',
709 '74.0.3684.1',
710 '74.0.3684.0',
711 '73.0.3683.4',
712 '72.0.3626.78',
713 '72.0.3626.77',
714 '73.0.3683.3',
715 '73.0.3683.2',
716 '72.0.3626.76',
717 '73.0.3683.1',
718 '73.0.3683.0',
719 '72.0.3626.75',
720 '71.0.3578.141',
721 '73.0.3682.1',
722 '73.0.3682.0',
723 '72.0.3626.74',
724 '71.0.3578.140',
725 '73.0.3681.4',
726 '73.0.3681.3',
727 '73.0.3681.2',
728 '73.0.3681.1',
729 '73.0.3681.0',
730 '72.0.3626.73',
731 '71.0.3578.139',
732 '72.0.3626.72',
733 '72.0.3626.71',
734 '73.0.3680.1',
735 '73.0.3680.0',
736 '72.0.3626.70',
737 '71.0.3578.138',
738 '73.0.3678.2',
739 '73.0.3679.1',
740 '73.0.3679.0',
741 '72.0.3626.69',
742 '71.0.3578.137',
743 '73.0.3678.1',
744 '73.0.3678.0',
745 '71.0.3578.136',
746 '73.0.3677.1',
747 '73.0.3677.0',
748 '72.0.3626.68',
749 '72.0.3626.67',
750 '71.0.3578.135',
751 '73.0.3676.1',
752 '73.0.3676.0',
753 '73.0.3674.2',
754 '72.0.3626.66',
755 '71.0.3578.134',
756 '73.0.3674.1',
757 '73.0.3674.0',
758 '72.0.3626.65',
759 '71.0.3578.133',
760 '73.0.3673.2',
761 '73.0.3673.1',
762 '73.0.3673.0',
763 '72.0.3626.64',
764 '71.0.3578.132',
765 '72.0.3626.63',
766 '72.0.3626.62',
767 '72.0.3626.61',
768 '72.0.3626.60',
769 '73.0.3672.1',
770 '73.0.3672.0',
771 '72.0.3626.59',
772 '71.0.3578.131',
773 '73.0.3671.3',
774 '73.0.3671.2',
775 '73.0.3671.1',
776 '73.0.3671.0',
777 '72.0.3626.58',
778 '71.0.3578.130',
779 '73.0.3670.1',
780 '73.0.3670.0',
781 '72.0.3626.57',
782 '71.0.3578.129',
783 '73.0.3669.1',
784 '73.0.3669.0',
785 '72.0.3626.56',
786 '71.0.3578.128',
787 '73.0.3668.2',
788 '73.0.3668.1',
789 '73.0.3668.0',
790 '72.0.3626.55',
791 '71.0.3578.127',
792 '73.0.3667.2',
793 '73.0.3667.1',
794 '73.0.3667.0',
795 '72.0.3626.54',
796 '71.0.3578.126',
797 '73.0.3666.1',
798 '73.0.3666.0',
799 '72.0.3626.53',
800 '71.0.3578.125',
801 '73.0.3665.4',
802 '73.0.3665.3',
803 '72.0.3626.52',
804 '73.0.3665.2',
805 '73.0.3664.4',
806 '73.0.3665.1',
807 '73.0.3665.0',
808 '72.0.3626.51',
809 '71.0.3578.124',
810 '72.0.3626.50',
811 '73.0.3664.3',
812 '73.0.3664.2',
813 '73.0.3664.1',
814 '73.0.3664.0',
815 '73.0.3663.2',
816 '72.0.3626.49',
817 '71.0.3578.123',
818 '73.0.3663.1',
819 '73.0.3663.0',
820 '72.0.3626.48',
821 '71.0.3578.122',
822 '73.0.3662.1',
823 '73.0.3662.0',
824 '72.0.3626.47',
825 '71.0.3578.121',
826 '73.0.3661.1',
827 '72.0.3626.46',
828 '73.0.3661.0',
829 '72.0.3626.45',
830 '71.0.3578.120',
831 '73.0.3660.2',
832 '73.0.3660.1',
833 '73.0.3660.0',
834 '72.0.3626.44',
835 '71.0.3578.119',
836 '73.0.3659.1',
837 '73.0.3659.0',
838 '72.0.3626.43',
839 '71.0.3578.118',
840 '73.0.3658.1',
841 '73.0.3658.0',
842 '72.0.3626.42',
843 '71.0.3578.117',
844 '73.0.3657.1',
845 '73.0.3657.0',
846 '72.0.3626.41',
847 '71.0.3578.116',
848 '73.0.3656.1',
849 '73.0.3656.0',
850 '72.0.3626.40',
851 '71.0.3578.115',
852 '73.0.3655.1',
853 '73.0.3655.0',
854 '72.0.3626.39',
855 '71.0.3578.114',
856 '73.0.3654.1',
857 '73.0.3654.0',
858 '72.0.3626.38',
859 '71.0.3578.113',
860 '73.0.3653.1',
861 '73.0.3653.0',
862 '72.0.3626.37',
863 '71.0.3578.112',
864 '73.0.3652.1',
865 '73.0.3652.0',
866 '72.0.3626.36',
867 '71.0.3578.111',
868 '73.0.3651.1',
869 '73.0.3651.0',
870 '72.0.3626.35',
871 '71.0.3578.110',
872 '73.0.3650.1',
873 '73.0.3650.0',
874 '72.0.3626.34',
875 '71.0.3578.109',
876 '73.0.3649.1',
877 '73.0.3649.0',
878 '72.0.3626.33',
879 '71.0.3578.108',
880 '73.0.3648.2',
881 '73.0.3648.1',
882 '73.0.3648.0',
883 '72.0.3626.32',
884 '71.0.3578.107',
885 '73.0.3647.2',
886 '73.0.3647.1',
887 '73.0.3647.0',
888 '72.0.3626.31',
889 '71.0.3578.106',
890 '73.0.3635.3',
891 '73.0.3646.2',
892 '73.0.3646.1',
893 '73.0.3646.0',
894 '72.0.3626.30',
895 '71.0.3578.105',
896 '72.0.3626.29',
897 '73.0.3645.2',
898 '73.0.3645.1',
899 '73.0.3645.0',
900 '72.0.3626.28',
901 '71.0.3578.104',
902 '72.0.3626.27',
903 '72.0.3626.26',
904 '72.0.3626.25',
905 '72.0.3626.24',
906 '73.0.3644.0',
907 '73.0.3643.2',
908 '72.0.3626.23',
909 '71.0.3578.103',
910 '73.0.3643.1',
911 '73.0.3643.0',
912 '72.0.3626.22',
913 '71.0.3578.102',
914 '73.0.3642.1',
915 '73.0.3642.0',
916 '72.0.3626.21',
917 '71.0.3578.101',
918 '73.0.3641.1',
919 '73.0.3641.0',
920 '72.0.3626.20',
921 '71.0.3578.100',
922 '72.0.3626.19',
923 '73.0.3640.1',
924 '73.0.3640.0',
925 '72.0.3626.18',
926 '73.0.3639.1',
927 '71.0.3578.99',
928 '73.0.3639.0',
929 '72.0.3626.17',
930 '73.0.3638.2',
931 '72.0.3626.16',
932 '73.0.3638.1',
933 '73.0.3638.0',
934 '72.0.3626.15',
935 '71.0.3578.98',
936 '73.0.3635.2',
937 '71.0.3578.97',
938 '73.0.3637.1',
939 '73.0.3637.0',
940 '72.0.3626.14',
941 '71.0.3578.96',
942 '71.0.3578.95',
943 '72.0.3626.13',
944 '71.0.3578.94',
945 '73.0.3636.2',
946 '71.0.3578.93',
947 '73.0.3636.1',
948 '73.0.3636.0',
949 '72.0.3626.12',
950 '71.0.3578.92',
951 '73.0.3635.1',
952 '73.0.3635.0',
953 '72.0.3626.11',
954 '71.0.3578.91',
955 '73.0.3634.2',
956 '73.0.3634.1',
957 '73.0.3634.0',
958 '72.0.3626.10',
959 '71.0.3578.90',
960 '71.0.3578.89',
961 '73.0.3633.2',
962 '73.0.3633.1',
963 '73.0.3633.0',
964 '72.0.3610.4',
965 '72.0.3626.9',
966 '71.0.3578.88',
967 '73.0.3632.5',
968 '73.0.3632.4',
969 '73.0.3632.3',
970 '73.0.3632.2',
971 '73.0.3632.1',
972 '73.0.3632.0',
973 '72.0.3626.8',
974 '71.0.3578.87',
975 '73.0.3631.2',
976 '73.0.3631.1',
977 '73.0.3631.0',
978 '72.0.3626.7',
979 '71.0.3578.86',
980 '72.0.3626.6',
981 '73.0.3630.1',
982 '73.0.3630.0',
983 '72.0.3626.5',
984 '71.0.3578.85',
985 '72.0.3626.4',
986 '73.0.3628.3',
987 '73.0.3628.2',
988 '73.0.3629.1',
989 '73.0.3629.0',
990 '72.0.3626.3',
991 '71.0.3578.84',
992 '73.0.3628.1',
993 '73.0.3628.0',
994 '71.0.3578.83',
995 '73.0.3627.1',
996 '73.0.3627.0',
997 '72.0.3626.2',
998 '71.0.3578.82',
999 '71.0.3578.81',
1000 '71.0.3578.80',
1001 '72.0.3626.1',
1002 '72.0.3626.0',
1003 '71.0.3578.79',
1004 '70.0.3538.124',
1005 '71.0.3578.78',
1006 '72.0.3623.4',
1007 '72.0.3625.2',
1008 '72.0.3625.1',
1009 '72.0.3625.0',
1010 '71.0.3578.77',
1011 '70.0.3538.123',
1012 '72.0.3624.4',
1013 '72.0.3624.3',
1014 '72.0.3624.2',
1015 '71.0.3578.76',
1016 '72.0.3624.1',
1017 '72.0.3624.0',
1018 '72.0.3623.3',
1019 '71.0.3578.75',
1020 '70.0.3538.122',
1021 '71.0.3578.74',
1022 '72.0.3623.2',
1023 '72.0.3610.3',
1024 '72.0.3623.1',
1025 '72.0.3623.0',
1026 '72.0.3622.3',
1027 '72.0.3622.2',
1028 '71.0.3578.73',
1029 '70.0.3538.121',
1030 '72.0.3622.1',
1031 '72.0.3622.0',
1032 '71.0.3578.72',
1033 '70.0.3538.120',
1034 '72.0.3621.1',
1035 '72.0.3621.0',
1036 '71.0.3578.71',
1037 '70.0.3538.119',
1038 '72.0.3620.1',
1039 '72.0.3620.0',
1040 '71.0.3578.70',
1041 '70.0.3538.118',
1042 '71.0.3578.69',
1043 '72.0.3619.1',
1044 '72.0.3619.0',
1045 '71.0.3578.68',
1046 '70.0.3538.117',
1047 '71.0.3578.67',
1048 '72.0.3618.1',
1049 '72.0.3618.0',
1050 '71.0.3578.66',
1051 '70.0.3538.116',
1052 '72.0.3617.1',
1053 '72.0.3617.0',
1054 '71.0.3578.65',
1055 '70.0.3538.115',
1056 '72.0.3602.3',
1057 '71.0.3578.64',
1058 '72.0.3616.1',
1059 '72.0.3616.0',
1060 '71.0.3578.63',
1061 '70.0.3538.114',
1062 '71.0.3578.62',
1063 '72.0.3615.1',
1064 '72.0.3615.0',
1065 '71.0.3578.61',
1066 '70.0.3538.113',
1067 '72.0.3614.1',
1068 '72.0.3614.0',
1069 '71.0.3578.60',
1070 '70.0.3538.112',
1071 '72.0.3613.1',
1072 '72.0.3613.0',
1073 '71.0.3578.59',
1074 '70.0.3538.111',
1075 '72.0.3612.2',
1076 '72.0.3612.1',
1077 '72.0.3612.0',
1078 '70.0.3538.110',
1079 '71.0.3578.58',
1080 '70.0.3538.109',
1081 '72.0.3611.2',
1082 '72.0.3611.1',
1083 '72.0.3611.0',
1084 '71.0.3578.57',
1085 '70.0.3538.108',
1086 '72.0.3610.2',
1087 '71.0.3578.56',
1088 '71.0.3578.55',
1089 '72.0.3610.1',
1090 '72.0.3610.0',
1091 '71.0.3578.54',
1092 '70.0.3538.107',
1093 '71.0.3578.53',
1094 '72.0.3609.3',
1095 '71.0.3578.52',
1096 '72.0.3609.2',
1097 '71.0.3578.51',
1098 '72.0.3608.5',
1099 '72.0.3609.1',
1100 '72.0.3609.0',
1101 '71.0.3578.50',
1102 '70.0.3538.106',
1103 '72.0.3608.4',
1104 '72.0.3608.3',
1105 '72.0.3608.2',
1106 '71.0.3578.49',
1107 '72.0.3608.1',
1108 '72.0.3608.0',
1109 '70.0.3538.105',
1110 '71.0.3578.48',
1111 '72.0.3607.1',
1112 '72.0.3607.0',
1113 '71.0.3578.47',
1114 '70.0.3538.104',
1115 '72.0.3606.2',
1116 '72.0.3606.1',
1117 '72.0.3606.0',
1118 '71.0.3578.46',
1119 '70.0.3538.103',
1120 '70.0.3538.102',
1121 '72.0.3605.3',
1122 '72.0.3605.2',
1123 '72.0.3605.1',
1124 '72.0.3605.0',
1125 '71.0.3578.45',
1126 '70.0.3538.101',
1127 '71.0.3578.44',
1128 '71.0.3578.43',
1129 '70.0.3538.100',
1130 '70.0.3538.99',
1131 '71.0.3578.42',
1132 '72.0.3604.1',
1133 '72.0.3604.0',
1134 '71.0.3578.41',
1135 '70.0.3538.98',
1136 '71.0.3578.40',
1137 '72.0.3603.2',
1138 '72.0.3603.1',
1139 '72.0.3603.0',
1140 '71.0.3578.39',
1141 '70.0.3538.97',
1142 '72.0.3602.2',
1143 '71.0.3578.38',
1144 '71.0.3578.37',
1145 '72.0.3602.1',
1146 '72.0.3602.0',
1147 '71.0.3578.36',
1148 '70.0.3538.96',
1149 '72.0.3601.1',
1150 '72.0.3601.0',
1151 '71.0.3578.35',
1152 '70.0.3538.95',
1153 '72.0.3600.1',
1154 '72.0.3600.0',
1155 '71.0.3578.34',
1156 '70.0.3538.94',
1157 '72.0.3599.3',
1158 '72.0.3599.2',
1159 '72.0.3599.1',
1160 '72.0.3599.0',
1161 '71.0.3578.33',
1162 '70.0.3538.93',
1163 '72.0.3598.1',
1164 '72.0.3598.0',
1165 '71.0.3578.32',
1166 '70.0.3538.87',
1167 '72.0.3597.1',
1168 '72.0.3597.0',
1169 '72.0.3596.2',
1170 '71.0.3578.31',
1171 '70.0.3538.86',
1172 '71.0.3578.30',
1173 '71.0.3578.29',
1174 '72.0.3596.1',
1175 '72.0.3596.0',
1176 '71.0.3578.28',
1177 '70.0.3538.85',
1178 '72.0.3595.2',
1179 '72.0.3591.3',
1180 '72.0.3595.1',
1181 '72.0.3595.0',
1182 '71.0.3578.27',
1183 '70.0.3538.84',
1184 '72.0.3594.1',
1185 '72.0.3594.0',
1186 '71.0.3578.26',
1187 '70.0.3538.83',
1188 '72.0.3593.2',
1189 '72.0.3593.1',
1190 '72.0.3593.0',
1191 '71.0.3578.25',
1192 '70.0.3538.82',
1193 '72.0.3589.3',
1194 '72.0.3592.2',
1195 '72.0.3592.1',
1196 '72.0.3592.0',
1197 '71.0.3578.24',
1198 '72.0.3589.2',
1199 '70.0.3538.81',
1200 '70.0.3538.80',
1201 '72.0.3591.2',
1202 '72.0.3591.1',
1203 '72.0.3591.0',
1204 '71.0.3578.23',
1205 '70.0.3538.79',
1206 '71.0.3578.22',
1207 '72.0.3590.1',
1208 '72.0.3590.0',
1209 '71.0.3578.21',
1210 '70.0.3538.78',
1211 '70.0.3538.77',
1212 '72.0.3589.1',
1213 '72.0.3589.0',
1214 '71.0.3578.20',
1215 '70.0.3538.76',
1216 '71.0.3578.19',
1217 '70.0.3538.75',
1218 '72.0.3588.1',
1219 '72.0.3588.0',
1220 '71.0.3578.18',
1221 '70.0.3538.74',
1222 '72.0.3586.2',
1223 '72.0.3587.0',
1224 '71.0.3578.17',
1225 '70.0.3538.73',
1226 '72.0.3586.1',
1227 '72.0.3586.0',
1228 '71.0.3578.16',
1229 '70.0.3538.72',
1230 '72.0.3585.1',
1231 '72.0.3585.0',
1232 '71.0.3578.15',
1233 '70.0.3538.71',
1234 '71.0.3578.14',
1235 '72.0.3584.1',
1236 '72.0.3584.0',
1237 '71.0.3578.13',
1238 '70.0.3538.70',
1239 '72.0.3583.2',
1240 '71.0.3578.12',
1241 '72.0.3583.1',
1242 '72.0.3583.0',
1243 '71.0.3578.11',
1244 '70.0.3538.69',
1245 '71.0.3578.10',
1246 '72.0.3582.0',
1247 '72.0.3581.4',
1248 '71.0.3578.9',
1249 '70.0.3538.67',
1250 '72.0.3581.3',
1251 '72.0.3581.2',
1252 '72.0.3581.1',
1253 '72.0.3581.0',
1254 '71.0.3578.8',
1255 '70.0.3538.66',
1256 '72.0.3580.1',
1257 '72.0.3580.0',
1258 '71.0.3578.7',
1259 '70.0.3538.65',
1260 '71.0.3578.6',
1261 '72.0.3579.1',
1262 '72.0.3579.0',
1263 '71.0.3578.5',
1264 '70.0.3538.64',
1265 '71.0.3578.4',
1266 '71.0.3578.3',
1267 '71.0.3578.2',
1268 '71.0.3578.1',
1269 '71.0.3578.0',
1270 '70.0.3538.63',
1271 '69.0.3497.128',
1272 '70.0.3538.62',
1273 '70.0.3538.61',
1274 '70.0.3538.60',
1275 '70.0.3538.59',
1276 '71.0.3577.1',
1277 '71.0.3577.0',
1278 '70.0.3538.58',
1279 '69.0.3497.127',
1280 '71.0.3576.2',
1281 '71.0.3576.1',
1282 '71.0.3576.0',
1283 '70.0.3538.57',
1284 '70.0.3538.56',
1285 '71.0.3575.2',
1286 '70.0.3538.55',
1287 '69.0.3497.126',
1288 '70.0.3538.54',
1289 '71.0.3575.1',
1290 '71.0.3575.0',
1291 '71.0.3574.1',
1292 '71.0.3574.0',
1293 '70.0.3538.53',
1294 '69.0.3497.125',
1295 '70.0.3538.52',
1296 '71.0.3573.1',
1297 '71.0.3573.0',
1298 '70.0.3538.51',
1299 '69.0.3497.124',
1300 '71.0.3572.1',
1301 '71.0.3572.0',
1302 '70.0.3538.50',
1303 '69.0.3497.123',
1304 '71.0.3571.2',
1305 '70.0.3538.49',
1306 '69.0.3497.122',
1307 '71.0.3571.1',
1308 '71.0.3571.0',
1309 '70.0.3538.48',
1310 '69.0.3497.121',
1311 '71.0.3570.1',
1312 '71.0.3570.0',
1313 '70.0.3538.47',
1314 '69.0.3497.120',
1315 '71.0.3568.2',
1316 '71.0.3569.1',
1317 '71.0.3569.0',
1318 '70.0.3538.46',
1319 '69.0.3497.119',
1320 '70.0.3538.45',
1321 '71.0.3568.1',
1322 '71.0.3568.0',
1323 '70.0.3538.44',
1324 '69.0.3497.118',
1325 '70.0.3538.43',
1326 '70.0.3538.42',
1327 '71.0.3567.1',
1328 '71.0.3567.0',
1329 '70.0.3538.41',
1330 '69.0.3497.117',
1331 '71.0.3566.1',
1332 '71.0.3566.0',
1333 '70.0.3538.40',
1334 '69.0.3497.116',
1335 '71.0.3565.1',
1336 '71.0.3565.0',
1337 '70.0.3538.39',
1338 '69.0.3497.115',
1339 '71.0.3564.1',
1340 '71.0.3564.0',
1341 '70.0.3538.38',
1342 '69.0.3497.114',
1343 '71.0.3563.0',
1344 '71.0.3562.2',
1345 '70.0.3538.37',
1346 '69.0.3497.113',
1347 '70.0.3538.36',
1348 '70.0.3538.35',
1349 '71.0.3562.1',
1350 '71.0.3562.0',
1351 '70.0.3538.34',
1352 '69.0.3497.112',
1353 '70.0.3538.33',
1354 '71.0.3561.1',
1355 '71.0.3561.0',
1356 '70.0.3538.32',
1357 '69.0.3497.111',
1358 '71.0.3559.6',
1359 '71.0.3560.1',
1360 '71.0.3560.0',
1361 '71.0.3559.5',
1362 '71.0.3559.4',
1363 '70.0.3538.31',
1364 '69.0.3497.110',
1365 '71.0.3559.3',
1366 '70.0.3538.30',
1367 '69.0.3497.109',
1368 '71.0.3559.2',
1369 '71.0.3559.1',
1370 '71.0.3559.0',
1371 '70.0.3538.29',
1372 '69.0.3497.108',
1373 '71.0.3558.2',
1374 '71.0.3558.1',
1375 '71.0.3558.0',
1376 '70.0.3538.28',
1377 '69.0.3497.107',
1378 '71.0.3557.2',
1379 '71.0.3557.1',
1380 '71.0.3557.0',
1381 '70.0.3538.27',
1382 '69.0.3497.106',
1383 '71.0.3554.4',
1384 '70.0.3538.26',
1385 '71.0.3556.1',
1386 '71.0.3556.0',
1387 '70.0.3538.25',
1388 '71.0.3554.3',
1389 '69.0.3497.105',
1390 '71.0.3554.2',
1391 '70.0.3538.24',
1392 '69.0.3497.104',
1393 '71.0.3555.2',
1394 '70.0.3538.23',
1395 '71.0.3555.1',
1396 '71.0.3555.0',
1397 '70.0.3538.22',
1398 '69.0.3497.103',
1399 '71.0.3554.1',
1400 '71.0.3554.0',
1401 '70.0.3538.21',
1402 '69.0.3497.102',
1403 '71.0.3553.3',
1404 '70.0.3538.20',
1405 '69.0.3497.101',
1406 '71.0.3553.2',
1407 '69.0.3497.100',
1408 '71.0.3553.1',
1409 '71.0.3553.0',
1410 '70.0.3538.19',
1411 '69.0.3497.99',
1412 '69.0.3497.98',
1413 '69.0.3497.97',
1414 '71.0.3552.6',
1415 '71.0.3552.5',
1416 '71.0.3552.4',
1417 '71.0.3552.3',
1418 '71.0.3552.2',
1419 '71.0.3552.1',
1420 '71.0.3552.0',
1421 '70.0.3538.18',
1422 '69.0.3497.96',
1423 '71.0.3551.3',
1424 '71.0.3551.2',
1425 '71.0.3551.1',
1426 '71.0.3551.0',
1427 '70.0.3538.17',
1428 '69.0.3497.95',
1429 '71.0.3550.3',
1430 '71.0.3550.2',
1431 '71.0.3550.1',
1432 '71.0.3550.0',
1433 '70.0.3538.16',
1434 '69.0.3497.94',
1435 '71.0.3549.1',
1436 '71.0.3549.0',
1437 '70.0.3538.15',
1438 '69.0.3497.93',
1439 '69.0.3497.92',
1440 '71.0.3548.1',
1441 '71.0.3548.0',
1442 '70.0.3538.14',
1443 '69.0.3497.91',
1444 '71.0.3547.1',
1445 '71.0.3547.0',
1446 '70.0.3538.13',
1447 '69.0.3497.90',
1448 '71.0.3546.2',
1449 '69.0.3497.89',
1450 '71.0.3546.1',
1451 '71.0.3546.0',
1452 '70.0.3538.12',
1453 '69.0.3497.88',
1454 '71.0.3545.4',
1455 '71.0.3545.3',
1456 '71.0.3545.2',
1457 '71.0.3545.1',
1458 '71.0.3545.0',
1459 '70.0.3538.11',
1460 '69.0.3497.87',
1461 '71.0.3544.5',
1462 '71.0.3544.4',
1463 '71.0.3544.3',
1464 '71.0.3544.2',
1465 '71.0.3544.1',
1466 '71.0.3544.0',
1467 '69.0.3497.86',
1468 '70.0.3538.10',
1469 '69.0.3497.85',
1470 '70.0.3538.9',
1471 '69.0.3497.84',
1472 '71.0.3543.4',
1473 '70.0.3538.8',
1474 '71.0.3543.3',
1475 '71.0.3543.2',
1476 '71.0.3543.1',
1477 '71.0.3543.0',
1478 '70.0.3538.7',
1479 '69.0.3497.83',
1480 '71.0.3542.2',
1481 '71.0.3542.1',
1482 '71.0.3542.0',
1483 '70.0.3538.6',
1484 '69.0.3497.82',
1485 '69.0.3497.81',
1486 '71.0.3541.1',
1487 '71.0.3541.0',
1488 '70.0.3538.5',
1489 '69.0.3497.80',
1490 '71.0.3540.1',
1491 '71.0.3540.0',
1492 '70.0.3538.4',
1493 '69.0.3497.79',
1494 '70.0.3538.3',
1495 '71.0.3539.1',
1496 '71.0.3539.0',
1497 '69.0.3497.78',
1498 '68.0.3440.134',
1499 '69.0.3497.77',
1500 '70.0.3538.2',
1501 '70.0.3538.1',
1502 '70.0.3538.0',
1503 '69.0.3497.76',
1504 '68.0.3440.133',
1505 '69.0.3497.75',
1506 '70.0.3537.2',
1507 '70.0.3537.1',
1508 '70.0.3537.0',
1509 '69.0.3497.74',
1510 '68.0.3440.132',
1511 '70.0.3536.0',
1512 '70.0.3535.5',
1513 '70.0.3535.4',
1514 '70.0.3535.3',
1515 '69.0.3497.73',
1516 '68.0.3440.131',
1517 '70.0.3532.8',
1518 '70.0.3532.7',
1519 '69.0.3497.72',
1520 '69.0.3497.71',
1521 '70.0.3535.2',
1522 '70.0.3535.1',
1523 '70.0.3535.0',
1524 '69.0.3497.70',
1525 '68.0.3440.130',
1526 '69.0.3497.69',
1527 '68.0.3440.129',
1528 '70.0.3534.4',
1529 '70.0.3534.3',
1530 '70.0.3534.2',
1531 '70.0.3534.1',
1532 '70.0.3534.0',
1533 '69.0.3497.68',
1534 '68.0.3440.128',
1535 '70.0.3533.2',
1536 '70.0.3533.1',
1537 '70.0.3533.0',
1538 '69.0.3497.67',
1539 '68.0.3440.127',
1540 '70.0.3532.6',
1541 '70.0.3532.5',
1542 '70.0.3532.4',
1543 '69.0.3497.66',
1544 '68.0.3440.126',
1545 '70.0.3532.3',
1546 '70.0.3532.2',
1547 '70.0.3532.1',
1548 '69.0.3497.60',
1549 '69.0.3497.65',
1550 '69.0.3497.64',
1551 '70.0.3532.0',
1552 '70.0.3531.0',
1553 '70.0.3530.4',
1554 '70.0.3530.3',
1555 '70.0.3530.2',
1556 '69.0.3497.58',
1557 '68.0.3440.125',
1558 '69.0.3497.57',
1559 '69.0.3497.56',
1560 '69.0.3497.55',
1561 '69.0.3497.54',
1562 '70.0.3530.1',
1563 '70.0.3530.0',
1564 '69.0.3497.53',
1565 '68.0.3440.124',
1566 '69.0.3497.52',
1567 '70.0.3529.3',
1568 '70.0.3529.2',
1569 '70.0.3529.1',
1570 '70.0.3529.0',
1571 '69.0.3497.51',
1572 '70.0.3528.4',
1573 '68.0.3440.123',
1574 '70.0.3528.3',
1575 '70.0.3528.2',
1576 '70.0.3528.1',
1577 '70.0.3528.0',
1578 '69.0.3497.50',
1579 '68.0.3440.122',
1580 '70.0.3527.1',
1581 '70.0.3527.0',
1582 '69.0.3497.49',
1583 '68.0.3440.121',
1584 '70.0.3526.1',
1585 '70.0.3526.0',
1586 '68.0.3440.120',
1587 '69.0.3497.48',
1588 '69.0.3497.47',
1589 '68.0.3440.119',
1590 '68.0.3440.118',
1591 '70.0.3525.5',
1592 '70.0.3525.4',
1593 '70.0.3525.3',
1594 '68.0.3440.117',
1595 '69.0.3497.46',
1596 '70.0.3525.2',
1597 '70.0.3525.1',
1598 '70.0.3525.0',
1599 '69.0.3497.45',
1600 '68.0.3440.116',
1601 '70.0.3524.4',
1602 '70.0.3524.3',
1603 '69.0.3497.44',
1604 '70.0.3524.2',
1605 '70.0.3524.1',
1606 '70.0.3524.0',
1607 '70.0.3523.2',
1608 '69.0.3497.43',
1609 '68.0.3440.115',
1610 '70.0.3505.9',
1611 '69.0.3497.42',
1612 '70.0.3505.8',
1613 '70.0.3523.1',
1614 '70.0.3523.0',
1615 '69.0.3497.41',
1616 '68.0.3440.114',
1617 '70.0.3505.7',
1618 '69.0.3497.40',
1619 '70.0.3522.1',
1620 '70.0.3522.0',
1621 '70.0.3521.2',
1622 '69.0.3497.39',
1623 '68.0.3440.113',
1624 '70.0.3505.6',
1625 '70.0.3521.1',
1626 '70.0.3521.0',
1627 '69.0.3497.38',
1628 '68.0.3440.112',
1629 '70.0.3520.1',
1630 '70.0.3520.0',
1631 '69.0.3497.37',
1632 '68.0.3440.111',
1633 '70.0.3519.3',
1634 '70.0.3519.2',
1635 '70.0.3519.1',
1636 '70.0.3519.0',
1637 '69.0.3497.36',
1638 '68.0.3440.110',
1639 '70.0.3518.1',
1640 '70.0.3518.0',
1641 '69.0.3497.35',
1642 '69.0.3497.34',
1643 '68.0.3440.109',
1644 '70.0.3517.1',
1645 '70.0.3517.0',
1646 '69.0.3497.33',
1647 '68.0.3440.108',
1648 '69.0.3497.32',
1649 '70.0.3516.3',
1650 '70.0.3516.2',
1651 '70.0.3516.1',
1652 '70.0.3516.0',
1653 '69.0.3497.31',
1654 '68.0.3440.107',
1655 '70.0.3515.4',
1656 '68.0.3440.106',
1657 '70.0.3515.3',
1658 '70.0.3515.2',
1659 '70.0.3515.1',
1660 '70.0.3515.0',
1661 '69.0.3497.30',
1662 '68.0.3440.105',
1663 '68.0.3440.104',
1664 '70.0.3514.2',
1665 '70.0.3514.1',
1666 '70.0.3514.0',
1667 '69.0.3497.29',
1668 '68.0.3440.103',
1669 '70.0.3513.1',
1670 '70.0.3513.0',
1671 '69.0.3497.28',
1672 )
1673 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# Default headers attached to HTTP requests. The User-Agent value is computed
# once, when this module is imported.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named User-Agent strings that can be looked up by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel for "no default supplied"; lets None be a legitimate default
NO_DEFAULT = object()
1691
# English month names, January first
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March',
    'April', 'May', 'June',
    'July', 'August', 'September',
    'October', 'November', 'December',
]

# Month names keyed by language code; every list is ordered January first
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars',
        'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre',
        'octobre', 'novembre', 'décembre',
    ],
}
a942d6cb 1702
a7aaa398
S
1703KNOWN_EXTENSIONS = (
1704 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1705 'flv', 'f4v', 'f4a', 'f4b',
1706 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1707 'mkv', 'mka', 'mk3d',
1708 'avi', 'divx',
1709 'mov',
1710 'asf', 'wmv', 'wma',
1711 '3gp', '3g2',
1712 'mp3',
1713 'flac',
1714 'ape',
1715 'wav',
1716 'f4f', 'f4m', 'm3u8', 'smil')
1717
df692c5a 1718REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')
1719
c587cbb7 1720# needed for sanitizing filenames in restricted mode
c8827027 1721ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
fd35d8cd
JW
1722 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1723 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1724
# strptime() patterns that do not depend on day/month ordering.
# The order of the entries is preserved from the original definition.
DATE_FORMATS = (
    # Textual month
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    # Year first
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    # "T"-separated date and time
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    # "<date> at <time>"
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Common patterns followed by numeric patterns that put the day first
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# Common patterns followed by numeric patterns that put the month first
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1782
# Matches the argument list of a packed JavaScript call of the form
# }('<payload>',<int>,<int>,'<word|word|...>'.split('|')
# and captures the four arguments as groups 1-4.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"

# Matches a <script type="application/ld+json"> element (case-insensitive,
# spanning newlines); the script body is captured as the group "json_ld".
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1785
7105440c 1786
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() when that name can actually be
    used to encode text, and 'UTF-8' otherwise.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
d77c3dfd 1800
f4bfd65f 1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn and
    then renamed over fn. If anything fails, the temporary file is removed
    and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    tmp_options = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # os.umask() can only be read by setting it, so set it to 0 and
            # immediately restore the previous value
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file before propagating
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        With val=None, any element matching xpath that has the attribute
        key is accepted. Returns the first match or None.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1876
d7e66d39
JMF
1877# On python2.6 the xml.etree.ElementTree.Element methods don't support
1878# the namespace parameter
5f6a1245
JW
1879
1880
def xpath_with_ns(path, ns_map):
    """Expand "prefix:tag" steps of path into "{namespace}tag" form.

    ns_map maps each prefix to its namespace. Steps without a colon are
    kept unchanged.
    """
    def expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([expand(step) for step in path.split('/')])
1891
d77c3dfd 1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matched by xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order until one matches.

    When nothing matches: default is returned if one was given; otherwise
    ExtractorError is raised if fatal is set (name, or xpath when name is
    None, is used in the message); otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable is treated as "not found"
        # instead of raising UnboundLocalError further down
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element located by xpath_element().

    Whatever xpath_element() yields for a missing element (None or default)
    is passed through. If the element exists but has no text, the same
    default / fatal / None rules are applied to the text itself.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % label)
    return None
a41fb80c
S
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default is returned if given; otherwise
    ExtractorError is raised if fatal is set; otherwise None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
bf0ff932
PH
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is an attribute lookup on the "id" attribute
    content = get_element_by_attribute('id', id, html)
    return content
43e8fafd 1947
12ea2f30 1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several classes, so match class_name as a
    # whole word anywhere inside the (quote-free) attribute value
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case
    it is inserted into the pattern as a regular expression.
    """
    if escape_value:
        value = re.escape(value)

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    def _content(match):
        content = match.group('content')
        # Content starting with a quote loses its first and last character
        if content.startswith(('"', "'")):
            content = content[1:-1]
        return unescapeHTML(content)

    return [_content(match) for match in re.finditer(element_re, html)]
a921f407 1990
c5229f39 1991
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently handled start tag
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs arrives as a sequence of (name, value) pairs
        self.attrs = dict(attrs)
2001
c5229f39 2002
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered before the error is still returned
        pass
    return attr_parser.attrs
9e6dd238 2027
c5229f39 2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.
    if html is None:
        return html

    # Source newlines carry no meaning in HTML; <br> and paragraph
    # boundaries become the only line breaks
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    The filename '-' selects standard output (its binary buffer when there
    is one). If opening fails for a reason other than EACCES, the name is
    passed through sanitize_path() and opened again; if that does not
    change the name, the original error is re-raised.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2085
5f6a1245 2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if not is_id:
        # Collapse runs of underscores, then trim them from both ends
        result = re.sub(r'_{2,}', '_', result).strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
d77c3dfd 2126
5f6a1245 2127
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms the path is returned unchanged unless force is set,
    in which case it is sanitized the same way but without any drive/UNC
    handling, and a leading path separator is preserved.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters that are not allowed in a path component, as well as
    # a trailing dot or whitespace; '.' and '..' are kept as they are
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s.startswith(os.path.sep):
        # startswith() rather than s[0]: an empty path must not raise IndexError
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2151
2152
def sanitize_url(url):
    """Repair protocol-less URLs and a few commonly mistyped schemes"""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # Only the first typo that applies is corrected
        fixed, replacements = re.subn(mistake, fixup, url)
        if replacements:
            return fixed
    return url
17bcc626
S
2169
2170
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request after passing url through sanitize_url()"""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2173
2174
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, environment variables second
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2178
2179
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of every element in its
    original position. Elements are compared with ==, so they do not have
    to be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2187
912b38b4 2188
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    The argument is the entity without its leading '&' but including the
    trailing ';'. Entities that cannot be resolved are returned in their
    literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    codepoint = compat_html_entities.name2codepoint.get(entity)
    if codepoint is not None:
        return compat_chr(codepoint)

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        is_hex = numstr.startswith('x')
        if is_hex:
            numstr = '0%s' % numstr
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, 16 if is_hex else 10))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2218
2219
def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for

    None is passed through unchanged.
    """
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # Group 1 is the entity without '&' but with the trailing ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2227
8bf48f23 2228
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate() and return its result

    If communicate() raises anything at all, p is killed and waited for
    before the exception is re-raised.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return result
2236
2237
def get_subprocess_encoding():
    """Return the encoding used for filenames and arguments passed to subprocesses"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2248
2249
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file

    Returns s unchanged wherever a Unicode filename API is available, and
    s encoded with the subprocess encoding otherwise.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)
    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if unicode_windows_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2272
2273
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string filename with the subprocess encoding

    Only Python 2 byte strings are decoded; every other value is returned
    unchanged.
    """
    needs_decoding = sys.version_info < (3, 0) and isinstance(b, bytes)
    if not needs_decoding:
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2283
f07b74fc
PH
2284
def encodeArgument(s):
    """Encode a command line argument via encodeFilename(..., for_subprocess=True)"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
2292
2293
def decodeArgument(b):
    """Decode a command line argument via decodeFilename(..., for_subprocess=True)"""
    decoded = decodeFilename(b, True)
    return decoded
2296
2297
def decodeOption(optval):
    """Return optval as text, decoding byte strings with the preferred encoding

    None is passed through unchanged.
    """
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
1c256f70 2306
5f6a1245 2307
def formatSeconds(secs, delim=':'):
    """Format a duration in seconds as H:MM:SS, M:SS or S.

    The hours (or minutes) field appears as soon as the duration reaches a
    full hour (or minute), so 3600 is rendered as '1:00:00' and 60 as
    '1:00'. delim is placed between the fields.
    """
    # ">=" rather than ">": exactly one hour/minute must roll over into the
    # next unit instead of being printed as '60:00' / '60'
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2315
a0ddb8a2 2316
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with an SSL context suited to the running Python

    Certificate verification is disabled when params['nocheckcertificate']
    is set.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            # check_hostname has to be switched off before CERT_NONE is set
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2340
732ea2f0 2341
def bug_reports_message():
    """Return the text appended to error messages that asks for a bug report"""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    sentences = [
        '; please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ]
    return ''.join(sentences)
2351
2352
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2356
2357
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.

        video_id, if given, is prepended to the message; cause, if given, is
        appended to it.
        """

        # Network errors and unavailable videos are never treated as bugs
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none"""
        if self.traceback is None:
            return None
        formatted = traceback.format_tb(self.traceback)
        return ''.join(formatted)
01951dda 2385
1c256f70 2386
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported; the URL is kept in .url"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2392
2393
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2397
2398
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Always reported as an expected error
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2410
2411
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
2424
2425
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2433
2434
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        # The message is also exposed as an attribute
        self.msg = msg
d77c3dfd 2445
5f6a1245 2446
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached. """
    # NOTE(review): the previous docstring was a verbatim copy of
    # MaxDownloadsReached's ("--max-downloads limit has been reached").
    # Presumably this is raised by the "break on existing" handling --
    # confirm against the raise sites.
    pass
2450
2451
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached. """
    # NOTE(review): the previous docstring was a verbatim copy of
    # MaxDownloadsReached's ("--max-downloads limit has been reached").
    # Presumably this is raised by the "break on reject" handling --
    # confirm against the raise sites.
    pass
2455
2456
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2460
2461
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2469
2470
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2486
5f6a1245 2487
class XAttrMetadataError(YoutubeDLError):
    """Error while writing extended-attribute metadata

    The error code and message are classified into .reason, which is one of
    'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        out_of_space = (
            self.code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in self.msg
            or 'Disk quota exceeded' in self.msg)
        if out_of_space:
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2502
2503
class XAttrUnavailableError(YoutubeDLError):
    """Error for unavailable extended attributes; carries no data beyond the message"""
2506
2507
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate *http_class* and, if requested, bind it to a source address.

    ydl_handler -- handler object whose ``_params`` dict may hold 'source_address'
    http_class  -- connection class to instantiate with *args / **kwargs
    is_https    -- whether the Python 2.6 fallback must wrap the socket in SSL

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the `source_address` parameter shadows the outer variable;
            # it is indexed with [0] and passed to bind(), i.e. used as a
            # (host, port) tuple such as the `sa` built below
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family from the textual form of the source IP
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the half-open socket
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() with a version
            # that uses the filtering helper above
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2571
2572
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to a header mapping.

    If the marker is absent, *headers* is returned untouched (same object).
    Otherwise a new dict is returned that contains neither the marker nor
    any 'Accept-Encoding' header (compared case-insensitively).
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    cleaned = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del cleaned['Youtubedl-no-compression']
    return cleaned
87f0e62d
YCH
2581
2582
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    When installed with an OpenerDirector, this class automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. To avoid compression for a
    particular request, the original request only has to include the HTTP
    header "Youtubedl-no-compression", which is removed before the real
    request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open *req*, going through a SOCKS proxy if the request asks for one."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and zlib-wrapped second."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL and add the standard headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Only build a new request when escaping changed something
        if original_url != escaped_url:
            req = update_Request(req, url=escaped_url)

        for name, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() not in req.headers:
                req.add_header(name, value)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress gzip/deflate bodies and percent-encode redirect locations."""
        old_resp = resp

        def rewrap(body):
            # Build a replacement response around the decoded body, reusing
            # the original headers minus the Content-encoding entry
            wrapped = compat_urllib_request.addinfourl(body, old_resp.headers, old_resp.url, old_resp.code)
            wrapped.msg = old_resp.msg
            del wrapped.headers['Content-encoding']
            return wrapped

        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for trim in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = rewrap(uncompressed)
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            resp = rewrap(io.BytesIO(self.deflate(resp.read())))
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                is_py3 = sys.version_info >= (3, 0)
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if is_py3:
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
bf50b038 2706
5de90176 2707
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Create a connection class that tunnels through a SOCKS proxy.

    base_class  -- HTTPConnection or HTTPSConnection (sub)class to derive from
    socks_proxy -- proxy URL; the scheme selects the protocol ('socks5',
                   'socks'/'socks4' or 'socks4a'), the port defaults to 1080
                   and username/password, if present, are unquoted

    Returns a subclass of *base_class* whose connect() goes through the proxy.
    Raises ValueError if the URL scheme is not one of the supported ones
    (previously this surfaced as an UnboundLocalError).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Leave None / '' untouched so missing credentials stay missing
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are forwarded to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2749
2750
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open *req*, going through a SOCKS proxy if the request asks for one."""
        # Forward whichever SSL settings this Python version's handler carries
        open_kwargs = {}
        for attr, key in (
                ('_context', 'context'),  # python > 2.6
                ('_check_hostname', 'check_hostname')):  # python 3.x
            if hasattr(self, attr):
                open_kwargs[key] = getattr(self, attr)

        conn_class = self._https_conn_class
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2774
2775
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = (
        '# Netscape HTTP Cookie File\n'
        '# This file is generated by yt-dlp. Do not edit.\n'
        '\n')
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = '' if cookie.expires is None else compat_str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            # Strip the HttpOnly marker so the entry is parsed like any other
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(fields))
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # Copy the acceptable lines into an in-memory buffer, warning about
        # (and dropping) the malformed ones
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2892
2893
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also serves HTTPS through the HTTP hooks."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # (The workaround below is currently disabled.)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        parent = compat_urllib_request.HTTPCookieProcessor
        return parent.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2916
2917
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that forces redirect URLs to unicode on Python 2."""

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            unicode_url = compat_str(newurl)
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, headers, unicode_url)
2925
2926
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing timezone designator off *date_str*.

    Recognised designators are a final 'Z' or a numeric offset such as
    '+0100' / '-05:30' (optionally preceded by one space), and only after
    at least eight leading characters.

    Returns a tuple (offset, remainder): the offset as a datetime.timedelta
    (zero when there is no designator or it is 'Z') and the date string with
    the designator removed.
    """
    match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(match.group('tz'))]
    if not match.group('sign'):
        # Plain 'Z' means UTC
        return datetime.timedelta(), remainder

    direction = 1 if match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(match.group('hours')),
        minutes=direction * int(match.group('minutes')))
    return offset, remainder
2943
2944
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str  -- date in the form YYYY-MM-DD<delimiter>HH:MM:SS; fractional
                 seconds are discarded. None yields None.
    delimiter -- separator between the date and the time part
    timezone  -- datetime.timedelta offset to subtract; when None it is
                 extracted from the end of date_str

    Returns None if the string does not match the expected format.
    """
    if date_str is None:
        return None

    # Drop fractional seconds
    trimmed = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, trimmed = extract_timezone(trimmed)

    date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        parsed = datetime.datetime.strptime(trimmed, date_format)
    except ValueError:
        return None
    return calendar.timegm((parsed - timezone).timetuple())
912b38b4
PH
2962
2963
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first format list if *day_first* is true, else the month-first one."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2966
2967
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None if date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas, then remove AM/PM + timezone
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    _, cleaned = extract_timezone(cleaned)

    upload_date = None
    # Every known format is tried; the last one that parses wins
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, expression).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 2994
5f6a1245 2995
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    Returns None if date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # A 'PM' marker anywhere in the string shifts the result by 12 hours
    pm_delta = 12 if re.search(r'(?i)PM', text) else 0
    timezone, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if tz_match:
        text = text[:-len(tz_match.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    ns_match = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if ns_match:
        text = ns_match.group(1)

    pm_shift = datetime.timedelta(hours=pm_delta)
    for expression in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(text, expression)
        except ValueError:
            continue
        return calendar.timegm((parsed - timezone + pm_shift).timetuple())

    timetuple = email.utils.parsedate_tz(text)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3027
3028
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from *url*, falling back to *default_ext*.

    The candidate is whatever follows the last '.' before any query string.
    It is accepted if it is purely alphanumeric, or if it is a known
    extension once trailing slashes are stripped.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    guess = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3040
5f6a1245 3041
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return *filename* with its extension replaced by '<sub_lang>.<sub_format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3044
5f6a1245 3045
def date_from_str(date_str):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'now', 'today' and 'yesterday' are accepted on their own as well.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount

    unit = match.group('unit')
    # A bad approximation? Months and years are counted as 30 and 365 days
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
3073
3074
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that do not consist of exactly eight digits are returned unchanged.
    """
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    year, month, day = match.groups()
    return '%s-%s-%s' % (year, month, day)
3083
5f6a1245 3084
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest / latest representable date.
        Raises ValueError if start lies after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3114
3115
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3124
3125
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s   -- text to write
    out -- output stream; only streams whose fileno() is 1 or 2 and whose
           handle is an actual console are handled here

    Raises OSError if the console write call reports failure.
    """
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors to the ids expected by GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, non-character devices, and handles
        # for which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, stopping each chunk just
    # before a non-BMP character; such a character is then written on its
    # own with a length of 2
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3199
3200
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (sys.stderr by default) and flush it.

    On Windows, when no encoding is given, console output is first attempted
    through _windows_write_string. Otherwise the text is encoded (ignoring
    unencodable characters) for byte-oriented streams, or written as is.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    expects_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if expects_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3221
3222
48ea9cea
PH
def bytes_to_intlist(bs):
    """Convert a byte string (or sequence of ints / characters) to a list of ints."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    return [ord(char) for char in bs]
3230
c257baff 3231
def intlist_to_bytes(xs):
    """Pack a sequence of integers into a byte string, one unsigned byte each."""
    if not xs:
        return b''
    pack_format = '%dB' % len(xs)
    return compat_struct_pack(pack_format, *xs)
c38b1e77
PH
3236
3237
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on Windows, fcntl.flock elsewhere, and
# stubs that raise IOError when fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high halves of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # Flag 0x2 requests an exclusive lock (LOCKFILE_EXCLUSIVE_LOCK)
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3311
3312
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened on construction; entering takes the lock (shared for
    mode 'r', exclusive otherwise) and leaving unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.mode = mode
        self.f = io.open(filename, mode, encoding=encoding)

    def __enter__(self):
        # Readers share the lock, writers need it exclusively
        needs_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, needs_exclusive)
        except IOError:
            # Do not leak the handle if locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3342
3343
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3347
3348
def shell_quote(args):
    """Return the arguments quoted for a shell and joined with spaces."""
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
9d4660ca
PH
3358
3359
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    Any data already smuggled into `url` is kept and takes precedence over
    the entries of `data`. The passed dict itself is left unmodified.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so that the caller's dict is not mutated
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
9d4660ca
PH
3368
3369
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into (url, data).

    Returns (smug_url, default) unchanged when the URL carries no smuggled
    data.
    """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, sdata = smug_url.rpartition('#')
        jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
        return url, json.loads(jsond)
    return smug_url, default
02dbf93f
PH
3377
3378
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a string with a binary (1024-based) suffix.

    Returns 'N/A' for None. A str argument is converted with float().
    Negative values keep their sign, and values beyond the largest known
    suffix are expressed in that largest suffix.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # math.log is undefined for negative numbers, so use the magnitude
        exponent = int(math.log(abs(bytes), 1024.0))
        # Keep the exponent inside the suffix table: small fractions would
        # otherwise index from the end and huge values would overflow it
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3391
1c088fa8 3392
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' at the start of s using a unit -> multiplier table.

    The number may use ',' or '.' as decimal separator. Returns the number
    times the unit's multiplier truncated to int, or None if s does not match.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    mobj = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives, s)
    if mobj is None:
        return None
    number = float(mobj.group('num').replace(',', '.'))
    return int(number * unit_table[mobj.group('unit')])
3402
3403
be64b5b0
PH
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into a byte count.

    Returns None for None or when lookup_unit_table cannot parse the string.
    """
    if s is None:
        return None

    # (prefix letter, decimal unit name stem, binary unit name stem),
    # ordered by increasing power
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    unit_table = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal = 1000 ** power
        binary = 1024 ** power
        lower = letter.lower()
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        unit_table.update({
            letter + 'iB': binary,
            letter + 'B': decimal,
            lower + 'B': binary,
            letter + 'b': decimal,
            lower + 'b': decimal,
            decimal_name + 'bytes': decimal,
            binary_name + 'bytes': binary,
        })

    return lookup_unit_table(unit_table, s)
3473
3474
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Strings made only of digits, ',' and '.' go through str_to_int; anything
    else is looked up with a k/m/kk multiplier suffix.
    """
    if s is None:
        return None

    s = s.strip()

    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    thousand, million = 1000, 1000 ** 2
    unit_table = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }
    return lookup_unit_table(unit_table, s)
be64b5b0 3494
2f7ae819 3495
b871d7e9
S
def parse_resolution(s):
    """Extract width/height information from a string.

    Recognises 'WxH' (also with 'X' or '×'), '<height>p' / '<height>i' and
    '4k' / '8k'. Returns a dict with 'width' and/or 'height', or {} when
    nothing is recognised or s is None.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    scanlines = re.search(r'\b(\d+)[pPiI]\b', s)
    if scanlines:
        return {'height': int(scanlines.group(1))}

    kilo = re.search(r'\b([48])[kK]\b', s)
    if kilo:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(kilo.group(1)) * 540}

    return {}
3516
3517
0dc41787
S
def parse_bitrate(s):
    """Return the integer in front of 'kbps' in s, or None if absent or s is not a string."""
    if isinstance(s, compat_str):
        mobj = re.search(r'\b(\d+)\s*kbps', s)
        if mobj:
            return int(mobj.group(1))
    return None
3524
3525
def month_by_name(name, lang='en'):
    """ Return the number of a month by its name (locale-independently).

    Names are taken from MONTH_NAMES[lang], falling back to the English
    names when lang is not available. Returns None for an unknown name. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        position = names.index(name)
    except ValueError:
        return None
    return position + 1
3535
3536
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations; None if the abbreviation is unknown """

    # An abbreviation is the first three characters of the English name
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        position = abbreviations.index(abbrev)
    except ValueError:
        return None
    return position + 1
18258362
JMF
3545
3546
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity by '&amp;' in XML"""
    # '&' not followed by a named or numeric character reference
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
3553
3554
def setproctitle(title):
    """Best-effort attempt to set the process name through libc's prctl.

    Does nothing when libc.so.6 cannot be loaded or lacks prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initialising the buffer from the bytes (instead of sizing it to
    # exactly len(title_bytes)) leaves room for the terminating NUL that
    # prctl expects at the end of the name
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        # Option 15 is PR_SET_NAME in linux/prctl.h
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3579
3580
def remove_start(s, start):
    """Return s without the leading `start`; s is returned as is if it is None or has no such prefix."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
29eb5174
PH
3583
3584
def remove_end(s, end):
    """Return s without the trailing `end`; s is returned as is if it is None or has no such suffix."""
    if s is None or not s.endswith(end):
        return s
    # Slice by absolute position: s[:-len(end)] would be s[:0] (empty)
    # for an empty `end`
    return s[:len(s) - len(end)]
2b9faf55
PH
3587
3588
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s, if present."""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3596
3597
b6e0c7d2
U
def get_domain(url):
    """Return the domain of url without scheme and leading 'www.', or None if none is found."""
    mobj = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not mobj:
        return None
    return mobj.group('domain')
3601
3602
def url_basename(url):
    """Return the last component of the path of url."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3
PH
3606
3607
02dc0a36
S
def base_url(url):
    """Return url up to and including the last '/' before any '?', '#' or '&'.

    url must be an http(s) URL matching that shape; otherwise the attribute
    access on the failed match raises AttributeError.
    """
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3610
3611
def urljoin(base, path):
    """Join path onto base.

    Returns path itself when it already is absolute or protocol-relative,
    and None when path is empty/not a string or base is not an http(s) or
    protocol-relative URL. bytes arguments are decoded as UTF-8.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    base_is_usable = isinstance(base, compat_str) and re.match(
        r'^(?:https?:)?//', base)
    if not base_is_usable:
        return None
    return compat_urlparse.urljoin(base, path)
3625
3626
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3630
3631
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3635
3636
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, multiplied by invscale and floor-divided by scale.

    If get_attr is given, that attribute of v is converted instead. Returns
    default when the value is None, '' or cannot be converted (including
    infinite floats).
    """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int() of an infinite float
        return default
9732d77e 3649
9572013d 3650
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted with compat_str, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3653
9732d77e
PH
3654
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and plus signs before converting
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
608d11f5
PH
3662
3663
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float, multiplied by invscale and divided by scale.

    Returns default when v is None or cannot be converted.
    """
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
43f775e4
PH
3671
3672
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v if it is a bool, default otherwise."""
    if isinstance(v, bool):
        return v
    return default
3675
3676
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a string, default otherwise."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3679
3680
af03000a
S
def url_or_none(url):
    """Return the stripped url if it starts with a supported scheme (or '//'), else None."""
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
af03000a
S
3686
3687
def strftime_or_none(timestamp, date_format, default=None):
    """Format a unix timestamp or a 'YYYYMMDD' string with date_format.

    Returns default when timestamp is of another type or when parsing or
    formatting fails with ValueError, TypeError or AttributeError.
    """
    parsed = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            parsed = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            parsed = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # For unsupported types `parsed` is still None here and the
        # attribute lookup raises AttributeError, handled below
        return parsed.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
3698
3699
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in order: colon-separated
    ([[[DD:]HH:]MM:]SS[.ms]), unit-suffixed (ISO 8601 like 'PT1H2M3S' or
    '1h 2min 3s') and a lone '<n> hours' / '<n> mins'. Returns None when s
    is not a string or matches none of them.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days = hours = mins = secs = ms = None

    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if not m:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
    if m:
        # Both patterns above expose the same five groups in this order
        days, hours, mins, secs, ms = m.groups()
    else:
        m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
        if not m:
            return None
        hours, mins = m.groups()

    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms still carries its leading '.', i.e. it is a fraction of a second
        duration += float(ms)
    return duration
91d7d0b3
JMF
3756
3757
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the real extension of filename.

    If expected_real_ext is given and differs from the real extension, ext
    is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3764
3765
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the real extension of filename by ext.

    If expected_real_ext is given and differs from the real extension, ext
    is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3771
3772
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if launching the binary raises OSError. """
    command = [exe] + args
    try:
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
b7ab0590
PH
3782
3783
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    command = [encodeArgument(exe)] + args
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3801
3802
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output, or `unrecognized`.

    version_re defaults to a pattern matching 'version <something>'.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3812
3813
class PagedList(object):
    """Base class for paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3818
9c44d242
PH
3819
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages through pagefunc(pagenum) when sliced.

    With use_cache enabled, fetched pages are kept and reused by later
    getslice() calls.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        collected = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = self._cache.get(pagenum) if self._use_cache else None
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = page_results

            # Offset of the first wanted entry inside this page
            if firstid <= start < nextfirstid:
                startv = start % self._pagesize
            else:
                startv = 0

            # Offset just past the last wanted entry, if it lies in this page
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % self._pagesize) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            collected.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return collected
81c2f20b
PH
3870
3871
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        collected = []
        first_page = start // self._pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start
        # Entries to drop from the front of the first fetched page
        skip_elems = start - first_page * self._pagesize
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
3899
3900
def uppercase_escape(s):
    """Decode every '\\UXXXXXXXX' escape sequence found in s."""
    decode = codecs.getdecoder('unicode_escape')

    def replace_escape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', replace_escape, s)
0fe2ff78
YCH
3907
3908
def lowercase_escape(s):
    """Decode every '\\uXXXX' escape sequence found in s."""
    decode = codecs.getdecoder('unicode_escape')

    def replace_escape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', replace_escape, s)
b53466e1 3915
d05cfe06
S
3916
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Characters that are passed through unquoted
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, safe_chars)
d05cfe06
S
3922
3923
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA-encoded, all other components are percent-escaped
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3934
62e609ab
PH
3935
def read_batch_urls(batch_fd):
    """Read URLs, one per line, from batch_fd and close it.

    Blank lines and lines starting with '#', ';' or ']' are skipped, and a
    trailing ' #...' comment is removed from each URL.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, 1)[0].rstrip()

    urls = []
    with contextlib.closing(batch_fd) as fd:
        for line in fd:
            cleaned = fixup(line)
            if cleaned:
                urls.append(cleaned)
    return urls
b74fa8cd
JMF
3953
3954
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3957
3958
def update_url_query(url, query):
    """Return url with the entries of query merged into its query string.

    url is returned unchanged when query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parts.query)
    merged.update(query)
    new_query = compat_urllib_parse_urlencode(merged, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3967
8e60dc75 3968
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Build a new request from req with the given parts overridden.

    headers are merged over those of req, query is merged into the URL, and
    the HEAD/PUT method of req is preserved through the matching class.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)

    request_classes = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }
    req_type = request_classes.get(
        req.get_method(), compat_urllib_request.Request)

    new_req = req_type(
        new_url, data=new_data, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3987
3988
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns (body bytes, content type). Raises ValueError when the boundary
    occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(b'--' + boundary_bytes + b'\r\n' + content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
4009
4010
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, Content-Type value). A ValueError is
    raised only when a caller-specified boundary collides with the data;
    random boundaries are regenerated until one fits.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))

        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            # Collision with a random boundary: draw a new one
            boundary = None
4039
4040
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable of several keys, in d.

    For a list/tuple of keys the first value that is present, not None and
    (with skip_false_values) truthy is returned; default if there is none.
    A single key is looked up with plain d.get().
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4049
4050
def try_get(src, getter, expected_type=None):
    """Apply getter (or each of a list/tuple of getters) to src.

    Returns the first result obtained without AttributeError, KeyError,
    TypeError or IndexError that is of expected_type (if given); None when
    no getter yields one.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4062
4063
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts, earlier ones taking precedence.

    None values are ignored. A later non-empty string replaces an earlier
    empty string for the same key.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                fills_empty_string = (
                    isinstance(v, compat_str) and v
                    and isinstance(merged[k], compat_str)
                    and not merged[k])
                if not fills_empty_string:
                    continue
            merged[k] = v
    return merged
4076
4077
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as compat_str, decoding it with encoding/errors if it is not one already."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4080
16392824 4081
a1a530b0
PH
# Age limit associated with each US rating label (used by parse_age_limit)
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4089
4090
# Age limit associated with each TV Parental Guidelines label
# (used by parse_age_limit)
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4099
4100
def parse_age_limit(s):
    """Parse an age limit from an int, an 'NN' / 'NN+' string or a rating label.

    ints are accepted only within 0..21. Rating labels are looked up in
    US_RATINGS and TV_PARENTAL_GUIDELINES. Returns None when nothing matches.
    """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    # Accept 'TV-14', 'TV_14' and 'TV14' alike
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4115
4116
def strip_jsonp(code):
    """Strip a JSONP wrapper such as 'callback(...);' and return the payload.

    Input that does not look like a JSONP call is returned unchanged.
    """
    jsonp_wrapper = r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_wrapper, r'\g<callback_data>', code)
478c2c61
PH
4125
4126
def js_to_json(code, vars={}):
    """Rewrite JavaScript object-literal code into JSON text.

    vars is a dict of var, val pairs to substitute for bare identifiers.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied to escape sequences (and bare double quotes) found
    # inside string literals
    STRING_REWRITES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def rewrite_escape(esc):
        token = esc.group(0)
        return STRING_REWRITES.get(token, token)

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith(('/*', '//', '!')) or v == ',':
            # Comments, negation marks and trailing commas are dropped
            return ""

        if v[0] in ("'", '"'):
            v = re.sub(r'(?s)\\.|"', rewrite_escape, v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integers used as object keys must become strings
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4171
4172
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # The position in quality_ids is the quality; unknown ids rank lowest
        try:
            position = quality_ids.index(qid)
        except ValueError:
            return -1
        return position
    return q
4181
acd69589 4182
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types; the value is either None or a string
# associated with that type
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
0a871f68 4197
a020a0dc
PH
4198
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Leave room for the ellipses within `length`
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4207
4208
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
4211
4212
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    When version is empty or either version cannot be parsed, the answer is
    the opposite of assume_new.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
732ea2f0
PH
4220
4221
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Always False. The zipimporter / sys.frozen detection that used to
    # follow this return could never run and has been removed.
    return False
7d4111ed
PH
4229
4230
def args_to_str(args):
    """Get a short string representation for a subprocess command."""
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
4234
4235
def error_to_compat_str(err):
    """Return the message of err as a text string."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4243
4244
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    Full types listed below are matched first; otherwise the subtype
    (lower-cased, without parameters) is translated, falling back to the
    subtype itself. Returns None for None.
    """
    if mt is None:
        return None

    full_type_exts = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    ext = full_type_exts.get(mt)
    if ext is not None:
        return ext

    subtype_exts = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    # Everything after the last '/', without parameters such as ';charset=...'
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()
    return subtype_exts.get(subtype, subtype)
4282
4283
def parse_codecs(codecs_str):
    """Split a codecs string into {'vcodec': ..., 'acodec': ...}.

    The first recognised video and audio codecs are used, with 'none' for a
    missing one. If no codec is recognised and exactly two are given, they
    are taken as video and audio in that order; otherwise {} is returned.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            vcodec = vcodec or full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4313
4314
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from the response headers of url_handle.

    The filename of a Content-Disposition attachment is preferred; the
    Content-Type is used otherwise.
    """
    getheader = url_handle.headers.get

    disposition = getheader('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
4327
4328
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 'data:' URI for the bytes in data with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4331
4332
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4341
4342
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Returns the (truthy) match object if the decoded text starts with '<'
    after optional whitespace, None otherwise. """

    # Longer BOMs come before shorter ones that they start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    detected = next(
        ((bom, enc) for bom, enc in BOMS if first_bytes.startswith(bom)),
        None)
    if detected is None:
        text = first_bytes.decode('utf-8', 'replace')
    else:
        bom, enc = detected
        text = first_bytes[len(bom):].decode(enc, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4361
4362
def determine_protocol(info_dict):
    """Return the protocol of a format dict.

    An explicit 'protocol' entry wins; otherwise it is derived from the
    start of the URL, its extension or, finally, its scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4383
4384
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values """

    def column_widths(rows):
        # Widest stringified cell of every column.
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_columns(row, mask):
        return [cell for keep, cell in zip(mask, row) if keep]

    if hideEmpty:
        # A column whose data cells are all empty has width 0, so the width
        # list doubles as a keep/drop mask.
        mask = column_widths(data)
        header_row = keep_columns(header_row, mask)
        data = [keep_columns(row, mask) for row in data]

    widths = column_widths([header_row] + data)

    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)

    # Every column but the last is left-aligned and padded to its width
    # (plus extraGap); the last column is emitted as-is.
    padded = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    line_format = ' '.join(padded) + ' %s'
    return '\n'.join(line_format % tuple(row) for row in rows)
347de493
PH
4405
4406
def _match_one(filter_part, dct):
    """Evaluate a single filter clause against the dictionary `dct`.

    Two clause forms are recognised:
      * comparison: ``key OP value`` with OP one of < <= > >= = !=, optionally
        followed by ``?``; the value is a number (with optional size suffix),
        a quoted string, or a bare alphanumeric string
      * unary: ``key`` or ``!key``

    Returns the comparison/unary result.  For a comparison whose key is
    missing (None) in `dct`, the matched ``?`` text is returned (None when no
    ``?`` was given), so callers should rely on truthiness only.
    Raises ValueError for an unparsable clause, an ordering operator applied
    to a string value, or an invalid numeric value.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # Verbose regex: key, operator, optional "?" marker, then exactly one of
    # intval / quotedstrval / strval.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and the matching comparison
                # value is a number, we should respect the type of the original
                # field and process the comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison: only (in)equality is meaningful.
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character inside a quoted value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: plain integer first, then a file size with
            # unit suffix, then the same with an implied trailing 'B'.
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: passes only if the clause carried a "?" marker.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: booleans are tested for True/False, anything else for
    # presence (not None) / absence (None).
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4475
4476
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # Clauses are joined with '&'; evaluation stops at the first failing one.
    for clause in filter_str.split('&'):
        if not _match_one(clause, dct):
            return False
    return True
4482
4483
def match_filter_func(filter_str):
    """Build a callback that checks an info dict against `filter_str`.

    The returned function yields None when the info dict passes the filter,
    otherwise a human-readable message explaining that it is being skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
4492
4493
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float).

    Accepts an offset such as ``12``, ``12.5`` or ``12.5s`` and a clock value
    ``H:MM:SS`` whose optional fraction is separated by ``.`` or ``:``.
    Returns None for an empty or unrecognised expression.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset is not None:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock is None:
        return None
    hours, minutes, seconds = clock.groups()
    # "SS:fff" is treated like "SS.fff"
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4505
4506
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time offset in seconds as an SRT timecode ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before being split into
    fields.  Formatting float remainders directly with ``%d`` truncates, so a
    value such as 4.35 (stored as 4.3499999...) would lose a millisecond and
    come out as ``00:00:04,349``.
    """
    total_msec = int(round(seconds * 1000))
    total_secs, msec = divmod(total_msec, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msec)
bf6427d2
YCH
4509
4510
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT.

    Every <p> element with a usable begin time and an end time (or duration)
    becomes one SRT cue; supported tts:* styling is rendered as <b>, <i>, <u>
    and <font> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Older namespace URIs are rewritten to the current ones so a single set
    # of xpath expressions is enough.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> resolved {property: value}; filled in below
    styles = {}
    # style inherited by every paragraph (taken from <body>/<div>)
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that renders one <p> element as SRT markup.
        _out = ''
        # NOTE(review): these two lists are class attributes and are mutated
        # in place, so they are shared by every parser instance created during
        # one dfxp2srt() call; end() does not always pop what start() pushed
        # onto _applied_styles, so state can carry over between paragraphs.
        # Left unchanged here because fixing it alters the emitted tags.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: defaults, then the referenced style, then
                # inline tts:* attributes.
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened by the matching start(), innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialise the node and feed it through the rendering target
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance.  A style can only be resolved once its
    # parent is, and parents may be declared after their children, so the
    # style list is swept repeatedly until nothing is left pending.
    while True:
        pending_parents = {}  # style id -> id of its still-unresolved parent
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    pending_parents[style_id] = parent_style_id
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if not pending_parents:
            break
        if len(styles) == resolved_before:
            # No style was resolved in this sweep, so further sweeps would
            # repeat it forever.  This happens when a parent is undefined,
            # defines no supported property, or is part of a reference cycle.
            # Treat such parents as empty styles so their children resolve.
            # Parents that are merely waiting on their own parent are left
            # alone unless only a cycle remains.
            dangling = [
                parent for parent in pending_parents.values()
                if parent not in pending_parents]
            for parent in dangling or list(pending_parents.values()):
                styles.setdefault(parent, {})

    # The style referenced by the first <body> and <div> applies to all paragraphs
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Fall back to begin + dur; skip the cue if neither is usable
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4673
4674
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argv fragment for a value-carrying command line option.

    Looks up `param` in `params`; returns [] when it is unset (None),
    otherwise [command_option, <value as text>].

    Every non-None value is converted to text.  Previously only truthy values
    were converted, so settings such as 0 or False ended up in the argument
    list as non-string objects.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, compat_str(value)]
4680
4681
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argv fragment for a boolean command line option.

    Returns [] when `param` is unset (None) in `params`.  Otherwise the
    boolean is rendered as `true_value`/`false_value`, either as a separate
    argument or, when `separator` is given, joined to `command_option`.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4690
4691
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals `expected_value`, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4695
4696
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Select user-supplied extra command line arguments.

    argdict     either a dict mapping lower-case keys to argument lists, or
                (old style) a plain list/tuple of arguments
    keys        list/tuple of keys to try in order; an entry may itself be a
                list/tuple of keys whose argument lists are concatenated
    default     returned when nothing matches
    use_compat  whether an old-style list/tuple `argdict` is returned as-is
                (True) or ignored (False)

    A list `default` is returned as a copy, so that callers modifying the
    result cannot alter the shared default list (or the list they passed in)
    for later calls.
    """
    def fallback():
        return list(default) if isinstance(default, list) else default

    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return fallback()
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        if isinstance(key_list, compat_str):
            key_list = (key_list,)
        arg_list = [
            args for args in (argdict.get(key.lower()) for key in key_list)
            if args is not None]
        if arg_list:
            # The first key group that has any arguments wins
            return [arg for args in arg_list for arg in args]
    return fallback()
66e289ba
S
4717
4718
39672624
YCH
class ISO639Utils(object):
    """Conversions between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are looked up, so longer tags such
        # as 'en-US' resolve through their two-letter prefix.
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # First table entry whose three-letter value matches; None if absent.
        candidates = (
            short_name for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(candidates, None)
4922
4923
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of full country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter country code (any letter case) to the full country name.

        Returns None for a code that is not in the table.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
5182
5183
773f291d
S
5184class GeoUtils(object):
5185 # Major IPv4 address blocks per country
5186 _country_ip_map = {
53896ca5 5187 'AD': '46.172.224.0/19',
773f291d
S
5188 'AE': '94.200.0.0/13',
5189 'AF': '149.54.0.0/17',
5190 'AG': '209.59.64.0/18',
5191 'AI': '204.14.248.0/21',
5192 'AL': '46.99.0.0/16',
5193 'AM': '46.70.0.0/15',
5194 'AO': '105.168.0.0/13',
53896ca5
S
5195 'AP': '182.50.184.0/21',
5196 'AQ': '23.154.160.0/24',
773f291d
S
5197 'AR': '181.0.0.0/12',
5198 'AS': '202.70.112.0/20',
53896ca5 5199 'AT': '77.116.0.0/14',
773f291d
S
5200 'AU': '1.128.0.0/11',
5201 'AW': '181.41.0.0/18',
53896ca5
S
5202 'AX': '185.217.4.0/22',
5203 'AZ': '5.197.0.0/16',
773f291d
S
5204 'BA': '31.176.128.0/17',
5205 'BB': '65.48.128.0/17',
5206 'BD': '114.130.0.0/16',
5207 'BE': '57.0.0.0/8',
53896ca5 5208 'BF': '102.178.0.0/15',
773f291d
S
5209 'BG': '95.42.0.0/15',
5210 'BH': '37.131.0.0/17',
5211 'BI': '154.117.192.0/18',
5212 'BJ': '137.255.0.0/16',
53896ca5 5213 'BL': '185.212.72.0/23',
773f291d
S
5214 'BM': '196.12.64.0/18',
5215 'BN': '156.31.0.0/16',
5216 'BO': '161.56.0.0/16',
5217 'BQ': '161.0.80.0/20',
53896ca5 5218 'BR': '191.128.0.0/12',
773f291d
S
5219 'BS': '24.51.64.0/18',
5220 'BT': '119.2.96.0/19',
5221 'BW': '168.167.0.0/16',
5222 'BY': '178.120.0.0/13',
5223 'BZ': '179.42.192.0/18',
5224 'CA': '99.224.0.0/11',
5225 'CD': '41.243.0.0/16',
53896ca5
S
5226 'CF': '197.242.176.0/21',
5227 'CG': '160.113.0.0/16',
773f291d 5228 'CH': '85.0.0.0/13',
53896ca5 5229 'CI': '102.136.0.0/14',
773f291d
S
5230 'CK': '202.65.32.0/19',
5231 'CL': '152.172.0.0/14',
53896ca5 5232 'CM': '102.244.0.0/14',
773f291d
S
5233 'CN': '36.128.0.0/10',
5234 'CO': '181.240.0.0/12',
5235 'CR': '201.192.0.0/12',
5236 'CU': '152.206.0.0/15',
5237 'CV': '165.90.96.0/19',
5238 'CW': '190.88.128.0/17',
53896ca5 5239 'CY': '31.153.0.0/16',
773f291d
S
5240 'CZ': '88.100.0.0/14',
5241 'DE': '53.0.0.0/8',
5242 'DJ': '197.241.0.0/17',
5243 'DK': '87.48.0.0/12',
5244 'DM': '192.243.48.0/20',
5245 'DO': '152.166.0.0/15',
5246 'DZ': '41.96.0.0/12',
5247 'EC': '186.68.0.0/15',
5248 'EE': '90.190.0.0/15',
5249 'EG': '156.160.0.0/11',
5250 'ER': '196.200.96.0/20',
5251 'ES': '88.0.0.0/11',
5252 'ET': '196.188.0.0/14',
5253 'EU': '2.16.0.0/13',
5254 'FI': '91.152.0.0/13',
5255 'FJ': '144.120.0.0/16',
53896ca5 5256 'FK': '80.73.208.0/21',
773f291d
S
5257 'FM': '119.252.112.0/20',
5258 'FO': '88.85.32.0/19',
5259 'FR': '90.0.0.0/9',
5260 'GA': '41.158.0.0/15',
5261 'GB': '25.0.0.0/8',
5262 'GD': '74.122.88.0/21',
5263 'GE': '31.146.0.0/16',
5264 'GF': '161.22.64.0/18',
5265 'GG': '62.68.160.0/19',
53896ca5
S
5266 'GH': '154.160.0.0/12',
5267 'GI': '95.164.0.0/16',
773f291d
S
5268 'GL': '88.83.0.0/19',
5269 'GM': '160.182.0.0/15',
5270 'GN': '197.149.192.0/18',
5271 'GP': '104.250.0.0/19',
5272 'GQ': '105.235.224.0/20',
5273 'GR': '94.64.0.0/13',
5274 'GT': '168.234.0.0/16',
5275 'GU': '168.123.0.0/16',
5276 'GW': '197.214.80.0/20',
5277 'GY': '181.41.64.0/18',
5278 'HK': '113.252.0.0/14',
5279 'HN': '181.210.0.0/16',
5280 'HR': '93.136.0.0/13',
5281 'HT': '148.102.128.0/17',
5282 'HU': '84.0.0.0/14',
5283 'ID': '39.192.0.0/10',
5284 'IE': '87.32.0.0/12',
5285 'IL': '79.176.0.0/13',
5286 'IM': '5.62.80.0/20',
5287 'IN': '117.192.0.0/10',
5288 'IO': '203.83.48.0/21',
5289 'IQ': '37.236.0.0/14',
5290 'IR': '2.176.0.0/12',
5291 'IS': '82.221.0.0/16',
5292 'IT': '79.0.0.0/10',
5293 'JE': '87.244.64.0/18',
5294 'JM': '72.27.0.0/17',
5295 'JO': '176.29.0.0/16',
53896ca5 5296 'JP': '133.0.0.0/8',
773f291d
S
5297 'KE': '105.48.0.0/12',
5298 'KG': '158.181.128.0/17',
5299 'KH': '36.37.128.0/17',
5300 'KI': '103.25.140.0/22',
5301 'KM': '197.255.224.0/20',
53896ca5 5302 'KN': '198.167.192.0/19',
773f291d
S
5303 'KP': '175.45.176.0/22',
5304 'KR': '175.192.0.0/10',
5305 'KW': '37.36.0.0/14',
5306 'KY': '64.96.0.0/15',
5307 'KZ': '2.72.0.0/13',
5308 'LA': '115.84.64.0/18',
5309 'LB': '178.135.0.0/16',
53896ca5 5310 'LC': '24.92.144.0/20',
773f291d
S
5311 'LI': '82.117.0.0/19',
5312 'LK': '112.134.0.0/15',
53896ca5 5313 'LR': '102.183.0.0/16',
773f291d
S
5314 'LS': '129.232.0.0/17',
5315 'LT': '78.56.0.0/13',
5316 'LU': '188.42.0.0/16',
5317 'LV': '46.109.0.0/16',
5318 'LY': '41.252.0.0/14',
5319 'MA': '105.128.0.0/11',
5320 'MC': '88.209.64.0/18',
5321 'MD': '37.246.0.0/16',
5322 'ME': '178.175.0.0/17',
5323 'MF': '74.112.232.0/21',
5324 'MG': '154.126.0.0/17',
5325 'MH': '117.103.88.0/21',
5326 'MK': '77.28.0.0/15',
5327 'ML': '154.118.128.0/18',
5328 'MM': '37.111.0.0/17',
5329 'MN': '49.0.128.0/17',
5330 'MO': '60.246.0.0/16',
5331 'MP': '202.88.64.0/20',
5332 'MQ': '109.203.224.0/19',
5333 'MR': '41.188.64.0/18',
5334 'MS': '208.90.112.0/22',
5335 'MT': '46.11.0.0/16',
5336 'MU': '105.16.0.0/12',
5337 'MV': '27.114.128.0/18',
53896ca5 5338 'MW': '102.70.0.0/15',
773f291d
S
5339 'MX': '187.192.0.0/11',
5340 'MY': '175.136.0.0/13',
5341 'MZ': '197.218.0.0/15',
5342 'NA': '41.182.0.0/16',
5343 'NC': '101.101.0.0/18',
5344 'NE': '197.214.0.0/18',
5345 'NF': '203.17.240.0/22',
5346 'NG': '105.112.0.0/12',
5347 'NI': '186.76.0.0/15',
5348 'NL': '145.96.0.0/11',
5349 'NO': '84.208.0.0/13',
5350 'NP': '36.252.0.0/15',
5351 'NR': '203.98.224.0/19',
5352 'NU': '49.156.48.0/22',
5353 'NZ': '49.224.0.0/14',
5354 'OM': '5.36.0.0/15',
5355 'PA': '186.72.0.0/15',
5356 'PE': '186.160.0.0/14',
5357 'PF': '123.50.64.0/18',
5358 'PG': '124.240.192.0/19',
5359 'PH': '49.144.0.0/13',
5360 'PK': '39.32.0.0/11',
5361 'PL': '83.0.0.0/11',
5362 'PM': '70.36.0.0/20',
5363 'PR': '66.50.0.0/16',
5364 'PS': '188.161.0.0/16',
5365 'PT': '85.240.0.0/13',
5366 'PW': '202.124.224.0/20',
5367 'PY': '181.120.0.0/14',
5368 'QA': '37.210.0.0/15',
53896ca5 5369 'RE': '102.35.0.0/16',
773f291d 5370 'RO': '79.112.0.0/13',
53896ca5 5371 'RS': '93.86.0.0/15',
773f291d 5372 'RU': '5.136.0.0/13',
53896ca5 5373 'RW': '41.186.0.0/16',
773f291d
S
5374 'SA': '188.48.0.0/13',
5375 'SB': '202.1.160.0/19',
5376 'SC': '154.192.0.0/11',
53896ca5 5377 'SD': '102.120.0.0/13',
773f291d 5378 'SE': '78.64.0.0/12',
53896ca5 5379 'SG': '8.128.0.0/10',
773f291d
S
5380 'SI': '188.196.0.0/14',
5381 'SK': '78.98.0.0/15',
53896ca5 5382 'SL': '102.143.0.0/17',
773f291d
S
5383 'SM': '89.186.32.0/19',
5384 'SN': '41.82.0.0/15',
53896ca5 5385 'SO': '154.115.192.0/18',
773f291d
S
5386 'SR': '186.179.128.0/17',
5387 'SS': '105.235.208.0/21',
5388 'ST': '197.159.160.0/19',
5389 'SV': '168.243.0.0/16',
5390 'SX': '190.102.0.0/20',
5391 'SY': '5.0.0.0/16',
5392 'SZ': '41.84.224.0/19',
5393 'TC': '65.255.48.0/20',
5394 'TD': '154.68.128.0/19',
5395 'TG': '196.168.0.0/14',
5396 'TH': '171.96.0.0/13',
5397 'TJ': '85.9.128.0/18',
5398 'TK': '27.96.24.0/21',
5399 'TL': '180.189.160.0/20',
5400 'TM': '95.85.96.0/19',
5401 'TN': '197.0.0.0/11',
5402 'TO': '175.176.144.0/21',
5403 'TR': '78.160.0.0/11',
5404 'TT': '186.44.0.0/15',
5405 'TV': '202.2.96.0/19',
5406 'TW': '120.96.0.0/11',
5407 'TZ': '156.156.0.0/14',
53896ca5
S
5408 'UA': '37.52.0.0/14',
5409 'UG': '102.80.0.0/13',
5410 'US': '6.0.0.0/8',
773f291d 5411 'UY': '167.56.0.0/13',
53896ca5 5412 'UZ': '84.54.64.0/18',
773f291d 5413 'VA': '212.77.0.0/19',
53896ca5 5414 'VC': '207.191.240.0/21',
773f291d 5415 'VE': '186.88.0.0/13',
53896ca5 5416 'VG': '66.81.192.0/20',
773f291d
S
5417 'VI': '146.226.0.0/16',
5418 'VN': '14.160.0.0/11',
5419 'VU': '202.80.32.0/20',
5420 'WF': '117.20.32.0/21',
5421 'WS': '202.4.32.0/19',
5422 'YE': '134.35.0.0/16',
5423 'YT': '41.242.116.0/22',
5424 'ZA': '41.0.0.0/11',
53896ca5
S
5425 'ZM': '102.144.0.0/13',
5426 'ZW': '102.177.192.0/18',
773f291d
S
5427 }
5428
5429 @classmethod
5f95927a
S
5430 def random_ipv4(cls, code_or_block):
5431 if len(code_or_block) == 2:
5432 block = cls._country_ip_map.get(code_or_block.upper())
5433 if not block:
5434 return None
5435 else:
5436 block = code_or_block
773f291d
S
5437 addr, preflen = block.split('/')
5438 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5439 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5440 return compat_str(socket.inet_ntoa(
4248dad9 5441 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5442
5443
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler whose proxy can be overridden per request.

    A request carrying a 'Ytdl-request-proxy' header uses that proxy instead
    of the configured one; the special value '__noproxy__' disables proxying.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        for scheme in ('http', 'https'):
            # scheme and bound method are captured through default arguments
            # so each lambda keeps its own values.
            handler = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, handler)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only mark the request here: yt-dlp's http/https handlers take
            # care of wrapping the socket with socks.
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5467
5468
0a5445dd
YCH
5469# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5470# released into Public Domain
5471# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5472
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a non-negative integer to a big-endian byte string.

    The result has no leading zero bytes (zero itself becomes a single
    zero byte). If blocksize is greater than zero, the front of the byte
    string is padded with zero bytes so that the length is a multiple of
    blocksize.
    """
    words = []
    n = int(n)
    while n > 0:
        # Emit 32 bits at a time, least significant word first
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    # Most significant word first, without leading zeros; an empty result
    # (n == 0) is represented as one zero byte
    packed = b''.join(reversed(words)).lstrip(b'\000') or b'\000'

    if blocksize > 0:
        remainder = len(packed) % blocksize
        if remainder:
            packed = (blocksize - remainder) * b'\000' + packed
    return packed
5501
5502
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Interpret a big-endian byte string as a non-negative integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    missing = -len(s) % 4
    if missing:
        # Left-pad with zero bytes so the data splits into 32-bit words
        s = b'\000' * missing + s

    result = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        result = (result << 32) + word
    return result
5518
5519
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt one block with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object; its byte order is
              reversed before it is read as a hexadecimal number
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return '%x' % ciphertext
81bdc8fd
YCH
5535
5536
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is laid out as [0, 2] + padding + [0] + data, where the
    padding consists of random NONZERO octets: the first zero octet after
    the two-byte header marks where the data starts, so a zero inside the
    padding would make the block decode incorrectly.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for the header
                               and at least 8 padding octets
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # randint bounds are inclusive; start at 1 so no padding octet is zero
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5550
5551
def encode_base_n(num, n, table=None):
    """Encode the integer num as a string in base n.

    table supplies the digit characters; when omitted, the first n
    characters of 0-9, a-z, A-Z are used (so the default supports bases
    up to 62). Negative numbers are encoded as '-' followed by the
    encoding of their absolute value.

    Raises ValueError if n exceeds the table length, or if a non-zero
    num is to be encoded in a base smaller than 2.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # A base below 2 would never terminate (or would divide by zero)
    if n < 2:
        raise ValueError('base %d is too small to encode a non-zero number' % n)

    # Floor division of a negative number never reaches zero, so encode
    # the magnitude and prepend the sign instead of looping forever
    if num < 0:
        return '-' + encode_base_n(-num, n, table)

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
f52354a8
YCH
5568
5569
def decode_packed_codes(code):
    """Expand packed code found in code.

    PACKED_CODES_RE extracts the obfuscated body, the numeric base, the
    symbol count and the '|'-separated symbol list. Every word of the
    body is then replaced through a table keyed by the base-n encoding of
    each symbol index; an empty symbol maps a key to itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    while count:
        count -= 1
        encoded_index = encode_base_n(count, base)
        symbol_table[encoded_index] = symbols[count] or encoded_index

    def substitute(word_match):
        return symbol_table[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', substitute, obfuscated_code)
e154c651 5586
5587
1ced2221
S
def caesar(s, alphabet, shift):
    """Apply a Caesar shift to s over the given alphabet.

    Each character of s that occurs in alphabet is replaced by the
    character shift positions further along (wrapping around); all other
    characters are kept. A shift of 0 returns s unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
5595
5596
def rot47(s):
    """Return s with caesar() applied at a shift of 47 over the 94 printable ASCII characters '!' to '~'."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5599
5600
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict.

    Values may be bare or double-quoted (quoted values may contain
    commas); the surrounding quotes are removed from quoted values.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        name: raw[1:-1] if raw.startswith('"') else raw
        for name, raw in pairs
    }
1143535d
YCH
5608
5609
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as an unsigned 32-bit value."""
    if val < 0:
        # Map the negative value onto its 32-bit unsigned counterpart
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5612
5613
5614# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5615# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into raw colour samples.

    Reference: https://www.w3.org/TR/PNG/

    The chunks are parsed (CRCs are skipped, not verified), all IDAT
    payloads are concatenated and inflated, and the per-row filters
    (Sub, Up, Average, Paeth) are undone. Each row is read as width * 3
    samples, i.e. the code assumes three one-byte samples per pixel.

    Returns (width, height, pixels), where pixels is a list of rows and
    each row is a flat list of width * 3 integers.
    Raises IOError if the signature/IHDR check fails or there is no IDAT
    data.
    """
    remaining = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats = {1: '>B', 2: '>H', 4: '>I'}

    def read_uint(raw):
        # Big-endian unsigned integer of 1, 2 or 4 bytes
        return compat_struct_unpack(formats[len(raw)], raw)[0]

    # Chunk layout: 4-byte length, 4-byte type, data, 4-byte CRC
    chunks = []
    while remaining:
        size = read_uint(remaining[:4])
        kind = remaining[4:8]
        payload = remaining[8:8 + size]
        remaining = remaining[12 + size:]  # also skips the CRC
        chunks.append((kind, payload))

    ihdr = chunks[0][1]
    width = read_uint(ihdr[:4])
    height = read_uint(ihdr[4:8])

    idat = b''.join(payload for kind, payload in chunks if kind == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    raw = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is one filter-type byte followed by stride samples
        row_start = y * (1 + stride)
        filter_type = raw[row_start]
        previous_row = pixels[y - 1] if y > 0 else None

        row = []
        pixels.append(row)

        for x in range(stride):
            value = raw[row_start + 1 + x]
            # Neighbours are the same sample of the pixel to the left
            # (3 samples back) and of the pixel above; 0 at the edges
            left = row[x - 3] if x > 2 else 0
            up = previous_row[x] if y > 0 else 0

            if filter_type == 1:  # Sub
                value = (value + left) & 0xff
            elif filter_type == 2:  # Up
                value = (value + up) & 0xff
            elif filter_type == 3:  # Average
                value = (value + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = previous_row[x - 3] if x > 2 and y > 0 else 0
                estimate = left + up - upper_left

                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                # Use the neighbour closest to the estimate; ties prefer
                # left, then up
                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
                value = (value + predictor) & 0xff

            row.append(value)

    return width, height, pixels
efa97bdc
YCH
5719
5720
def write_xattr(path, key, value):
    """Set the extended attribute key to value on the file at path.

    Backends are tried in this order:
      1. the Python `xattr` module (either pyxattr, detected by its `set`
         function, or the module exposing `setxattr`);
      2. if that import fails: on Windows ('nt'), an NTFS Alternate Data
         Stream named path + ':' + key;
      3. otherwise the `setfattr` or `xattr` command line tools.

    value is written as binary data (and decoded as UTF-8 before being
    passed to the command line tools).

    Raises XAttrMetadataError when the chosen backend fails, and
    XAttrUnavailableError when pyxattr is too old or no backend exists.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): despite the message, this exception is only
                # handled by the `except ImportError` below if
                # XAttrUnavailableError derives from ImportError - confirm
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            # Normalise OS-level failures to the module's own error type
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module available: use platform-specific fallbacks
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The command line tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported together with the
                # tool's stderr output
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
0c265486
YCH
5803
5804
def random_birthday(year_field, month_field, day_field):
    """Return a random date between 1950-01-01 and 1995-12-31 as a dict.

    The dict maps the three given field names to the year, month and day
    of the date, each as a string without zero padding.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_in_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_in_days))
    result = {}
    result[year_field] = str(birthday.year)
    result[month_field] = str(birthday.month)
    result[day_field] = str(birthday.day)
    return result
732044af 5815
c76eb41b 5816
# Templates for internet shortcut files, which are plain text files.
# Each one is filled in with the `%` operator and a mapping: all of them
# use the `url` key, the .desktop template additionally uses `filename`.
# The backslash after the opening quotes keeps the first line of each
# template free of a leading newline.
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''
5842
5843
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    Unicode hostnames are converted with the 'idna' codec. An explicit port is kept, except for port 80 on the `http` scheme, where it is the default and therefore omitted. An IRI without a hostname (e.g. a relative reference) only has its remaining components escaped.

    Raises ValueError for IPv6 (bracketed) hosts, which are not supported.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host part; there is nothing to encode then.
    if iri_parts.hostname:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 may only be dropped where it is the scheme's default; removing it from
    # e.g. an https URL would silently point the URI at a different port.
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
5886
5887
def to_high_limit_path(path):
    r"""Return path in a form that works around the MAX_PATH limitation on Windows.

    On 'win32' and 'cygwin' the absolute path is prefixed with `\\?\`;
    on every other platform path is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # The maximum allowed length for the individual path segments may still be quite limited.
    # (The raw literal carries a trailing space, stripped again, because a
    # raw string cannot end in a backslash.)
    return r'\\?\ '.rstrip() + os.path.abspath(path)
76d321f6 5894
c76eb41b 5895
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj[field] with template, or return default.

    The value is looked up with obj.get(field, default). If it is one of
    the values in ignore, default is returned. Otherwise func (if given)
    is applied first, and the result is formatted with template unless
    it is itself one of the ignored values.
    """
    val = obj.get(field, default)
    if val in ignore:
        return default
    if func:
        val = func(val)
        if val in ignore:
            return default
    return template % val
00dd0cd5 5901
5902
def clean_podcast_url(url):
    """Remove known podcast tracking/redirect prefixes from url.

    Every occurrence of one of the prefixes below (up to and including
    the slash that ends it) is deleted; the rest of the URL is kept.
    """
    tracking_prefix = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix, '', url)
ffcb8191
THD
5918
5919
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version 4 UUID as a lowercase hyphenated string.

    In the template, each `x` becomes a random hex digit. The `y` digit
    carries the variant bits and must be one of 8, 9, a or b for the
    result to be a valid RFC 4122 UUID, so it is drawn from that set
    only. The version digit `4` is fixed by the template.
    """
    def _random_digit(mobj):
        if mobj.group(0) == 'y':
            return random.choice('89ab')
        return _HEX_TABLE[random.randint(0, 15)]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 5925
5926
def make_dir(path, to_screen=None):
    """Create the parent directory of path if it does not exist yet.

    Returns True on success (including when there is nothing to create)
    and False if the directory could not be created. In the failure case
    the error is reported through to_screen, if a callable was given.
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # callable() returns a bool, so it must be tested directly:
        # comparing it with None is always true and would call None
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
f74980cb 5937
5938
def get_executable_path():
    """Return the absolute path of the directory the program is run from.

    - frozen (PyInstaller) build: the directory of sys.executable
    - module loaded by a zipimporter: two levels above this file's directory
    - otherwise: the parent of this file's directory
    """
    from zipimport import zipimporter

    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        base = os.path.dirname(sys.executable)
    else:
        running_from_zip = isinstance(globals().get('__loader__'), zipimporter)
        levels_up = '../..' if running_from_zip else '..'
        base = os.path.join(os.path.dirname(__file__), levels_up)
    return os.path.abspath(base)
5948
5949
def load_plugins(name, type, namespace):
    """Load the plugin module name and collect its objects whose names end with type.

    The module is searched for in the 'ytdlp_plugins' directory below
    get_executable_path(). Every attribute of the module whose name ends
    with type is stored in namespace under its own name and appended to
    the returned list. If the module cannot be imported, an empty list is
    returned.
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        plugin_info = imp.find_module(name, [plugin_dir])
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name.endswith(type):
                klass = getattr(plugins, attr_name)
                classes.append(klass)
                namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by imp.find_module is an open file (or None)
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
06167fbb 5969
5970
def traverse_dict(dictn, keys, casesense=True):
    """Follow keys, one per nesting level, through nested dicts.

    Returns the value found after the last key, or None as soon as a
    level is not a dict or a key is missing. With casesense=False, keys
    are compared in lower case at every level.
    """
    node, remaining = dictn, keys
    while True:
        if not isinstance(node, dict):
            return None
        wanted = remaining[0]
        if not casesense:
            node = {key.lower(): val for key, val in node.items()}
            wanted = wanted.lower()
        node = node.get(wanted, None)
        if len(remaining) < 2:
            return node
        remaining = remaining[1:]