]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
Write current epoch to infojson when using `--no-clean-infojson`
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
8f9312c3 43 compat_basestring,
8c25f81b 44 compat_chr,
1bab3437 45 compat_cookiejar,
d7cd9a9e 46 compat_ctypes_WINFUNCTYPE,
36e6f62c 47 compat_etree_fromstring,
51098426 48 compat_expanduser,
8c25f81b 49 compat_html_entities,
55b2f099 50 compat_html_entities_html5,
be4a824d 51 compat_http_client,
42db58ec 52 compat_integer_types,
e29663c6 53 compat_numeric_types,
c86b6142 54 compat_kwargs,
efa97bdc 55 compat_os_name,
8c25f81b 56 compat_parse_qs,
702ccf2d 57 compat_shlex_quote,
8c25f81b 58 compat_str,
edaa23f8 59 compat_struct_pack,
d3f8e038 60 compat_struct_unpack,
8c25f81b
PH
61 compat_urllib_error,
62 compat_urllib_parse,
15707c7e 63 compat_urllib_parse_urlencode,
8c25f81b 64 compat_urllib_parse_urlparse,
732044af 65 compat_urllib_parse_urlunparse,
66 compat_urllib_parse_quote,
67 compat_urllib_parse_quote_plus,
7581bfc9 68 compat_urllib_parse_unquote_plus,
8c25f81b
PH
69 compat_urllib_request,
70 compat_urlparse,
810c10ba 71 compat_xpath,
8c25f81b 72)
4644ac55 73
71aff188
YCH
74from .socks import (
75 ProxyType,
76 sockssocket,
77)
78
4644ac55 79
51fb4995
YCH
def register_socks_protocols():
    """Register the SOCKS URL schemes as netloc-bearing schemes.

    Appends 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc`` unless they are already listed, so
    calling this function more than once does not create duplicates.
    """
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    netloc_schemes = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme in netloc_schemes:
            # Already registered; skip it to keep the list free of duplicates.
            continue
        netloc_schemes.append(scheme)
87
88
468e2e92
FV
# The type of a compiled regular expression is not clearly defined under a
# public name otherwise, so take it from a compiled (empty) pattern.
compiled_regex_type = type(re.compile(''))
91
f7a147e3
S
92
93def random_user_agent():
94 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
95 _CHROME_VERSIONS = (
96 '74.0.3729.129',
97 '76.0.3780.3',
98 '76.0.3780.2',
99 '74.0.3729.128',
100 '76.0.3780.1',
101 '76.0.3780.0',
102 '75.0.3770.15',
103 '74.0.3729.127',
104 '74.0.3729.126',
105 '76.0.3779.1',
106 '76.0.3779.0',
107 '75.0.3770.14',
108 '74.0.3729.125',
109 '76.0.3778.1',
110 '76.0.3778.0',
111 '75.0.3770.13',
112 '74.0.3729.124',
113 '74.0.3729.123',
114 '73.0.3683.121',
115 '76.0.3777.1',
116 '76.0.3777.0',
117 '75.0.3770.12',
118 '74.0.3729.122',
119 '76.0.3776.4',
120 '75.0.3770.11',
121 '74.0.3729.121',
122 '76.0.3776.3',
123 '76.0.3776.2',
124 '73.0.3683.120',
125 '74.0.3729.120',
126 '74.0.3729.119',
127 '74.0.3729.118',
128 '76.0.3776.1',
129 '76.0.3776.0',
130 '76.0.3775.5',
131 '75.0.3770.10',
132 '74.0.3729.117',
133 '76.0.3775.4',
134 '76.0.3775.3',
135 '74.0.3729.116',
136 '75.0.3770.9',
137 '76.0.3775.2',
138 '76.0.3775.1',
139 '76.0.3775.0',
140 '75.0.3770.8',
141 '74.0.3729.115',
142 '74.0.3729.114',
143 '76.0.3774.1',
144 '76.0.3774.0',
145 '75.0.3770.7',
146 '74.0.3729.113',
147 '74.0.3729.112',
148 '74.0.3729.111',
149 '76.0.3773.1',
150 '76.0.3773.0',
151 '75.0.3770.6',
152 '74.0.3729.110',
153 '74.0.3729.109',
154 '76.0.3772.1',
155 '76.0.3772.0',
156 '75.0.3770.5',
157 '74.0.3729.108',
158 '74.0.3729.107',
159 '76.0.3771.1',
160 '76.0.3771.0',
161 '75.0.3770.4',
162 '74.0.3729.106',
163 '74.0.3729.105',
164 '75.0.3770.3',
165 '74.0.3729.104',
166 '74.0.3729.103',
167 '74.0.3729.102',
168 '75.0.3770.2',
169 '74.0.3729.101',
170 '75.0.3770.1',
171 '75.0.3770.0',
172 '74.0.3729.100',
173 '75.0.3769.5',
174 '75.0.3769.4',
175 '74.0.3729.99',
176 '75.0.3769.3',
177 '75.0.3769.2',
178 '75.0.3768.6',
179 '74.0.3729.98',
180 '75.0.3769.1',
181 '75.0.3769.0',
182 '74.0.3729.97',
183 '73.0.3683.119',
184 '73.0.3683.118',
185 '74.0.3729.96',
186 '75.0.3768.5',
187 '75.0.3768.4',
188 '75.0.3768.3',
189 '75.0.3768.2',
190 '74.0.3729.95',
191 '74.0.3729.94',
192 '75.0.3768.1',
193 '75.0.3768.0',
194 '74.0.3729.93',
195 '74.0.3729.92',
196 '73.0.3683.117',
197 '74.0.3729.91',
198 '75.0.3766.3',
199 '74.0.3729.90',
200 '75.0.3767.2',
201 '75.0.3767.1',
202 '75.0.3767.0',
203 '74.0.3729.89',
204 '73.0.3683.116',
205 '75.0.3766.2',
206 '74.0.3729.88',
207 '75.0.3766.1',
208 '75.0.3766.0',
209 '74.0.3729.87',
210 '73.0.3683.115',
211 '74.0.3729.86',
212 '75.0.3765.1',
213 '75.0.3765.0',
214 '74.0.3729.85',
215 '73.0.3683.114',
216 '74.0.3729.84',
217 '75.0.3764.1',
218 '75.0.3764.0',
219 '74.0.3729.83',
220 '73.0.3683.113',
221 '75.0.3763.2',
222 '75.0.3761.4',
223 '74.0.3729.82',
224 '75.0.3763.1',
225 '75.0.3763.0',
226 '74.0.3729.81',
227 '73.0.3683.112',
228 '75.0.3762.1',
229 '75.0.3762.0',
230 '74.0.3729.80',
231 '75.0.3761.3',
232 '74.0.3729.79',
233 '73.0.3683.111',
234 '75.0.3761.2',
235 '74.0.3729.78',
236 '74.0.3729.77',
237 '75.0.3761.1',
238 '75.0.3761.0',
239 '73.0.3683.110',
240 '74.0.3729.76',
241 '74.0.3729.75',
242 '75.0.3760.0',
243 '74.0.3729.74',
244 '75.0.3759.8',
245 '75.0.3759.7',
246 '75.0.3759.6',
247 '74.0.3729.73',
248 '75.0.3759.5',
249 '74.0.3729.72',
250 '73.0.3683.109',
251 '75.0.3759.4',
252 '75.0.3759.3',
253 '74.0.3729.71',
254 '75.0.3759.2',
255 '74.0.3729.70',
256 '73.0.3683.108',
257 '74.0.3729.69',
258 '75.0.3759.1',
259 '75.0.3759.0',
260 '74.0.3729.68',
261 '73.0.3683.107',
262 '74.0.3729.67',
263 '75.0.3758.1',
264 '75.0.3758.0',
265 '74.0.3729.66',
266 '73.0.3683.106',
267 '74.0.3729.65',
268 '75.0.3757.1',
269 '75.0.3757.0',
270 '74.0.3729.64',
271 '73.0.3683.105',
272 '74.0.3729.63',
273 '75.0.3756.1',
274 '75.0.3756.0',
275 '74.0.3729.62',
276 '73.0.3683.104',
277 '75.0.3755.3',
278 '75.0.3755.2',
279 '73.0.3683.103',
280 '75.0.3755.1',
281 '75.0.3755.0',
282 '74.0.3729.61',
283 '73.0.3683.102',
284 '74.0.3729.60',
285 '75.0.3754.2',
286 '74.0.3729.59',
287 '75.0.3753.4',
288 '74.0.3729.58',
289 '75.0.3754.1',
290 '75.0.3754.0',
291 '74.0.3729.57',
292 '73.0.3683.101',
293 '75.0.3753.3',
294 '75.0.3752.2',
295 '75.0.3753.2',
296 '74.0.3729.56',
297 '75.0.3753.1',
298 '75.0.3753.0',
299 '74.0.3729.55',
300 '73.0.3683.100',
301 '74.0.3729.54',
302 '75.0.3752.1',
303 '75.0.3752.0',
304 '74.0.3729.53',
305 '73.0.3683.99',
306 '74.0.3729.52',
307 '75.0.3751.1',
308 '75.0.3751.0',
309 '74.0.3729.51',
310 '73.0.3683.98',
311 '74.0.3729.50',
312 '75.0.3750.0',
313 '74.0.3729.49',
314 '74.0.3729.48',
315 '74.0.3729.47',
316 '75.0.3749.3',
317 '74.0.3729.46',
318 '73.0.3683.97',
319 '75.0.3749.2',
320 '74.0.3729.45',
321 '75.0.3749.1',
322 '75.0.3749.0',
323 '74.0.3729.44',
324 '73.0.3683.96',
325 '74.0.3729.43',
326 '74.0.3729.42',
327 '75.0.3748.1',
328 '75.0.3748.0',
329 '74.0.3729.41',
330 '75.0.3747.1',
331 '73.0.3683.95',
332 '75.0.3746.4',
333 '74.0.3729.40',
334 '74.0.3729.39',
335 '75.0.3747.0',
336 '75.0.3746.3',
337 '75.0.3746.2',
338 '74.0.3729.38',
339 '75.0.3746.1',
340 '75.0.3746.0',
341 '74.0.3729.37',
342 '73.0.3683.94',
343 '75.0.3745.5',
344 '75.0.3745.4',
345 '75.0.3745.3',
346 '75.0.3745.2',
347 '74.0.3729.36',
348 '75.0.3745.1',
349 '75.0.3745.0',
350 '75.0.3744.2',
351 '74.0.3729.35',
352 '73.0.3683.93',
353 '74.0.3729.34',
354 '75.0.3744.1',
355 '75.0.3744.0',
356 '74.0.3729.33',
357 '73.0.3683.92',
358 '74.0.3729.32',
359 '74.0.3729.31',
360 '73.0.3683.91',
361 '75.0.3741.2',
362 '75.0.3740.5',
363 '74.0.3729.30',
364 '75.0.3741.1',
365 '75.0.3741.0',
366 '74.0.3729.29',
367 '75.0.3740.4',
368 '73.0.3683.90',
369 '74.0.3729.28',
370 '75.0.3740.3',
371 '73.0.3683.89',
372 '75.0.3740.2',
373 '74.0.3729.27',
374 '75.0.3740.1',
375 '75.0.3740.0',
376 '74.0.3729.26',
377 '73.0.3683.88',
378 '73.0.3683.87',
379 '74.0.3729.25',
380 '75.0.3739.1',
381 '75.0.3739.0',
382 '73.0.3683.86',
383 '74.0.3729.24',
384 '73.0.3683.85',
385 '75.0.3738.4',
386 '75.0.3738.3',
387 '75.0.3738.2',
388 '75.0.3738.1',
389 '75.0.3738.0',
390 '74.0.3729.23',
391 '73.0.3683.84',
392 '74.0.3729.22',
393 '74.0.3729.21',
394 '75.0.3737.1',
395 '75.0.3737.0',
396 '74.0.3729.20',
397 '73.0.3683.83',
398 '74.0.3729.19',
399 '75.0.3736.1',
400 '75.0.3736.0',
401 '74.0.3729.18',
402 '73.0.3683.82',
403 '74.0.3729.17',
404 '75.0.3735.1',
405 '75.0.3735.0',
406 '74.0.3729.16',
407 '73.0.3683.81',
408 '75.0.3734.1',
409 '75.0.3734.0',
410 '74.0.3729.15',
411 '73.0.3683.80',
412 '74.0.3729.14',
413 '75.0.3733.1',
414 '75.0.3733.0',
415 '75.0.3732.1',
416 '74.0.3729.13',
417 '74.0.3729.12',
418 '73.0.3683.79',
419 '74.0.3729.11',
420 '75.0.3732.0',
421 '74.0.3729.10',
422 '73.0.3683.78',
423 '74.0.3729.9',
424 '74.0.3729.8',
425 '74.0.3729.7',
426 '75.0.3731.3',
427 '75.0.3731.2',
428 '75.0.3731.0',
429 '74.0.3729.6',
430 '73.0.3683.77',
431 '73.0.3683.76',
432 '75.0.3730.5',
433 '75.0.3730.4',
434 '73.0.3683.75',
435 '74.0.3729.5',
436 '73.0.3683.74',
437 '75.0.3730.3',
438 '75.0.3730.2',
439 '74.0.3729.4',
440 '73.0.3683.73',
441 '73.0.3683.72',
442 '75.0.3730.1',
443 '75.0.3730.0',
444 '74.0.3729.3',
445 '73.0.3683.71',
446 '74.0.3729.2',
447 '73.0.3683.70',
448 '74.0.3729.1',
449 '74.0.3729.0',
450 '74.0.3726.4',
451 '73.0.3683.69',
452 '74.0.3726.3',
453 '74.0.3728.0',
454 '74.0.3726.2',
455 '73.0.3683.68',
456 '74.0.3726.1',
457 '74.0.3726.0',
458 '74.0.3725.4',
459 '73.0.3683.67',
460 '73.0.3683.66',
461 '74.0.3725.3',
462 '74.0.3725.2',
463 '74.0.3725.1',
464 '74.0.3724.8',
465 '74.0.3725.0',
466 '73.0.3683.65',
467 '74.0.3724.7',
468 '74.0.3724.6',
469 '74.0.3724.5',
470 '74.0.3724.4',
471 '74.0.3724.3',
472 '74.0.3724.2',
473 '74.0.3724.1',
474 '74.0.3724.0',
475 '73.0.3683.64',
476 '74.0.3723.1',
477 '74.0.3723.0',
478 '73.0.3683.63',
479 '74.0.3722.1',
480 '74.0.3722.0',
481 '73.0.3683.62',
482 '74.0.3718.9',
483 '74.0.3702.3',
484 '74.0.3721.3',
485 '74.0.3721.2',
486 '74.0.3721.1',
487 '74.0.3721.0',
488 '74.0.3720.6',
489 '73.0.3683.61',
490 '72.0.3626.122',
491 '73.0.3683.60',
492 '74.0.3720.5',
493 '72.0.3626.121',
494 '74.0.3718.8',
495 '74.0.3720.4',
496 '74.0.3720.3',
497 '74.0.3718.7',
498 '74.0.3720.2',
499 '74.0.3720.1',
500 '74.0.3720.0',
501 '74.0.3718.6',
502 '74.0.3719.5',
503 '73.0.3683.59',
504 '74.0.3718.5',
505 '74.0.3718.4',
506 '74.0.3719.4',
507 '74.0.3719.3',
508 '74.0.3719.2',
509 '74.0.3719.1',
510 '73.0.3683.58',
511 '74.0.3719.0',
512 '73.0.3683.57',
513 '73.0.3683.56',
514 '74.0.3718.3',
515 '73.0.3683.55',
516 '74.0.3718.2',
517 '74.0.3718.1',
518 '74.0.3718.0',
519 '73.0.3683.54',
520 '74.0.3717.2',
521 '73.0.3683.53',
522 '74.0.3717.1',
523 '74.0.3717.0',
524 '73.0.3683.52',
525 '74.0.3716.1',
526 '74.0.3716.0',
527 '73.0.3683.51',
528 '74.0.3715.1',
529 '74.0.3715.0',
530 '73.0.3683.50',
531 '74.0.3711.2',
532 '74.0.3714.2',
533 '74.0.3713.3',
534 '74.0.3714.1',
535 '74.0.3714.0',
536 '73.0.3683.49',
537 '74.0.3713.1',
538 '74.0.3713.0',
539 '72.0.3626.120',
540 '73.0.3683.48',
541 '74.0.3712.2',
542 '74.0.3712.1',
543 '74.0.3712.0',
544 '73.0.3683.47',
545 '72.0.3626.119',
546 '73.0.3683.46',
547 '74.0.3710.2',
548 '72.0.3626.118',
549 '74.0.3711.1',
550 '74.0.3711.0',
551 '73.0.3683.45',
552 '72.0.3626.117',
553 '74.0.3710.1',
554 '74.0.3710.0',
555 '73.0.3683.44',
556 '72.0.3626.116',
557 '74.0.3709.1',
558 '74.0.3709.0',
559 '74.0.3704.9',
560 '73.0.3683.43',
561 '72.0.3626.115',
562 '74.0.3704.8',
563 '74.0.3704.7',
564 '74.0.3708.0',
565 '74.0.3706.7',
566 '74.0.3704.6',
567 '73.0.3683.42',
568 '72.0.3626.114',
569 '74.0.3706.6',
570 '72.0.3626.113',
571 '74.0.3704.5',
572 '74.0.3706.5',
573 '74.0.3706.4',
574 '74.0.3706.3',
575 '74.0.3706.2',
576 '74.0.3706.1',
577 '74.0.3706.0',
578 '73.0.3683.41',
579 '72.0.3626.112',
580 '74.0.3705.1',
581 '74.0.3705.0',
582 '73.0.3683.40',
583 '72.0.3626.111',
584 '73.0.3683.39',
585 '74.0.3704.4',
586 '73.0.3683.38',
587 '74.0.3704.3',
588 '74.0.3704.2',
589 '74.0.3704.1',
590 '74.0.3704.0',
591 '73.0.3683.37',
592 '72.0.3626.110',
593 '72.0.3626.109',
594 '74.0.3703.3',
595 '74.0.3703.2',
596 '73.0.3683.36',
597 '74.0.3703.1',
598 '74.0.3703.0',
599 '73.0.3683.35',
600 '72.0.3626.108',
601 '74.0.3702.2',
602 '74.0.3699.3',
603 '74.0.3702.1',
604 '74.0.3702.0',
605 '73.0.3683.34',
606 '72.0.3626.107',
607 '73.0.3683.33',
608 '74.0.3701.1',
609 '74.0.3701.0',
610 '73.0.3683.32',
611 '73.0.3683.31',
612 '72.0.3626.105',
613 '74.0.3700.1',
614 '74.0.3700.0',
615 '73.0.3683.29',
616 '72.0.3626.103',
617 '74.0.3699.2',
618 '74.0.3699.1',
619 '74.0.3699.0',
620 '73.0.3683.28',
621 '72.0.3626.102',
622 '73.0.3683.27',
623 '73.0.3683.26',
624 '74.0.3698.0',
625 '74.0.3696.2',
626 '72.0.3626.101',
627 '73.0.3683.25',
628 '74.0.3696.1',
629 '74.0.3696.0',
630 '74.0.3694.8',
631 '72.0.3626.100',
632 '74.0.3694.7',
633 '74.0.3694.6',
634 '74.0.3694.5',
635 '74.0.3694.4',
636 '72.0.3626.99',
637 '72.0.3626.98',
638 '74.0.3694.3',
639 '73.0.3683.24',
640 '72.0.3626.97',
641 '72.0.3626.96',
642 '72.0.3626.95',
643 '73.0.3683.23',
644 '72.0.3626.94',
645 '73.0.3683.22',
646 '73.0.3683.21',
647 '72.0.3626.93',
648 '74.0.3694.2',
649 '72.0.3626.92',
650 '74.0.3694.1',
651 '74.0.3694.0',
652 '74.0.3693.6',
653 '73.0.3683.20',
654 '72.0.3626.91',
655 '74.0.3693.5',
656 '74.0.3693.4',
657 '74.0.3693.3',
658 '74.0.3693.2',
659 '73.0.3683.19',
660 '74.0.3693.1',
661 '74.0.3693.0',
662 '73.0.3683.18',
663 '72.0.3626.90',
664 '74.0.3692.1',
665 '74.0.3692.0',
666 '73.0.3683.17',
667 '72.0.3626.89',
668 '74.0.3687.3',
669 '74.0.3691.1',
670 '74.0.3691.0',
671 '73.0.3683.16',
672 '72.0.3626.88',
673 '72.0.3626.87',
674 '73.0.3683.15',
675 '74.0.3690.1',
676 '74.0.3690.0',
677 '73.0.3683.14',
678 '72.0.3626.86',
679 '73.0.3683.13',
680 '73.0.3683.12',
681 '74.0.3689.1',
682 '74.0.3689.0',
683 '73.0.3683.11',
684 '72.0.3626.85',
685 '73.0.3683.10',
686 '72.0.3626.84',
687 '73.0.3683.9',
688 '74.0.3688.1',
689 '74.0.3688.0',
690 '73.0.3683.8',
691 '72.0.3626.83',
692 '74.0.3687.2',
693 '74.0.3687.1',
694 '74.0.3687.0',
695 '73.0.3683.7',
696 '72.0.3626.82',
697 '74.0.3686.4',
698 '72.0.3626.81',
699 '74.0.3686.3',
700 '74.0.3686.2',
701 '74.0.3686.1',
702 '74.0.3686.0',
703 '73.0.3683.6',
704 '72.0.3626.80',
705 '74.0.3685.1',
706 '74.0.3685.0',
707 '73.0.3683.5',
708 '72.0.3626.79',
709 '74.0.3684.1',
710 '74.0.3684.0',
711 '73.0.3683.4',
712 '72.0.3626.78',
713 '72.0.3626.77',
714 '73.0.3683.3',
715 '73.0.3683.2',
716 '72.0.3626.76',
717 '73.0.3683.1',
718 '73.0.3683.0',
719 '72.0.3626.75',
720 '71.0.3578.141',
721 '73.0.3682.1',
722 '73.0.3682.0',
723 '72.0.3626.74',
724 '71.0.3578.140',
725 '73.0.3681.4',
726 '73.0.3681.3',
727 '73.0.3681.2',
728 '73.0.3681.1',
729 '73.0.3681.0',
730 '72.0.3626.73',
731 '71.0.3578.139',
732 '72.0.3626.72',
733 '72.0.3626.71',
734 '73.0.3680.1',
735 '73.0.3680.0',
736 '72.0.3626.70',
737 '71.0.3578.138',
738 '73.0.3678.2',
739 '73.0.3679.1',
740 '73.0.3679.0',
741 '72.0.3626.69',
742 '71.0.3578.137',
743 '73.0.3678.1',
744 '73.0.3678.0',
745 '71.0.3578.136',
746 '73.0.3677.1',
747 '73.0.3677.0',
748 '72.0.3626.68',
749 '72.0.3626.67',
750 '71.0.3578.135',
751 '73.0.3676.1',
752 '73.0.3676.0',
753 '73.0.3674.2',
754 '72.0.3626.66',
755 '71.0.3578.134',
756 '73.0.3674.1',
757 '73.0.3674.0',
758 '72.0.3626.65',
759 '71.0.3578.133',
760 '73.0.3673.2',
761 '73.0.3673.1',
762 '73.0.3673.0',
763 '72.0.3626.64',
764 '71.0.3578.132',
765 '72.0.3626.63',
766 '72.0.3626.62',
767 '72.0.3626.61',
768 '72.0.3626.60',
769 '73.0.3672.1',
770 '73.0.3672.0',
771 '72.0.3626.59',
772 '71.0.3578.131',
773 '73.0.3671.3',
774 '73.0.3671.2',
775 '73.0.3671.1',
776 '73.0.3671.0',
777 '72.0.3626.58',
778 '71.0.3578.130',
779 '73.0.3670.1',
780 '73.0.3670.0',
781 '72.0.3626.57',
782 '71.0.3578.129',
783 '73.0.3669.1',
784 '73.0.3669.0',
785 '72.0.3626.56',
786 '71.0.3578.128',
787 '73.0.3668.2',
788 '73.0.3668.1',
789 '73.0.3668.0',
790 '72.0.3626.55',
791 '71.0.3578.127',
792 '73.0.3667.2',
793 '73.0.3667.1',
794 '73.0.3667.0',
795 '72.0.3626.54',
796 '71.0.3578.126',
797 '73.0.3666.1',
798 '73.0.3666.0',
799 '72.0.3626.53',
800 '71.0.3578.125',
801 '73.0.3665.4',
802 '73.0.3665.3',
803 '72.0.3626.52',
804 '73.0.3665.2',
805 '73.0.3664.4',
806 '73.0.3665.1',
807 '73.0.3665.0',
808 '72.0.3626.51',
809 '71.0.3578.124',
810 '72.0.3626.50',
811 '73.0.3664.3',
812 '73.0.3664.2',
813 '73.0.3664.1',
814 '73.0.3664.0',
815 '73.0.3663.2',
816 '72.0.3626.49',
817 '71.0.3578.123',
818 '73.0.3663.1',
819 '73.0.3663.0',
820 '72.0.3626.48',
821 '71.0.3578.122',
822 '73.0.3662.1',
823 '73.0.3662.0',
824 '72.0.3626.47',
825 '71.0.3578.121',
826 '73.0.3661.1',
827 '72.0.3626.46',
828 '73.0.3661.0',
829 '72.0.3626.45',
830 '71.0.3578.120',
831 '73.0.3660.2',
832 '73.0.3660.1',
833 '73.0.3660.0',
834 '72.0.3626.44',
835 '71.0.3578.119',
836 '73.0.3659.1',
837 '73.0.3659.0',
838 '72.0.3626.43',
839 '71.0.3578.118',
840 '73.0.3658.1',
841 '73.0.3658.0',
842 '72.0.3626.42',
843 '71.0.3578.117',
844 '73.0.3657.1',
845 '73.0.3657.0',
846 '72.0.3626.41',
847 '71.0.3578.116',
848 '73.0.3656.1',
849 '73.0.3656.0',
850 '72.0.3626.40',
851 '71.0.3578.115',
852 '73.0.3655.1',
853 '73.0.3655.0',
854 '72.0.3626.39',
855 '71.0.3578.114',
856 '73.0.3654.1',
857 '73.0.3654.0',
858 '72.0.3626.38',
859 '71.0.3578.113',
860 '73.0.3653.1',
861 '73.0.3653.0',
862 '72.0.3626.37',
863 '71.0.3578.112',
864 '73.0.3652.1',
865 '73.0.3652.0',
866 '72.0.3626.36',
867 '71.0.3578.111',
868 '73.0.3651.1',
869 '73.0.3651.0',
870 '72.0.3626.35',
871 '71.0.3578.110',
872 '73.0.3650.1',
873 '73.0.3650.0',
874 '72.0.3626.34',
875 '71.0.3578.109',
876 '73.0.3649.1',
877 '73.0.3649.0',
878 '72.0.3626.33',
879 '71.0.3578.108',
880 '73.0.3648.2',
881 '73.0.3648.1',
882 '73.0.3648.0',
883 '72.0.3626.32',
884 '71.0.3578.107',
885 '73.0.3647.2',
886 '73.0.3647.1',
887 '73.0.3647.0',
888 '72.0.3626.31',
889 '71.0.3578.106',
890 '73.0.3635.3',
891 '73.0.3646.2',
892 '73.0.3646.1',
893 '73.0.3646.0',
894 '72.0.3626.30',
895 '71.0.3578.105',
896 '72.0.3626.29',
897 '73.0.3645.2',
898 '73.0.3645.1',
899 '73.0.3645.0',
900 '72.0.3626.28',
901 '71.0.3578.104',
902 '72.0.3626.27',
903 '72.0.3626.26',
904 '72.0.3626.25',
905 '72.0.3626.24',
906 '73.0.3644.0',
907 '73.0.3643.2',
908 '72.0.3626.23',
909 '71.0.3578.103',
910 '73.0.3643.1',
911 '73.0.3643.0',
912 '72.0.3626.22',
913 '71.0.3578.102',
914 '73.0.3642.1',
915 '73.0.3642.0',
916 '72.0.3626.21',
917 '71.0.3578.101',
918 '73.0.3641.1',
919 '73.0.3641.0',
920 '72.0.3626.20',
921 '71.0.3578.100',
922 '72.0.3626.19',
923 '73.0.3640.1',
924 '73.0.3640.0',
925 '72.0.3626.18',
926 '73.0.3639.1',
927 '71.0.3578.99',
928 '73.0.3639.0',
929 '72.0.3626.17',
930 '73.0.3638.2',
931 '72.0.3626.16',
932 '73.0.3638.1',
933 '73.0.3638.0',
934 '72.0.3626.15',
935 '71.0.3578.98',
936 '73.0.3635.2',
937 '71.0.3578.97',
938 '73.0.3637.1',
939 '73.0.3637.0',
940 '72.0.3626.14',
941 '71.0.3578.96',
942 '71.0.3578.95',
943 '72.0.3626.13',
944 '71.0.3578.94',
945 '73.0.3636.2',
946 '71.0.3578.93',
947 '73.0.3636.1',
948 '73.0.3636.0',
949 '72.0.3626.12',
950 '71.0.3578.92',
951 '73.0.3635.1',
952 '73.0.3635.0',
953 '72.0.3626.11',
954 '71.0.3578.91',
955 '73.0.3634.2',
956 '73.0.3634.1',
957 '73.0.3634.0',
958 '72.0.3626.10',
959 '71.0.3578.90',
960 '71.0.3578.89',
961 '73.0.3633.2',
962 '73.0.3633.1',
963 '73.0.3633.0',
964 '72.0.3610.4',
965 '72.0.3626.9',
966 '71.0.3578.88',
967 '73.0.3632.5',
968 '73.0.3632.4',
969 '73.0.3632.3',
970 '73.0.3632.2',
971 '73.0.3632.1',
972 '73.0.3632.0',
973 '72.0.3626.8',
974 '71.0.3578.87',
975 '73.0.3631.2',
976 '73.0.3631.1',
977 '73.0.3631.0',
978 '72.0.3626.7',
979 '71.0.3578.86',
980 '72.0.3626.6',
981 '73.0.3630.1',
982 '73.0.3630.0',
983 '72.0.3626.5',
984 '71.0.3578.85',
985 '72.0.3626.4',
986 '73.0.3628.3',
987 '73.0.3628.2',
988 '73.0.3629.1',
989 '73.0.3629.0',
990 '72.0.3626.3',
991 '71.0.3578.84',
992 '73.0.3628.1',
993 '73.0.3628.0',
994 '71.0.3578.83',
995 '73.0.3627.1',
996 '73.0.3627.0',
997 '72.0.3626.2',
998 '71.0.3578.82',
999 '71.0.3578.81',
1000 '71.0.3578.80',
1001 '72.0.3626.1',
1002 '72.0.3626.0',
1003 '71.0.3578.79',
1004 '70.0.3538.124',
1005 '71.0.3578.78',
1006 '72.0.3623.4',
1007 '72.0.3625.2',
1008 '72.0.3625.1',
1009 '72.0.3625.0',
1010 '71.0.3578.77',
1011 '70.0.3538.123',
1012 '72.0.3624.4',
1013 '72.0.3624.3',
1014 '72.0.3624.2',
1015 '71.0.3578.76',
1016 '72.0.3624.1',
1017 '72.0.3624.0',
1018 '72.0.3623.3',
1019 '71.0.3578.75',
1020 '70.0.3538.122',
1021 '71.0.3578.74',
1022 '72.0.3623.2',
1023 '72.0.3610.3',
1024 '72.0.3623.1',
1025 '72.0.3623.0',
1026 '72.0.3622.3',
1027 '72.0.3622.2',
1028 '71.0.3578.73',
1029 '70.0.3538.121',
1030 '72.0.3622.1',
1031 '72.0.3622.0',
1032 '71.0.3578.72',
1033 '70.0.3538.120',
1034 '72.0.3621.1',
1035 '72.0.3621.0',
1036 '71.0.3578.71',
1037 '70.0.3538.119',
1038 '72.0.3620.1',
1039 '72.0.3620.0',
1040 '71.0.3578.70',
1041 '70.0.3538.118',
1042 '71.0.3578.69',
1043 '72.0.3619.1',
1044 '72.0.3619.0',
1045 '71.0.3578.68',
1046 '70.0.3538.117',
1047 '71.0.3578.67',
1048 '72.0.3618.1',
1049 '72.0.3618.0',
1050 '71.0.3578.66',
1051 '70.0.3538.116',
1052 '72.0.3617.1',
1053 '72.0.3617.0',
1054 '71.0.3578.65',
1055 '70.0.3538.115',
1056 '72.0.3602.3',
1057 '71.0.3578.64',
1058 '72.0.3616.1',
1059 '72.0.3616.0',
1060 '71.0.3578.63',
1061 '70.0.3538.114',
1062 '71.0.3578.62',
1063 '72.0.3615.1',
1064 '72.0.3615.0',
1065 '71.0.3578.61',
1066 '70.0.3538.113',
1067 '72.0.3614.1',
1068 '72.0.3614.0',
1069 '71.0.3578.60',
1070 '70.0.3538.112',
1071 '72.0.3613.1',
1072 '72.0.3613.0',
1073 '71.0.3578.59',
1074 '70.0.3538.111',
1075 '72.0.3612.2',
1076 '72.0.3612.1',
1077 '72.0.3612.0',
1078 '70.0.3538.110',
1079 '71.0.3578.58',
1080 '70.0.3538.109',
1081 '72.0.3611.2',
1082 '72.0.3611.1',
1083 '72.0.3611.0',
1084 '71.0.3578.57',
1085 '70.0.3538.108',
1086 '72.0.3610.2',
1087 '71.0.3578.56',
1088 '71.0.3578.55',
1089 '72.0.3610.1',
1090 '72.0.3610.0',
1091 '71.0.3578.54',
1092 '70.0.3538.107',
1093 '71.0.3578.53',
1094 '72.0.3609.3',
1095 '71.0.3578.52',
1096 '72.0.3609.2',
1097 '71.0.3578.51',
1098 '72.0.3608.5',
1099 '72.0.3609.1',
1100 '72.0.3609.0',
1101 '71.0.3578.50',
1102 '70.0.3538.106',
1103 '72.0.3608.4',
1104 '72.0.3608.3',
1105 '72.0.3608.2',
1106 '71.0.3578.49',
1107 '72.0.3608.1',
1108 '72.0.3608.0',
1109 '70.0.3538.105',
1110 '71.0.3578.48',
1111 '72.0.3607.1',
1112 '72.0.3607.0',
1113 '71.0.3578.47',
1114 '70.0.3538.104',
1115 '72.0.3606.2',
1116 '72.0.3606.1',
1117 '72.0.3606.0',
1118 '71.0.3578.46',
1119 '70.0.3538.103',
1120 '70.0.3538.102',
1121 '72.0.3605.3',
1122 '72.0.3605.2',
1123 '72.0.3605.1',
1124 '72.0.3605.0',
1125 '71.0.3578.45',
1126 '70.0.3538.101',
1127 '71.0.3578.44',
1128 '71.0.3578.43',
1129 '70.0.3538.100',
1130 '70.0.3538.99',
1131 '71.0.3578.42',
1132 '72.0.3604.1',
1133 '72.0.3604.0',
1134 '71.0.3578.41',
1135 '70.0.3538.98',
1136 '71.0.3578.40',
1137 '72.0.3603.2',
1138 '72.0.3603.1',
1139 '72.0.3603.0',
1140 '71.0.3578.39',
1141 '70.0.3538.97',
1142 '72.0.3602.2',
1143 '71.0.3578.38',
1144 '71.0.3578.37',
1145 '72.0.3602.1',
1146 '72.0.3602.0',
1147 '71.0.3578.36',
1148 '70.0.3538.96',
1149 '72.0.3601.1',
1150 '72.0.3601.0',
1151 '71.0.3578.35',
1152 '70.0.3538.95',
1153 '72.0.3600.1',
1154 '72.0.3600.0',
1155 '71.0.3578.34',
1156 '70.0.3538.94',
1157 '72.0.3599.3',
1158 '72.0.3599.2',
1159 '72.0.3599.1',
1160 '72.0.3599.0',
1161 '71.0.3578.33',
1162 '70.0.3538.93',
1163 '72.0.3598.1',
1164 '72.0.3598.0',
1165 '71.0.3578.32',
1166 '70.0.3538.87',
1167 '72.0.3597.1',
1168 '72.0.3597.0',
1169 '72.0.3596.2',
1170 '71.0.3578.31',
1171 '70.0.3538.86',
1172 '71.0.3578.30',
1173 '71.0.3578.29',
1174 '72.0.3596.1',
1175 '72.0.3596.0',
1176 '71.0.3578.28',
1177 '70.0.3538.85',
1178 '72.0.3595.2',
1179 '72.0.3591.3',
1180 '72.0.3595.1',
1181 '72.0.3595.0',
1182 '71.0.3578.27',
1183 '70.0.3538.84',
1184 '72.0.3594.1',
1185 '72.0.3594.0',
1186 '71.0.3578.26',
1187 '70.0.3538.83',
1188 '72.0.3593.2',
1189 '72.0.3593.1',
1190 '72.0.3593.0',
1191 '71.0.3578.25',
1192 '70.0.3538.82',
1193 '72.0.3589.3',
1194 '72.0.3592.2',
1195 '72.0.3592.1',
1196 '72.0.3592.0',
1197 '71.0.3578.24',
1198 '72.0.3589.2',
1199 '70.0.3538.81',
1200 '70.0.3538.80',
1201 '72.0.3591.2',
1202 '72.0.3591.1',
1203 '72.0.3591.0',
1204 '71.0.3578.23',
1205 '70.0.3538.79',
1206 '71.0.3578.22',
1207 '72.0.3590.1',
1208 '72.0.3590.0',
1209 '71.0.3578.21',
1210 '70.0.3538.78',
1211 '70.0.3538.77',
1212 '72.0.3589.1',
1213 '72.0.3589.0',
1214 '71.0.3578.20',
1215 '70.0.3538.76',
1216 '71.0.3578.19',
1217 '70.0.3538.75',
1218 '72.0.3588.1',
1219 '72.0.3588.0',
1220 '71.0.3578.18',
1221 '70.0.3538.74',
1222 '72.0.3586.2',
1223 '72.0.3587.0',
1224 '71.0.3578.17',
1225 '70.0.3538.73',
1226 '72.0.3586.1',
1227 '72.0.3586.0',
1228 '71.0.3578.16',
1229 '70.0.3538.72',
1230 '72.0.3585.1',
1231 '72.0.3585.0',
1232 '71.0.3578.15',
1233 '70.0.3538.71',
1234 '71.0.3578.14',
1235 '72.0.3584.1',
1236 '72.0.3584.0',
1237 '71.0.3578.13',
1238 '70.0.3538.70',
1239 '72.0.3583.2',
1240 '71.0.3578.12',
1241 '72.0.3583.1',
1242 '72.0.3583.0',
1243 '71.0.3578.11',
1244 '70.0.3538.69',
1245 '71.0.3578.10',
1246 '72.0.3582.0',
1247 '72.0.3581.4',
1248 '71.0.3578.9',
1249 '70.0.3538.67',
1250 '72.0.3581.3',
1251 '72.0.3581.2',
1252 '72.0.3581.1',
1253 '72.0.3581.0',
1254 '71.0.3578.8',
1255 '70.0.3538.66',
1256 '72.0.3580.1',
1257 '72.0.3580.0',
1258 '71.0.3578.7',
1259 '70.0.3538.65',
1260 '71.0.3578.6',
1261 '72.0.3579.1',
1262 '72.0.3579.0',
1263 '71.0.3578.5',
1264 '70.0.3538.64',
1265 '71.0.3578.4',
1266 '71.0.3578.3',
1267 '71.0.3578.2',
1268 '71.0.3578.1',
1269 '71.0.3578.0',
1270 '70.0.3538.63',
1271 '69.0.3497.128',
1272 '70.0.3538.62',
1273 '70.0.3538.61',
1274 '70.0.3538.60',
1275 '70.0.3538.59',
1276 '71.0.3577.1',
1277 '71.0.3577.0',
1278 '70.0.3538.58',
1279 '69.0.3497.127',
1280 '71.0.3576.2',
1281 '71.0.3576.1',
1282 '71.0.3576.0',
1283 '70.0.3538.57',
1284 '70.0.3538.56',
1285 '71.0.3575.2',
1286 '70.0.3538.55',
1287 '69.0.3497.126',
1288 '70.0.3538.54',
1289 '71.0.3575.1',
1290 '71.0.3575.0',
1291 '71.0.3574.1',
1292 '71.0.3574.0',
1293 '70.0.3538.53',
1294 '69.0.3497.125',
1295 '70.0.3538.52',
1296 '71.0.3573.1',
1297 '71.0.3573.0',
1298 '70.0.3538.51',
1299 '69.0.3497.124',
1300 '71.0.3572.1',
1301 '71.0.3572.0',
1302 '70.0.3538.50',
1303 '69.0.3497.123',
1304 '71.0.3571.2',
1305 '70.0.3538.49',
1306 '69.0.3497.122',
1307 '71.0.3571.1',
1308 '71.0.3571.0',
1309 '70.0.3538.48',
1310 '69.0.3497.121',
1311 '71.0.3570.1',
1312 '71.0.3570.0',
1313 '70.0.3538.47',
1314 '69.0.3497.120',
1315 '71.0.3568.2',
1316 '71.0.3569.1',
1317 '71.0.3569.0',
1318 '70.0.3538.46',
1319 '69.0.3497.119',
1320 '70.0.3538.45',
1321 '71.0.3568.1',
1322 '71.0.3568.0',
1323 '70.0.3538.44',
1324 '69.0.3497.118',
1325 '70.0.3538.43',
1326 '70.0.3538.42',
1327 '71.0.3567.1',
1328 '71.0.3567.0',
1329 '70.0.3538.41',
1330 '69.0.3497.117',
1331 '71.0.3566.1',
1332 '71.0.3566.0',
1333 '70.0.3538.40',
1334 '69.0.3497.116',
1335 '71.0.3565.1',
1336 '71.0.3565.0',
1337 '70.0.3538.39',
1338 '69.0.3497.115',
1339 '71.0.3564.1',
1340 '71.0.3564.0',
1341 '70.0.3538.38',
1342 '69.0.3497.114',
1343 '71.0.3563.0',
1344 '71.0.3562.2',
1345 '70.0.3538.37',
1346 '69.0.3497.113',
1347 '70.0.3538.36',
1348 '70.0.3538.35',
1349 '71.0.3562.1',
1350 '71.0.3562.0',
1351 '70.0.3538.34',
1352 '69.0.3497.112',
1353 '70.0.3538.33',
1354 '71.0.3561.1',
1355 '71.0.3561.0',
1356 '70.0.3538.32',
1357 '69.0.3497.111',
1358 '71.0.3559.6',
1359 '71.0.3560.1',
1360 '71.0.3560.0',
1361 '71.0.3559.5',
1362 '71.0.3559.4',
1363 '70.0.3538.31',
1364 '69.0.3497.110',
1365 '71.0.3559.3',
1366 '70.0.3538.30',
1367 '69.0.3497.109',
1368 '71.0.3559.2',
1369 '71.0.3559.1',
1370 '71.0.3559.0',
1371 '70.0.3538.29',
1372 '69.0.3497.108',
1373 '71.0.3558.2',
1374 '71.0.3558.1',
1375 '71.0.3558.0',
1376 '70.0.3538.28',
1377 '69.0.3497.107',
1378 '71.0.3557.2',
1379 '71.0.3557.1',
1380 '71.0.3557.0',
1381 '70.0.3538.27',
1382 '69.0.3497.106',
1383 '71.0.3554.4',
1384 '70.0.3538.26',
1385 '71.0.3556.1',
1386 '71.0.3556.0',
1387 '70.0.3538.25',
1388 '71.0.3554.3',
1389 '69.0.3497.105',
1390 '71.0.3554.2',
1391 '70.0.3538.24',
1392 '69.0.3497.104',
1393 '71.0.3555.2',
1394 '70.0.3538.23',
1395 '71.0.3555.1',
1396 '71.0.3555.0',
1397 '70.0.3538.22',
1398 '69.0.3497.103',
1399 '71.0.3554.1',
1400 '71.0.3554.0',
1401 '70.0.3538.21',
1402 '69.0.3497.102',
1403 '71.0.3553.3',
1404 '70.0.3538.20',
1405 '69.0.3497.101',
1406 '71.0.3553.2',
1407 '69.0.3497.100',
1408 '71.0.3553.1',
1409 '71.0.3553.0',
1410 '70.0.3538.19',
1411 '69.0.3497.99',
1412 '69.0.3497.98',
1413 '69.0.3497.97',
1414 '71.0.3552.6',
1415 '71.0.3552.5',
1416 '71.0.3552.4',
1417 '71.0.3552.3',
1418 '71.0.3552.2',
1419 '71.0.3552.1',
1420 '71.0.3552.0',
1421 '70.0.3538.18',
1422 '69.0.3497.96',
1423 '71.0.3551.3',
1424 '71.0.3551.2',
1425 '71.0.3551.1',
1426 '71.0.3551.0',
1427 '70.0.3538.17',
1428 '69.0.3497.95',
1429 '71.0.3550.3',
1430 '71.0.3550.2',
1431 '71.0.3550.1',
1432 '71.0.3550.0',
1433 '70.0.3538.16',
1434 '69.0.3497.94',
1435 '71.0.3549.1',
1436 '71.0.3549.0',
1437 '70.0.3538.15',
1438 '69.0.3497.93',
1439 '69.0.3497.92',
1440 '71.0.3548.1',
1441 '71.0.3548.0',
1442 '70.0.3538.14',
1443 '69.0.3497.91',
1444 '71.0.3547.1',
1445 '71.0.3547.0',
1446 '70.0.3538.13',
1447 '69.0.3497.90',
1448 '71.0.3546.2',
1449 '69.0.3497.89',
1450 '71.0.3546.1',
1451 '71.0.3546.0',
1452 '70.0.3538.12',
1453 '69.0.3497.88',
1454 '71.0.3545.4',
1455 '71.0.3545.3',
1456 '71.0.3545.2',
1457 '71.0.3545.1',
1458 '71.0.3545.0',
1459 '70.0.3538.11',
1460 '69.0.3497.87',
1461 '71.0.3544.5',
1462 '71.0.3544.4',
1463 '71.0.3544.3',
1464 '71.0.3544.2',
1465 '71.0.3544.1',
1466 '71.0.3544.0',
1467 '69.0.3497.86',
1468 '70.0.3538.10',
1469 '69.0.3497.85',
1470 '70.0.3538.9',
1471 '69.0.3497.84',
1472 '71.0.3543.4',
1473 '70.0.3538.8',
1474 '71.0.3543.3',
1475 '71.0.3543.2',
1476 '71.0.3543.1',
1477 '71.0.3543.0',
1478 '70.0.3538.7',
1479 '69.0.3497.83',
1480 '71.0.3542.2',
1481 '71.0.3542.1',
1482 '71.0.3542.0',
1483 '70.0.3538.6',
1484 '69.0.3497.82',
1485 '69.0.3497.81',
1486 '71.0.3541.1',
1487 '71.0.3541.0',
1488 '70.0.3538.5',
1489 '69.0.3497.80',
1490 '71.0.3540.1',
1491 '71.0.3540.0',
1492 '70.0.3538.4',
1493 '69.0.3497.79',
1494 '70.0.3538.3',
1495 '71.0.3539.1',
1496 '71.0.3539.0',
1497 '69.0.3497.78',
1498 '68.0.3440.134',
1499 '69.0.3497.77',
1500 '70.0.3538.2',
1501 '70.0.3538.1',
1502 '70.0.3538.0',
1503 '69.0.3497.76',
1504 '68.0.3440.133',
1505 '69.0.3497.75',
1506 '70.0.3537.2',
1507 '70.0.3537.1',
1508 '70.0.3537.0',
1509 '69.0.3497.74',
1510 '68.0.3440.132',
1511 '70.0.3536.0',
1512 '70.0.3535.5',
1513 '70.0.3535.4',
1514 '70.0.3535.3',
1515 '69.0.3497.73',
1516 '68.0.3440.131',
1517 '70.0.3532.8',
1518 '70.0.3532.7',
1519 '69.0.3497.72',
1520 '69.0.3497.71',
1521 '70.0.3535.2',
1522 '70.0.3535.1',
1523 '70.0.3535.0',
1524 '69.0.3497.70',
1525 '68.0.3440.130',
1526 '69.0.3497.69',
1527 '68.0.3440.129',
1528 '70.0.3534.4',
1529 '70.0.3534.3',
1530 '70.0.3534.2',
1531 '70.0.3534.1',
1532 '70.0.3534.0',
1533 '69.0.3497.68',
1534 '68.0.3440.128',
1535 '70.0.3533.2',
1536 '70.0.3533.1',
1537 '70.0.3533.0',
1538 '69.0.3497.67',
1539 '68.0.3440.127',
1540 '70.0.3532.6',
1541 '70.0.3532.5',
1542 '70.0.3532.4',
1543 '69.0.3497.66',
1544 '68.0.3440.126',
1545 '70.0.3532.3',
1546 '70.0.3532.2',
1547 '70.0.3532.1',
1548 '69.0.3497.60',
1549 '69.0.3497.65',
1550 '69.0.3497.64',
1551 '70.0.3532.0',
1552 '70.0.3531.0',
1553 '70.0.3530.4',
1554 '70.0.3530.3',
1555 '70.0.3530.2',
1556 '69.0.3497.58',
1557 '68.0.3440.125',
1558 '69.0.3497.57',
1559 '69.0.3497.56',
1560 '69.0.3497.55',
1561 '69.0.3497.54',
1562 '70.0.3530.1',
1563 '70.0.3530.0',
1564 '69.0.3497.53',
1565 '68.0.3440.124',
1566 '69.0.3497.52',
1567 '70.0.3529.3',
1568 '70.0.3529.2',
1569 '70.0.3529.1',
1570 '70.0.3529.0',
1571 '69.0.3497.51',
1572 '70.0.3528.4',
1573 '68.0.3440.123',
1574 '70.0.3528.3',
1575 '70.0.3528.2',
1576 '70.0.3528.1',
1577 '70.0.3528.0',
1578 '69.0.3497.50',
1579 '68.0.3440.122',
1580 '70.0.3527.1',
1581 '70.0.3527.0',
1582 '69.0.3497.49',
1583 '68.0.3440.121',
1584 '70.0.3526.1',
1585 '70.0.3526.0',
1586 '68.0.3440.120',
1587 '69.0.3497.48',
1588 '69.0.3497.47',
1589 '68.0.3440.119',
1590 '68.0.3440.118',
1591 '70.0.3525.5',
1592 '70.0.3525.4',
1593 '70.0.3525.3',
1594 '68.0.3440.117',
1595 '69.0.3497.46',
1596 '70.0.3525.2',
1597 '70.0.3525.1',
1598 '70.0.3525.0',
1599 '69.0.3497.45',
1600 '68.0.3440.116',
1601 '70.0.3524.4',
1602 '70.0.3524.3',
1603 '69.0.3497.44',
1604 '70.0.3524.2',
1605 '70.0.3524.1',
1606 '70.0.3524.0',
1607 '70.0.3523.2',
1608 '69.0.3497.43',
1609 '68.0.3440.115',
1610 '70.0.3505.9',
1611 '69.0.3497.42',
1612 '70.0.3505.8',
1613 '70.0.3523.1',
1614 '70.0.3523.0',
1615 '69.0.3497.41',
1616 '68.0.3440.114',
1617 '70.0.3505.7',
1618 '69.0.3497.40',
1619 '70.0.3522.1',
1620 '70.0.3522.0',
1621 '70.0.3521.2',
1622 '69.0.3497.39',
1623 '68.0.3440.113',
1624 '70.0.3505.6',
1625 '70.0.3521.1',
1626 '70.0.3521.0',
1627 '69.0.3497.38',
1628 '68.0.3440.112',
1629 '70.0.3520.1',
1630 '70.0.3520.0',
1631 '69.0.3497.37',
1632 '68.0.3440.111',
1633 '70.0.3519.3',
1634 '70.0.3519.2',
1635 '70.0.3519.1',
1636 '70.0.3519.0',
1637 '69.0.3497.36',
1638 '68.0.3440.110',
1639 '70.0.3518.1',
1640 '70.0.3518.0',
1641 '69.0.3497.35',
1642 '69.0.3497.34',
1643 '68.0.3440.109',
1644 '70.0.3517.1',
1645 '70.0.3517.0',
1646 '69.0.3497.33',
1647 '68.0.3440.108',
1648 '69.0.3497.32',
1649 '70.0.3516.3',
1650 '70.0.3516.2',
1651 '70.0.3516.1',
1652 '70.0.3516.0',
1653 '69.0.3497.31',
1654 '68.0.3440.107',
1655 '70.0.3515.4',
1656 '68.0.3440.106',
1657 '70.0.3515.3',
1658 '70.0.3515.2',
1659 '70.0.3515.1',
1660 '70.0.3515.0',
1661 '69.0.3497.30',
1662 '68.0.3440.105',
1663 '68.0.3440.104',
1664 '70.0.3514.2',
1665 '70.0.3514.1',
1666 '70.0.3514.0',
1667 '69.0.3497.29',
1668 '68.0.3440.103',
1669 '70.0.3513.1',
1670 '70.0.3513.0',
1671 '69.0.3497.28',
1672 )
1673 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1674
1675
# Default HTTP request headers. The User-Agent value is chosen once, when this
# module is imported, by calling random_user_agent().
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1683
5f6a1245 1684
fb37eb25
S
# Fixed User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as the "no default given" marker for `default=`
# parameters; it is compared by identity (`is` / `is not`).
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, each list ordered January..December.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
a942d6cb 1702
a7aaa398
S
# Media/manifest file extensions this module knows about.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil',
)

REMUX_EXTENSIONS = ('mp4', 'mkv', 'flv', 'webm', 'mov', 'avi', 'mp3', 'mka', 'm4a', 'ogg', 'opus')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; the two sequences
# below are paired up position by position.
ACCENT_CHARS = dict(zip(
    'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
    itertools.chain(
        'AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1724
46f59e89
S
# strptime-style patterns shared by both the day-first and month-first lists.
DATE_FORMATS = (
    # Dates written with a month name, optionally with an ordinal suffix
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    # Numeric dates and date-times
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    # 'T'-separated timestamps
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    # "<date> at <time>"
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# Shared formats followed by the ambiguous numeric ones read as day/month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS) + [
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
]

# Shared formats followed by the ambiguous numeric ones read as month/day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS) + [
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
1782
# Matches the trailing "}('<payload>',<int>,<int>,'<a|b|c>'.split('|')" call
# and captures its four arguments.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; the body is captured
# in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1785
7105440c 1786
def preferredencoding():
    """Get preferred encoding.

    Returns the encoding reported by locale.getpreferredencoding() when it
    can actually be used to encode text, and 'UTF-8' otherwise.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable encoding name raises here.
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
d77c3dfd 1800
f4bfd65f 1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn, which
    is then renamed over fn.  If anything fails, the temporary file is
    removed (best effort) and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use unicode objects
        fs_encoding = get_filesystem_encoding()
        base_name = os.path.basename(fn).decode(fs_encoding)
        dir_name = os.path.dirname(fn).decode(fs_encoding)
    else:
        base_name = os.path.basename(fn)
        dir_name = os.path.dirname(fn)

    tmp_args = {
        'suffix': '.tmp',
        'prefix': base_name + '.',
        'dir': dir_name,
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        tmp_args['mode'] = 'wb'
    else:
        tmp_args['mode'] = 'w'
        tmp_args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_args))

    try:
        with tf:
            # Objects json cannot serialize are written as their repr()
            json.dump(obj, tf, default=repr)

        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass

        try:
            # os.umask() can only be read by setting it, so set and restore
            current_mask = os.umask(0)
            os.umask(current_mask)
            os.chmod(tf.name, 0o666 & ~current_mask)
        except OSError:
            pass

        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        With val=None the first element matching xpath that has the
        attribute key at all is returned.  Returns None when nothing matches.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering findall() results """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (
                    val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1876
d7e66d39
JMF
1877# On python2.6 the xml.etree.ElementTree.Element methods don't support
1878# the namespace parameter
5f6a1245
JW
1879
1880
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace}tag' form.

    Each '/'-separated step containing a single ':' has its prefix looked up
    in ns_map; steps without a ':' are kept unchanged.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))
1891
d77c3dfd 1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matching xpath.

    xpath is either a single XPath string or an iterable of XPath strings
    that are tried in order until one matches.

    When nothing matches: default is returned if one was given; otherwise
    ExtractorError is raised if fatal is true (name, or xpath when name is
    None, is used in the message); otherwise None is returned.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable is treated as
        # "not found" instead of leaving n unbound.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element is handled by xpath_element() itself.  An element
    that exists but has no text follows the same rules: default if given,
    else ExtractorError when fatal, else None.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Either nothing was found, or xpath_element() handed back the default
    if element is None or element == default:
        return element

    text = element.text
    if text is not None:
        return text

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % name)
    return None
a41fb80c
S
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: default if given, else ExtractorError when
    fatal, else None.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]

    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1942
1943
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # An ID lookup is an attribute lookup on the 'id' attribute
    return get_element_by_attribute(attribute='id', value=id, html=html)
43e8fafd 1947
12ea2f30 1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class may be one of several in the attribute value, so match it as
    # a whole word inside the value; the pattern is already a regex, hence
    # escape_value=False.
    class_value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is inserted into the pattern as a regular expression.  Each returned
    content string is passed through unescapeHTML().
    """

    value_re = re.escape(value) if escape_value else value

    element_re = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value_re)

    def _unwrap(content):
        # Content starting with a quote has its first and last character removed
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]
        return unescapeHTML(content)

    return [_unwrap(m.group('content')) for m in re.finditer(element_re, html)]
a921f407 1990
c5229f39 1991
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Stays empty until a start tag has been parsed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Store the (name, value) pairs of the start tag as a dict."""
        self.attrs = dict(attrs)
2001
c5229f39 2002
8bb56eee
BF
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was collected before the error is still returned.
        pass
    return attr_parser.attrs
9e6dd238 2027
c5229f39 2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Literal newlines carry no meaning in HTML; only <br> and paragraph
    # boundaries become newlines in the result.
    text = html.replace('\n', ' ')
    substitutions = (
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),         # <br />
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),   # </p><p>
        ('<.*?>', ''),                                # strip remaining tags
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)

    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it retries once with
    the name returned by sanitize_path(); if that name is no different, or
    the failure was a permission error, the original exception is re-raised
    like the standard open() function would.

    The filename '-' stands for standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Use the underlying binary buffer when stdout exposes one
            if hasattr(sys.stdout, 'buffer'):
                return (sys.stdout.buffer, filename)
            return (sys.stdout, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        if err.errno == errno.EACCES:
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise

        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2085
5f6a1245 2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted:
            if char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127:
                return '_'
        return char

    def _protect_timestamp(m):
        return m.group(0).replace(':', '_')

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', _protect_timestamp, s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        # IDs are kept as they are apart from the per-character replacement
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub(r'_+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[1:]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2126
5f6a1245 2127
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms the path is returned unchanged unless force is set,
    in which case it is sanitized with the same rules (without any drive/UNC
    handling).  Within each path component other than '.' and '..', the
    characters / < > : " | \\ ? * and a trailing whitespace character or dot
    are replaced with '#'.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    # NOTE(review): remove_start() is skipped for an empty prefix on the
    # assumption that it is a no-op in that case -- confirm against its
    # definition.
    remainder = remove_start(s, drive_or_unc) if drive_or_unc else s
    norm_path = os.path.normpath(remainder).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    # startswith() instead of s[0] so that an empty path does not raise
    # IndexError
    elif force and s.startswith(os.path.sep):
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2151
2152
def sanitize_url(url):
    """Repair a few common URL defects; other URLs are returned unchanged."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # Only the first typo that applies is fixed
        fixed, hits = re.subn(mistake, fixup, url)
        if hits:
            return fixed
    return url
17bcc626
S
2169
2170
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for url after passing it through sanitize_url()."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2173
2174
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # ~ is expanded first, then environment variables in the result
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2178
2179
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list holding the first occurrence of each element, in input
    order.  Elements are compared with ==, so they need not be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2187
912b38b4 2188
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with the
    trailing ';' (e.g. 'amp;' or '#x26;').  Entities that cannot be decoded
    are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # Out-of-range code points raise ValueError; numbers too large
            # for a C int raise OverflowError (assuming compat_chr wraps the
            # built-in chr/unichr).  Both fall through to the literal form.
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2218
2219
def unescapeHTML(s):
    """Replace every '&...;' entity in s using _htmlentity_transform(); None is passed through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # Group 1 is the entity without '&' but including the ';'
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2227
8bf48f23 2228
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate(*args, **kwargs) and return its result.

    If communicate() raises anything at all, the process is killed and
    waited for before the exception is re-raised.
    """
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    else:
        return result
2236
2237
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and filenames."""
    on_modern_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    if on_modern_windows:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        detected = preferredencoding()
    else:
        detected = sys.getfilesystemencoding()
    return 'utf-8' if detected is None else detected
2248
2249
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is meant for a subprocess call

    Returns s unchanged wherever Unicode names can be used directly and a
    byte string encoded with get_subprocess_encoding() otherwise.
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    uses_unicode_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if uses_unicode_api or sys.platform.startswith('java'):
        return s

    # Characters the target encoding cannot represent are dropped
    return s.encode(get_subprocess_encoding(), 'ignore')
2272
2273
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string filename on Python 2; any other input is returned unchanged."""
    # On Python 3 nothing is decoded; non-bytes values are already text
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2283
f07b74fc
PH
2284
def encodeArgument(s):
    """Encode a command-line argument with encodeFilename(..., for_subprocess=True)."""
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # TODO: after fixing all post processors, fail here instead with
        # 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
2292
2293
aa49acd1
S
def decodeArgument(b):
    """Counterpart of encodeArgument(): decodeFilename() in subprocess mode."""
    return decodeFilename(b, for_subprocess=True)
2296
2297
8271226a
PH
def decodeOption(optval):
    """Return optval as text; byte strings are decoded with preferredencoding(), None is passed through."""
    if optval is None:
        return optval

    text = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval
    assert isinstance(text, compat_str)
    return text
1c256f70 2306
5f6a1245 2307
def formatSeconds(secs, delim=':'):
    """Format a number of seconds as S, M:SS or H:MM:SS.

    @param secs   Number of seconds
    @param delim  Separator placed between the fields

    The hour and minute fields only appear once they are non-zero, so
    exactly 60 seconds is '1:00' and exactly 3600 seconds is '1:00:00'.
    """
    # >= rather than >, so that the boundary values roll over into the next
    # field instead of being rendered as '60' and '60:00'
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2315
a0ddb8a2 2316
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler with an SSL context suited to the running Python.

    Certificate verification is turned off when params['nocheckcertificate']
    is true.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            # check_hostname is cleared before verify_mode is set to CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2340
732ea2f0 2341
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to error messages asking the user to report the issue."""
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'

    parts = (
        '; please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    )
    return ''.join(parts)
2351
2352
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2356
2357
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        video_id, if given, is prefixed to the message; cause, if given, is appended to it.
        """

        # Network errors and unavailable videos currently being handled are
        # never treated as bugs
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True

        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is None:
            return None
        formatted_lines = traceback.format_tb(self.traceback)
        return ''.join(formatted_lines)
01951dda 2385
1c256f70 2386
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected ExtractorError for a URL that is not supported; the URL is kept in `url`."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2392
2393
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2397
2398
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Always reported as an expected error, never as a bug
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # The undecorated message and the countries value given by the caller
        self.msg = msg
        self.countries = countries
2410
2411
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept so that callers can inspect the underlying exception
        self.exc_info = exc_info
d77c3dfd
FV
2424
2425
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2433
2434
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # The message is also exposed as an attribute
        self.msg = msg
d77c3dfd 2445
5f6a1245 2446
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's
    ("--max-downloads limit has been reached"); presumably this is raised for
    --break-on-existing -- confirm against the code that raises it.
    """
    pass
2450
2451
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): the previous docstring was a copy of MaxDownloadsReached's
    ("--max-downloads limit has been reached"); presumably this is raised for
    --break-on-reject -- confirm against the code that raises it.
    """
    pass
2455
2456
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2460
2461
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2469
2470
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2486
5f6a1245 2487
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an error code and message, classified into a coarse `reason`.

    `reason` is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED', derived from
    the errno-style code and from well-known fragments of the message.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg
        self.reason = self._classify()

    def _classify(self):
        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            return 'NO_SPACE'
        if self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            return 'VALUE_TOO_LONG'
        return 'NOT_SUPPORTED'
2502
2503
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): undocumented in the original; presumably raised when
    # extended attributes cannot be used at all -- confirm against callers.
    pass
2506
2507
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if requested, pin its outgoing source address.

    ydl_handler -- handler whose `_params` dict is consulted for 'source_address'
    http_class  -- connection class to instantiate with *args/**kwargs
    is_https    -- whether the Python 2.6 fallback must wrap the socket with SSL

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: the `source_address` parameter shadows the outer variable; here it
            # is the (host, port) tuple handed over by the connection object
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host selects IPv4, anything else is treated as IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try every remaining candidate; the first successful connect wins
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the half-opened socket
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        # Only patch connection objects that expose the `_create_connection` hook
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 in the bind address
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            # Replace connect() on this instance only
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2571
2572
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker to a header dict.

    Without the marker the very same `headers` object is returned.
    With it, a new dict is returned that lacks both the marker and any
    'Accept-Encoding' header (matched case-insensitively); the input
    is left untouched.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    kept = {}
    for name, value in headers.items():
        if name == 'Youtubedl-no-compression' or name.lower() == 'accept-encoding':
            continue
        kept[name] = value
    return kept
87f0e62d
YCH
2581
2582
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store `params`; remaining arguments go to the base HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open `req`, going through a SOCKS connection class when requested."""
        conn_class = compat_http_client.HTTPConnection

        # 'Ytdl-socks-proxy' is an internal marker header: consume it here so
        # that it is removed from the request headers
        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first, then zlib-wrapped."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL and add the standard headers; returns the request."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Honour (and strip) the "Youtubedl-no-compression" marker
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decode gzip/deflate bodies and percent-encode redirect locations."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1..1023 trailing bytes chopped off; the for/else
                # re-raises the first error when no truncation helps
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            # The body is now plain, so the header must no longer claim otherwise
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 2706
5de90176 2707
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Build a subclass of base_class that connects through a SOCKS proxy.

    base_class  -- HTTPConnection or HTTPSConnection (or a subclass of either)
    socks_proxy -- proxy URL; supported schemes are socks, socks4, socks4a
                   and socks5. The port defaults to 1080 and the optional
                   username/password are percent-decoded.

    Returns the new connection class.
    Raises ValueError if the proxy URL uses an unsupported scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `socks_type`
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # None / '' must stay as they are: they mean "no credentials"
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only plain numeric timeouts are forwarded to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2749
2750
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        connection_class = self._https_conn_class

        # Forward the SSL settings only if this Python's HTTPSHandler keeps them
        # (`_context`: python > 2.6, `_check_hostname`: python 3.x)
        open_kwargs = {}
        for kwarg_name, attr_name in (('context', '_context'), ('check_hostname', '_check_hostname')):
            if hasattr(self, attr_name):
                open_kwargs[kwarg_name] = getattr(self, attr_name)

        # Consume the internal SOCKS marker header, if any
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2774
2775
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        # Session cookies are stored with `expires` set to 0 rather than
        # an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = compat_str(cookie.expires) if cookie.expires is not None else ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            # Drop the HttpOnly marker so that the entry parses as a regular one
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            columns = line.split('\t')
            if len(columns) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(columns))
            entry = self._CookieFileEntry(*columns)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # Sanitize the file into an in-memory buffer, skipping malformed entries
        sanitized = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for raw_line in f:
                try:
                    prepared = prepare_line(raw_line)
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, raw_line), sys.stderr)
                else:
                    sanitized.write(prepared)
        sanitized.seek(0)
        self._really_load(sanitized, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2892
2893
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor whose HTTP and HTTPS hooks share the same handlers."""

    def __init__(self, cookiejar=None):
        base = compat_urllib_request.HTTPCookieProcessor
        base.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is currently disabled.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        base = compat_urllib_request.HTTPCookieProcessor
        return base.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2916
2917
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that overrides redirect_request on Python 2 only."""

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            newurl = compat_str(newurl)
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(
                self, req, fp, code, msg, headers, newurl)
2925
2926
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC designator off a date string.

    Recognizes a final 'Z' or a '+HH:MM' / '-HHMM' style offset (optionally
    preceded by one space) after at least 8 other characters.

    Returns a (timedelta, remaining_date_str) tuple; the timedelta is zero
    when nothing was recognized or when the designator is 'Z'.
    """
    match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not match:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(match.group('tz'))]
    if not match.group('sign'):
        # Plain 'Z': UTC, no offset
        return datetime.timedelta(), remainder

    direction = 1 if match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(match.group('hours')),
        minutes=direction * int(match.group('minutes')))
    return offset, remainder
2943
2944
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    date_str  -- 'YYYY-MM-DD<delimiter>HH:MM:SS', optionally with fractional
                 seconds (discarded) and a UTC designator
    delimiter -- separator between the date and the time part
    timezone  -- timedelta to subtract; when None it is extracted from date_str

    Returns None if date_str is None or does not match the expected layout.
    """
    if date_str is None:
        return None

    # Fractional seconds are not part of the strptime format below
    stripped = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, stripped = extract_timezone(stripped)

    layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_dt = datetime.datetime.strptime(stripped, layout) - timezone
        return calendar.timegm(utc_dt.timetuple())
    except ValueError:
        return None
912b38b4
PH
2962
2963
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first or the month-first collection of date formats."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2966
2967
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas, then remove AM/PM + timezone
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; when several match, the last one decides
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 2994
5f6a1245 2995
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for a free-form date string.

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # Any "PM" in the string shifts the result by 12 hours
    pm_delta = 12 if re.search(r'(?i)PM', text) else 0
    timezone, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if tz_match:
        text = text[:-len(tz_match.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nano_match = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if nano_match:
        text = nano_match.group(1)

    pm_offset = datetime.timedelta(hours=pm_delta)
    for fmt in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(text, fmt) - timezone + pm_offset
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    # Fall back to RFC 2822 style parsing
    parsed = email.utils.parsedate_tz(text)
    if parsed:
        return calendar.timegm(parsed) + pm_delta * 3600
46f59e89
S
3027
3028
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    The candidate is whatever follows the last '.' of the part before the
    first '?'. It is returned when purely alphanumeric, or (minus trailing
    slashes) when listed in KNOWN_EXTENSIONS; otherwise default_ext is
    returned, as it is for None or dot-less URLs.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.split('?', 1)[0]
    candidate = without_query.rsplit('.', 1)[-1]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3040
5f6a1245 3041
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by '<sub_lang>.<sub_format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3044
5f6a1245 3045
def date_from_str(date_str):
    """
    Return a datetime.date object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'now', 'today' and 'yesterday' are accepted on their own as well.
    Months and years are approximated as 30 and 365 days."""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount

    unit = match.group('unit')
    # A bad approximation?
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
3073
3074
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Anything that is not exactly eight digits is returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3083
5f6a1245 3084
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A bound left as None extends to the earliest/latest representable date.
        Raises ValueError when start lies after end."""
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(start=day, end=day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3114
3115
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string result is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3124
3125
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s   -- text to write
    out -- output stream; only streams whose fileno() is 1 or 2 and which
           are attached to a console are handled here

    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # fileno -> Win32 standard handle id (-11: STD_OUTPUT_HANDLE, -12: STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    # Prototype the kernel32 entry points used below
    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives how much each WriteConsoleW call actually wrote
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, for non-character files and for handles on
        # which GetConsoleMode fails
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters that stop before any non-BMP
    # character; such a character is then written on its own with a length of 2
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3199
3200
def write_string(s, out=None, encoding=None):
    """Write the text s to out (sys.stderr by default) and flush it.

    s        -- text to write; must be exactly of type compat_str
    out      -- target stream
    encoding -- encoding to use when bytes have to be written; giving one
                also bypasses the Windows console code path
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno')
    if use_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream wrapping a binary buffer: encode ourselves so that
        # unencodable characters are dropped
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(target_encoding, 'ignore'))
    else:
        out.write(s)
    out.flush()
3221
3222
48ea9cea
PH
def bytes_to_intlist(bs):
    """Convert a byte string into a list of integer byte values.

    Empty (or None) input yields an empty list.
    """
    if not bs:
        return []
    # Python 3 bytes already iterate as ints; otherwise map each item through ord()
    return list(bs) if isinstance(bs[0], int) else [ord(item) for item in bs]
3230
c257baff 3231
def intlist_to_bytes(xs):
    """Pack a sequence of integers (0-255) into a byte string."""
    if xs:
        pack_format = '%dB' % len(xs)
        return compat_struct_pack(pack_format, *xs)
    return b''
c38b1e77
PH
3236
3237
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the running platform
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure handed to LockFileEx/UnlockFileEx below
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high parts of the byte count passed to both calls
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock (LOCKFILE_EXCLUSIVE_LOCK), 0x0 a shared one
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        # Without fcntl both operations fail with IOError
        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3311
3312
class locked_file(object):
    """Context manager that opens a file and holds a lock on it.

    Mode 'r' takes a shared lock; 'a' and 'w' take an exclusive one.
    The file is closed on exit, and also when acquiring the lock fails
    with IOError.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        wants_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, wants_exclusive)
        except IOError:
            # Do not leak the handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3342
3343
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' if that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3347
3348
def shell_quote(args):
    """Quote every argument for the shell and join them with single spaces."""
    encoding = get_filesystem_encoding()
    # We may get a filename encoded with 'encodeFilename', hence the bytes check
    return ' '.join([
        compat_shlex_quote(arg.decode(encoding) if isinstance(arg, bytes) else arg)
        for arg in args])
9d4660ca
PH
3358
3359
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    Data already smuggled into url is preserved and takes precedence over
    equal keys in data. The dict passed in by the caller is not modified.
    """

    url, idata = unsmuggle_url(url, {})
    # Work on a copy: updating `data` in place would leak previously smuggled
    # values into the caller's dict
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
9d4660ca
PH
3368
3369
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into a (url, data) pair.

    When smug_url carries no smuggled data, (smug_url, default) is returned.
    """
    if '#__youtubedl_smuggle' in smug_url:
        plain_url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return plain_url, json.loads(encoded)
    return smug_url, default
02dbf93f
PH
3377
3378
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count as a string with a binary (1024-based) suffix.

    bytes may be a number, a numeric string or None. None gives 'N/A'.
    The result has two decimals, e.g. 1536 -> '1.50KiB'. Values smaller
    than one byte are shown in 'B', values beyond the largest known suffix
    are shown in 'YiB', and negative values keep their sign.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    SUFFIXES = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    magnitude = abs(bytes)
    if magnitude == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(magnitude, 1024.0))
        # Keep the index inside the suffix table: a negative exponent
        # (tiny values) would silently select a suffix from the end of the
        # list, and a too large one would raise IndexError
        exponent = min(max(exponent, 0), len(SUFFIXES) - 1)
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, SUFFIXES[exponent])
f53c966a 3391
1c088fa8 3392
fb47597b
S
def lookup_unit_table(unit_table, s):
    """Parse a leading "<number><unit>" in s and scale it by unit_table.

    unit_table maps unit names to multipliers. A comma in the number is
    treated as the decimal separator. Returns the scaled value as an int,
    or None when s does not start with a number followed by a known unit.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    found = re.match(pattern, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    return int(number * unit_table[found.group('unit')])
3402
3403
be64b5b0
PH
def parse_filesize(s):
    """Parse a human-readable file size such as '1.5 MiB' into bytes.

    Returns None when s is None or does not start with a number followed
    by one of the supported units.
    """
    if s is None:
        return None

    # (prefix letter, decimal unit name, binary unit name) by growing magnitude
    PREFIXES = (
        ('K', 'kilobytes', 'kibibytes'),
        ('M', 'megabytes', 'mebibytes'),
        ('G', 'gigabytes', 'gibibytes'),
        ('T', 'terabytes', 'tebibytes'),
        ('P', 'petabytes', 'pebibytes'),
        ('E', 'exabytes', 'exbibytes'),
        ('Z', 'zettabytes', 'zebibytes'),
        ('Y', 'yottabytes', 'yobibytes'),
    )

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    for power, (letter, decimal_name, binary_name) in enumerate(PREFIXES, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        _UNIT_TABLE.update((
            (letter + 'iB', binary),
            (letter + 'B', decimal),
            (lower + 'B', binary),
            (letter + 'b', decimal),
            (lower + 'b', decimal),
            (decimal_name, decimal),
            (binary_name, binary),
        ))

    return lookup_unit_table(_UNIT_TABLE, s)
3473
3474
def parse_count(s):
    """Parse a count such as '1,234' or '1.5M' into an int.

    Returns None when s is None; otherwise the result of str_to_int for
    plain digit/separator strings, or of the unit table lookup.
    """
    if s is None:
        return None

    text = s.strip()

    # Only digits and separators: no unit suffix to resolve
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    thousand, million = 1000, 1000 ** 2
    _UNIT_TABLE = {
        'k': thousand,
        'K': thousand,
        'm': million,
        'M': million,
        'kk': million,
        'KK': million,
    }

    return lookup_unit_table(_UNIT_TABLE, text)
be64b5b0 3494
2f7ae819 3495
b871d7e9
S
def parse_resolution(s):
    """Extract video dimensions from a string.

    Recognizes "<width>x<height>", "<height>p"/"<height>i" and "4k"/"8k".
    Returns a dict with 'width' and/or 'height', or an empty dict when
    nothing is recognized or s is None.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = dimensions.group('w', 'h')
        return {
            'width': int(width),
            'height': int(height),
        }

    # (pattern, factor turning the captured number into a height)
    HEIGHT_PATTERNS = (
        (r'\b(\d+)[pPiI]\b', 1),
        (r'\b([48])[kK]\b', 540),
    )
    for pattern, factor in HEIGHT_PATTERNS:
        found = re.search(pattern, s)
        if found:
            return {'height': int(found.group(1)) * factor}

    return {}
3516
3517
0dc41787
S
def parse_bitrate(s):
    """Return the integer preceding 'kbps' in s, or None if s is not text or has none."""
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3524
3525
def month_by_name(name, lang='en'):
    """ Return the number of a month by its full name in the given language.
    The English names are used when lang has no entry in MONTH_NAMES.
    Returns None for an unknown name """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3535
3536
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations (the first three letters of the month name).
        Returns None for an unknown abbreviation """

    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev in abbreviations:
        return abbreviations.index(abbrev) + 1
    return None
18258362
JMF
3545
3546
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start a known entity by '&amp;' in XML"""
    # '&' not followed by a named or numeric character reference
    BARE_AMPERSAND_RE = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(BARE_AMPERSAND_RE, '&amp;', xml_str)
e3946f98
PH
3553
3554
def setproctitle(title):
    """Set the name of the current process via libc's prctl (best effort).

    title must be a text string. Nothing happens when running on Jython,
    when 'libc.so.6' cannot be loaded or when libc has no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initializing the buffer from the bytes reserves one extra byte for the
    # terminating NUL. A buffer of exactly len(title_bytes) would hold an
    # unterminated string, which prctl would read past the end of.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3579
3580
def remove_start(s, start):
    """Return s without the leading start; s (None included) is returned as is otherwise."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3583
3584
def remove_end(s, end):
    """Return s without the trailing end; s (None included) is returned as is otherwise.

    An empty end leaves s unchanged.
    """
    # `not end` matters: every string ends with '', and s[:-0] is s[:0],
    # which would wipe the whole string
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
2b9faf55
PH
3587
3588
31b2051e
S
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s, if any."""
    if s is None or len(s) < 2:
        return s
    opening = s[0]
    if opening == s[-1] and opening in ('"', "'"):
        return s[1:-1]
    return s
3596
3597
b6e0c7d2
U
def get_domain(url):
    """Return the host part of url (without a leading 'www.'), or None if there is none."""
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if found is None:
        return None
    return found.group('domain')
3601
3602
def url_basename(url):
    """Return the last component of the path of url ('' if the path is empty)."""
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3
PH
3606
3607
02dc0a36
S
def base_url(url):
    """Return the http(s) URL up to and including the last '/' before any '?', '#' or '&'.

    Raises AttributeError when url does not match.
    """
    found = re.match(r'https?://[^?#&]+/', url)
    return found.group()
3610
3611
def urljoin(base, path):
    """Join path onto base and return the result, or None when that is impossible.

    Both arguments may be text or UTF-8 bytes. A path that already is an
    absolute or protocol-relative URL is returned unchanged. None is
    returned when path is empty or not a string, or when base is not an
    http(s) or protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # Scheme characters per RFC 3986. '-' is placed last so that it is a
    # literal; written as '+-.' it forms a range that also admits ','
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3625
3626
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
7217e148
PH
3630
3631
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
3635
3636
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to an int scaled by invscale / scale, or return default.

    When get_attr is given, the attribute of that name is read from v
    first. default is returned for None, '' and values int() rejects.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
9732d77e 3649
9572013d 3650
40a90862
JMF
def str_or_none(v, default=None):
    """Return v converted to text, or default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3653
9732d77e
PH
3654
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned unchanged. Text has ',', '.' and '+' removed
    before conversion. Any other type yields None """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
608d11f5
PH
3662
3663
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to a float scaled by invscale / scale, or return default.

    default is returned for None and for values float() rejects.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3671
3672
c7e327c4
S
def bool_or_none(v, default=None):
    """Return v when it is a bool, default otherwise."""
    if isinstance(v, bool):
        return v
    return default
3675
3676
53cd37ba
S
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace when it is text, default otherwise."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3679
3680
af03000a
S
def url_or_none(url):
    """Return the stripped url when it looks like a URL of a supported scheme, else None.

    Accepted are protocol-relative URLs and the schemes matched by the
    pattern below (http(s), rtmp variants, rtsp variants, mms, ftp(s)).
    """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
af03000a
S
3686
3687
def strftime_or_none(timestamp, date_format, default=None):
    """Format timestamp with date_format, or return default on failure.

    timestamp is either a number (unix timestamp, interpreted as UTC) or a
    'YYYYMMDD' string. default is returned for any other type and when the
    value cannot be converted or formatted.
    """
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        else:
            return default
        return datetime_object.strftime(date_format)
    # utcfromtimestamp raises OverflowError/OSError for timestamps the
    # platform cannot represent; an "or_none" helper must not leak those
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        return default
3698
3699
def parse_duration(s):
    """Parse a duration string into a number of seconds.

    Three notations are tried in turn: colon separated
    ("[[[DD:]HH:]MM:]SS[.ms]"), unit suffixed (including ISO 8601 style
    "PT1H2M3S"; years, months and weeks are matched but not counted) and a
    plain amount of hours or minutes. Returns None when s is not a string
    or matches none of them.
    """
    if not isinstance(s, compat_basestring):
        return None

    text = s.strip()

    days = hours = mins = secs = ms = None

    # Both of the first two notations yield the same five groups
    found = re.match(
        r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$',
        text
    ) or re.match(
        r'''(?ix)(?:P?
            (?:
                [0-9]+\s*y(?:ears?)?\s*
            )?
            (?:
                [0-9]+\s*m(?:onths?)?\s*
            )?
            (?:
                [0-9]+\s*w(?:eeks?)?\s*
            )?
            (?:
                (?P<days>[0-9]+)\s*d(?:ays?)?\s*
            )?
            T)?
            (?:
                (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
            )?
            (?:
                (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
            )?
            (?:
                (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
            )?Z?$''', text)
    if found:
        days, hours, mins, secs, ms = found.groups()
    else:
        found = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', text)
        if not found:
            return None
        hours, mins = found.groups()

    total = 0
    if secs:
        total += float(secs)
    if mins:
        total += float(mins) * 60
    if hours:
        total += float(hours) * 60 * 60
    if days:
        total += float(days) * 24 * 60 * 60
    if ms:
        # ms still carries its leading '.', so it is a fraction of a second
        total += float(ms)
    return total
91d7d0b3
JMF
3756
3757
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext in front of the real extension of filename.

    When expected_real_ext is given and differs from the real extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3764
3765
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of filename by ext.

    When expected_real_ext is given and differs from the real extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3771
3772
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False when launching the binary raises OSError """
    try:
        process = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(process)
    except OSError:
        return False
    return exe
b7ab0590
PH
3782
3783
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The version is extracted from the combined stdout/stderr output by
    detect_exe_version """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(process)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3801
3802
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Return the first group of version_re found in output, or unrecognized.

    Without version_re, the token following the word "version" is used.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3812
3813
class PagedList(object):
    """Base class of paged lists; subclasses provide getslice()."""

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3818
9c44d242
PH
3819
class OnDemandPagedList(PagedList):
    """Paged list that fetches pages through pagefunc only when they are needed.

    pagefunc(pagenum) returns an iterable with the entries of that page and
    pagesize is the number of entries on a full page. Fetched pages are
    kept in a cache unless use_cache is false.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        size = self._pagesize
        collected = []
        for pagenum in itertools.count(start // size):
            firstid = pagenum * size
            nextfirstid = firstid + size
            if start >= nextfirstid:
                continue

            entries = self._cache.get(pagenum) if self._use_cache else None
            if entries is None:
                entries = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = entries

            # Offset of the first wanted entry inside this page
            if firstid <= start < nextfirstid:
                startv = start % size
            else:
                startv = 0

            # Offset just past the last wanted entry, if the slice ends here
            if end is not None and firstid <= end <= nextfirstid:
                endv = ((end - 1) % size) + 1
            else:
                endv = None

            if startv != 0 or endv is not None:
                entries = entries[startv:endv]
            collected.extend(entries)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(entries) + startv < size:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return collected
81c2f20b
PH
3870
3871
9c44d242
PH
class InAdvancePagedList(PagedList):
    """Paged list for which the number of pages is known beforehand.

    pagefunc(pagenum) returns an iterable with the entries of that page.
    """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        collected = []
        first_page = start // self._pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start
        # Entries to drop from the front of the first fetched page
        to_skip = start - first_page * self._pagesize
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
3899
3900
def uppercase_escape(s):
    r"""Replace every \UXXXXXXXX escape sequence in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
0fe2ff78
YCH
3907
3908
def lowercase_escape(s):
    r"""Replace every \uXXXX escape sequence in s by the character it denotes."""
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
b53466e1 3915
d05cfe06
S
3916
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Everything in the second argument is left unquoted
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
3922
3923
def escape_url(url):
    """Escape URL as suggested by RFC 3986

    The host is converted to its IDNA form; path, params, query and
    fragment go through escape_rfc3986.
    """
    parts = compat_urllib_parse_urlparse(url)
    escaped = parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment)
    )
    return escaped.geturl()
3934
62e609ab
PH
3935
def read_batch_urls(batch_fd):
    """Read the URLs from a batch file object and close it.

    Blank lines and lines starting with '#', ';' or ']' are skipped. A
    leading BOM and anything after a whitespace followed by '#' are removed.
    """
    def fixup(line):
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
        for bom in BOM_UTF8:
            if line.startswith(bom):
                line = line[len(bom):]
        line = line.lstrip()
        if not line or line.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', line, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        cleaned = map(fixup, fd)
        return [url for url in cleaned if url]
b74fa8cd
JMF
3953
3954
def urlencode_postdata(*args, **kargs):
    """URL-encode the arguments and return the result as ASCII bytes."""
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3957
3958
def update_url_query(url, query):
    """Return url with the parameters of query added to (or replacing those in) its query string.

    url is returned unchanged when query is empty.
    """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3967
8e60dc75 3968
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Return a copy of req with url, data, headers and query parameters updated.

    The HTTP method of req (HEAD, PUT or the default) and its timeout
    attribute, if present, carry over to the new request.
    """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    # Pick the request class that reports the same method as req
    req_type = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3987
3988
def _multipart_encode_impl(data, boundary):
    """Encode data as multipart/form-data using the given boundary.

    Returns (body bytes, content type). Raises ValueError when the
    boundary occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
4009
4010
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (encoded body, content type). A ValueError is raised
    when a specified boundary overlaps with the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-chosen boundary is not retried: a clash is the caller's error
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary occurs in the data; draw another one
            continue
4039
4040
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up one key, or the first usable of several keys, in d.

    For a list or tuple of keys, the value of the first key that is
    present and not None (and, with skip_false_values, not falsy) is
    returned. default is returned when nothing is found.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
4049
4050
def try_get(src, getter, expected_type=None):
    """Return the first usable result of applying getter(s) to src, or None.

    getter is a callable or a list/tuple of callables. A getter that
    raises AttributeError, KeyError, TypeError or IndexError, or whose
    result is not of expected_type (when given), is skipped.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4062
4063
6cc62232
S
def merge_dicts(*dicts):
    """Merge dicts into a new dict, with earlier dicts taking precedence.

    None values are ignored. An empty string already merged is replaced
    by a later non-empty string for the same key.
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_blank = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_blank:
                    continue
            merged[key] = value
    return merged
4076
4077
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as text, decoding it with encoding/errors unless it already is text."""
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4080
16392824 4081
a1a530b0
PH
# Age limit that parse_age_limit returns for each US rating
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4089
4090
# Age limit that parse_age_limit returns for each TV parental guideline
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4099
4100
def parse_age_limit(s):
    """Convert an age rating into an age limit in years, or None.

    Accepted are ints from 0 to 21, strings such as "18" or "18+", the
    keys of US_RATINGS and the TV parental guidelines (with '-', '_' or
    nothing after "TV"); strings are matched case-insensitively.
    """
    if type(s) is int:
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))

    rating = s.upper()
    if rating in US_RATINGS:
        return US_RATINGS[rating]

    # k[3:] drops the 'TV-' prefix of the table keys
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, rating)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4116
4117
def strip_jsonp(code):
    """Return the argument of a JSONP callback invocation.

    Handles an optional "window." prefix, the "cb && cb(...)" form, an
    optional trailing ';' and trailing '//' comments. Input that does not
    match is returned unchanged.
    """
    JSONP_RE = r'''(?sx)^
        (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
        (?:\s*&&\s*(?P=func_name))?
        \s*\(\s*(?P<callback_data>.*)\);?
        \s*?(?://[^\n]*)*$'''
    return re.sub(JSONP_RE, r'\g<callback_data>', code)
478c2c61
PH
4126
4127
def js_to_json(code, vars={}):
    """Convert a JavaScript object literal into JSON text.

    Quotes bare identifiers and single-quoted strings, converts hex and
    octal integers to decimal, and drops comments, trailing commas and
    '!' runs. vars maps identifier names to the replacement text to
    substitute for them.
    """
    # vars is a dict of var, val pairs to substitute
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied inside string literals; anything else is kept as is
    STRING_ESCAPES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_escape(m):
        token = m.group(0)
        return STRING_ESCAPES.get(token, token)

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v == ',' or v.startswith(('/*', '//', '!')):
            return ""

        if v[0] in ("'", '"'):
            v = re.sub(r'(?s)\\.|"', fix_escape, v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

            if v in vars:
                return vars[v]

        return '"%s"' % v

    TOKEN_RE = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(TOKEN_RE, fix_kv, code)
e05f6939
PH
4172
4173
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its index in quality_ids,
    or to -1 when the id is unknown """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4182
acd69589 4183
# Default output templates by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types
# NOTE(review): the values look like filename suffixes for the type - confirm
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
0a871f68 4198
a020a0dc
PH
4199
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that the result, including the
    trailing '...', is length characters long. None is passed through """
    if s is None:
        return None
    if len(s) <= length:
        return s
    ELLIPSES = '...'
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4208
4209
def version_tuple(v):
    """Split a version string on '.' and '-' into a tuple of ints."""
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
4212
4213
def is_outdated_version(version, limit, assume_new=True):
    """Return whether version is older than limit.

    A missing or unparsable version counts as outdated only when
    assume_new is false.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
732ea2f0
PH
4221
4222
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U (currently always False) """
    # NOTE(review): this early return makes the detection below unreachable;
    # presumably it is kept for when updating gets re-enabled - confirm
    return False

    from zipimport import zipimporter

    loaded_from_zip = isinstance(globals().get('__loader__'), zipimporter)
    return loaded_from_zip or hasattr(sys, 'frozen')
7d4111ed
PH
4230
4231
def args_to_str(args):
    """Return args as one space-separated string with every item shell-quoted."""
    # Get a short string representation for a subprocess command
    return ' '.join(map(compat_shlex_quote, args))
2ccd1b10
PH
4235
4236
def error_to_compat_str(err):
    """Return the message of err as text."""
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4244
4245
def mimetype2ext(mt):
    """Map a MIME type to a file extension.

    A few full types are mapped directly. Otherwise the subtype (with
    parameters stripped, lower-cased) is looked up and, when unknown,
    returned itself. None is passed through.
    """
    if mt is None:
        return None

    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }
    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }

    known = FULL_TYPE_MAP.get(mt)
    if known is not None:
        return known

    subtype = mt.rpartition('/')[2].partition(';')[0].strip().lower()
    return SUBTYPE_MAP.get(subtype, subtype)
4283
4284
def parse_codecs(codecs_str):
    """Split a codecs string into {'vcodec': ..., 'acodec': ...}.

    The first known video and audio codec are picked; a missing one is
    reported as 'none'. When no codec is known but exactly two are given,
    they are taken as video and audio in that order. Otherwise an empty
    dict is returned. Unknown codecs trigger a warning on stderr.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    split_codecs = [part for part in stripped if part]

    vcodec, acodec = None, None
    for full_codec in split_codecs:
        # Only the part before the first '.' identifies the codec family
        codec = full_codec.split('.')[0]
        if codec in VIDEO_CODECS:
            if not vcodec:
                vcodec = full_codec
        elif codec in AUDIO_CODECS:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4314
4315
def urlhandle_detect_ext(url_handle):
    """Determine a file extension from the headers of url_handle.

    The filename of an attachment Content-Disposition header is preferred;
    otherwise the Content-Type is mapped through mimetype2ext.
    """
    getheader = url_handle.headers.get

    disposition = getheader('Content-Disposition')
    if disposition:
        found = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if found:
            ext = determine_ext(found.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
4328
4329
1e399778
YCH
def encode_data_uri(data, mime_type):
    """Return a base64 "data:" URI holding data (bytes) with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4332
4333
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4342
4343
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    The bytes are decoded according to their BOM (UTF-8 without one). The
    result is a match object when the text starts with '<' after optional
    whitespace, None otherwise. """

    # The UTF-32 BOMs come before the UTF-16 ones they begin with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, payload = 'utf-8', first_bytes
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, payload = enc, first_bytes[len(bom):]
            break
    text = payload.decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4362
4363
def determine_protocol(info_dict):
    """Return the download protocol for info_dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the start of the 'url' entry (rtmp, mms, rtsp), from its
    extension (m3u8, f4m) or, failing those, from its URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4384
4385
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """Render a header row plus a list of data rows as aligned text.

    Every column except the last is left-justified to its widest cell
    (plus extraGap); rows are joined with newlines.
    delim adds a row of dashes under the header.
    hideEmpty drops every column whose data cells are all empty.
    """

    def column_widths(rows):
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_flagged(row, flags):
        return [cell for (flag, cell) in zip(flags, row) if flag]

    if hideEmpty:
        # A data-only width of 0 marks a column with nothing to show
        data_widths = column_widths(data)
        header_row = keep_flagged(header_row, data_widths)
        data = [keep_flagged(row, data_widths) for row in data]

    widths = column_widths([header_row] + data)

    rows = [header_row]
    if delim:
        rows.append(['-' * width for width in widths])
    rows.extend(data)

    padded_cells = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded_cells) + ' %s'
    return '\n'.join(row_format % tuple(row) for row in rows)
347de493
PH
4406
4407
def _match_one(filter_part, dct):
    """Evaluate one filter expression against the dictionary dct.

    filter_part is either a comparison ("key OP value", where OP is one of
    the COMPARISON_OPERATORS keys and may be followed by "?") or a unary
    test ("key" or "!key").

    Returns the result of applying the operator. For a comparison whose key
    is missing from dct (or maps to None), returns the text matched by the
    optional "?" (truthy), or None when no "?" was written.

    Raises ValueError when the expression cannot be parsed, when an
    ordering operator is used with a string value, or when a numeric value
    cannot be interpreted.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # Verbose regex: key, operator, optional "?", then exactly one of a
    # number (with optional size suffix), a quoted string, or a bare word.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        # String comparison path: the value was written as a string, or it
        # looks numeric but the field in dct holds a string.
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape backslash-escaped occurrences of the quote character
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric path: plain integer first, then parse_filesize on the
            # text as written, then once more with a 'B' appended
            # (presumably to accept suffixes such as "10M" - verify against
            # parse_filesize).
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: passes only when the operator was followed by "?"
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: "key" tests presence, "!key" tests absence; for boolean
    # fields the value itself (True / False) is tested instead.
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4476
4477
def match_str(filter_str, dct):
    """Filter a dictionary with a simple string syntax.

    filter_str is split on '&' and every part must pass _match_one.
    Returns True (= passes filter) or False.
    """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4483
4484
def match_filter_func(filter_str):
    """Build a match-filter callback for filter_str.

    The returned function takes an info dict and returns None when it
    passes the filter, or a message explaining why it is skipped.
    """
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        # Fall back to the id, then to a generic label, for the message
        fallback_name = info_dict.get('id', 'video')
        video_title = info_dict.get('title', fallback_name)
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)

    return _match_func
91410c9b
PH
4493
4494
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds (float).

    Accepts an offset such as "12.5" or "12.5s", or a clock time
    "H:MM:SS", optionally followed by a fraction introduced by '.' or ':'.
    Returns None for empty input or an unrecognized expression.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if not clock:
        return None

    hours, minutes, seconds = clock.groups()
    # A ':' before the fraction is treated like a decimal point
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4506
4507
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode "HH:MM:SS,mmm".

    The value is rounded to the nearest millisecond before it is split
    into fields. Formatting the float parts directly with %d truncates
    them, so a value like 1.001 (stored as 1.000999...) would lose its
    last millisecond.
    """
    total_ms = int(round(seconds * 1000))
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, millis)
bf6427d2
YCH
4510
4511
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT, rendering the supported
    styling attributes as <font>/<b>/<i>/<u> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # (current namespace, [legacy namespaces rewritten to it before parsing])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # tts:* attributes that are carried over into the SRT output
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled from the <style> elements below
    styles = {}
    # style inherited by every element, taken from <body>/<div>
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that turns one <p> subtree into SRT text.
        # NOTE(review): the two lists below are class attributes, so they
        # are shared by every parser instance created during one dfxp2srt
        # call. _applied_styles is appended whenever an element has a style
        # but popped only when that element opened at least one tag, so
        # entries can remain from one paragraph to the next - confirm
        # whether that is intended before changing it.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence, lowest to highest: default style, referenced
                # style, inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from an enclosing element
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                # Close the tags opened by the matching start() in reverse order
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the element and replay it through the target parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements. A style may inherit from a parent that is
    # defined later in the document, so passes are repeated until every
    # parent reference has been resolved.
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Stop when nothing is pending, or when a whole pass added no new
        # style: the remaining parents (undefined, cyclic, or without any
        # supported property) can never be resolved and would otherwise
        # keep this loop running forever.
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No usable end: derive it from the duration, else skip the cue
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4674
4675
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argument list for an option that takes a value.

    Looks up param in params; a truthy value is converted with compat_str.
    Returns [command_option, value], or [] when the value is None.
    """
    value = params.get(param)
    if value:
        value = compat_str(value)
    if value is None:
        return []
    return [command_option, value]
4681
4682
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argument list for a boolean option.

    Returns [] when params has no value for param. Otherwise the value
    must be a bool and is rendered as true_value or false_value, either as
    a separate argument or joined to command_option with separator.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)

    rendered = true_value if flag else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4691
4692
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4696
4697
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Select extra command-line arguments from a per-key argument mapping.

    argdict maps lower-case keys to argument lists. keys is a list/tuple
    whose items are either a single key or a sequence of keys; the first
    item with at least one non-None entry in argdict wins, and the
    argument lists of all its matching keys are concatenated.

    For backward compatibility argdict may also be a plain list/tuple: it
    is returned unchanged when use_compat is true and ignored otherwise.

    Returns default when argdict is None (or ignored) or nothing matches.
    A list default is returned as a fresh copy so that callers which
    extend the result cannot alter the shared default of later calls.
    """
    def fallback():
        return list(default) if isinstance(default, list) else default

    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return fallback()
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        if isinstance(key_list, compat_str):
            key_list = (key_list,)
        arg_list = [
            args for args in (argdict.get(key.lower()) for key in key_list)
            if args is not None]
        if arg_list:
            return [arg for args in arg_list for arg in args]
    return fallback()
66e289ba
S
4718
4719
39672624
YCH
4720class ISO639Utils(object):
4721 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4722 _lang_map = {
4723 'aa': 'aar',
4724 'ab': 'abk',
4725 'ae': 'ave',
4726 'af': 'afr',
4727 'ak': 'aka',
4728 'am': 'amh',
4729 'an': 'arg',
4730 'ar': 'ara',
4731 'as': 'asm',
4732 'av': 'ava',
4733 'ay': 'aym',
4734 'az': 'aze',
4735 'ba': 'bak',
4736 'be': 'bel',
4737 'bg': 'bul',
4738 'bh': 'bih',
4739 'bi': 'bis',
4740 'bm': 'bam',
4741 'bn': 'ben',
4742 'bo': 'bod',
4743 'br': 'bre',
4744 'bs': 'bos',
4745 'ca': 'cat',
4746 'ce': 'che',
4747 'ch': 'cha',
4748 'co': 'cos',
4749 'cr': 'cre',
4750 'cs': 'ces',
4751 'cu': 'chu',
4752 'cv': 'chv',
4753 'cy': 'cym',
4754 'da': 'dan',
4755 'de': 'deu',
4756 'dv': 'div',
4757 'dz': 'dzo',
4758 'ee': 'ewe',
4759 'el': 'ell',
4760 'en': 'eng',
4761 'eo': 'epo',
4762 'es': 'spa',
4763 'et': 'est',
4764 'eu': 'eus',
4765 'fa': 'fas',
4766 'ff': 'ful',
4767 'fi': 'fin',
4768 'fj': 'fij',
4769 'fo': 'fao',
4770 'fr': 'fra',
4771 'fy': 'fry',
4772 'ga': 'gle',
4773 'gd': 'gla',
4774 'gl': 'glg',
4775 'gn': 'grn',
4776 'gu': 'guj',
4777 'gv': 'glv',
4778 'ha': 'hau',
4779 'he': 'heb',
b7acc835 4780 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4781 'hi': 'hin',
4782 'ho': 'hmo',
4783 'hr': 'hrv',
4784 'ht': 'hat',
4785 'hu': 'hun',
4786 'hy': 'hye',
4787 'hz': 'her',
4788 'ia': 'ina',
4789 'id': 'ind',
b7acc835 4790 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4791 'ie': 'ile',
4792 'ig': 'ibo',
4793 'ii': 'iii',
4794 'ik': 'ipk',
4795 'io': 'ido',
4796 'is': 'isl',
4797 'it': 'ita',
4798 'iu': 'iku',
4799 'ja': 'jpn',
4800 'jv': 'jav',
4801 'ka': 'kat',
4802 'kg': 'kon',
4803 'ki': 'kik',
4804 'kj': 'kua',
4805 'kk': 'kaz',
4806 'kl': 'kal',
4807 'km': 'khm',
4808 'kn': 'kan',
4809 'ko': 'kor',
4810 'kr': 'kau',
4811 'ks': 'kas',
4812 'ku': 'kur',
4813 'kv': 'kom',
4814 'kw': 'cor',
4815 'ky': 'kir',
4816 'la': 'lat',
4817 'lb': 'ltz',
4818 'lg': 'lug',
4819 'li': 'lim',
4820 'ln': 'lin',
4821 'lo': 'lao',
4822 'lt': 'lit',
4823 'lu': 'lub',
4824 'lv': 'lav',
4825 'mg': 'mlg',
4826 'mh': 'mah',
4827 'mi': 'mri',
4828 'mk': 'mkd',
4829 'ml': 'mal',
4830 'mn': 'mon',
4831 'mr': 'mar',
4832 'ms': 'msa',
4833 'mt': 'mlt',
4834 'my': 'mya',
4835 'na': 'nau',
4836 'nb': 'nob',
4837 'nd': 'nde',
4838 'ne': 'nep',
4839 'ng': 'ndo',
4840 'nl': 'nld',
4841 'nn': 'nno',
4842 'no': 'nor',
4843 'nr': 'nbl',
4844 'nv': 'nav',
4845 'ny': 'nya',
4846 'oc': 'oci',
4847 'oj': 'oji',
4848 'om': 'orm',
4849 'or': 'ori',
4850 'os': 'oss',
4851 'pa': 'pan',
4852 'pi': 'pli',
4853 'pl': 'pol',
4854 'ps': 'pus',
4855 'pt': 'por',
4856 'qu': 'que',
4857 'rm': 'roh',
4858 'rn': 'run',
4859 'ro': 'ron',
4860 'ru': 'rus',
4861 'rw': 'kin',
4862 'sa': 'san',
4863 'sc': 'srd',
4864 'sd': 'snd',
4865 'se': 'sme',
4866 'sg': 'sag',
4867 'si': 'sin',
4868 'sk': 'slk',
4869 'sl': 'slv',
4870 'sm': 'smo',
4871 'sn': 'sna',
4872 'so': 'som',
4873 'sq': 'sqi',
4874 'sr': 'srp',
4875 'ss': 'ssw',
4876 'st': 'sot',
4877 'su': 'sun',
4878 'sv': 'swe',
4879 'sw': 'swa',
4880 'ta': 'tam',
4881 'te': 'tel',
4882 'tg': 'tgk',
4883 'th': 'tha',
4884 'ti': 'tir',
4885 'tk': 'tuk',
4886 'tl': 'tgl',
4887 'tn': 'tsn',
4888 'to': 'ton',
4889 'tr': 'tur',
4890 'ts': 'tso',
4891 'tt': 'tat',
4892 'tw': 'twi',
4893 'ty': 'tah',
4894 'ug': 'uig',
4895 'uk': 'ukr',
4896 'ur': 'urd',
4897 'uz': 'uzb',
4898 've': 'ven',
4899 'vi': 'vie',
4900 'vo': 'vol',
4901 'wa': 'wln',
4902 'wo': 'wol',
4903 'xh': 'xho',
4904 'yi': 'yid',
e9a50fba 4905 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
4906 'yo': 'yor',
4907 'za': 'zha',
4908 'zh': 'zho',
4909 'zu': 'zul',
4910 }
4911
4912 @classmethod
4913 def short2long(cls, code):
4914 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4915 return cls._lang_map.get(code[:2])
4916
4917 @classmethod
4918 def long2short(cls, code):
4919 """Convert language code from ISO 639-2/T to ISO 639-1"""
4920 for short_name, long_name in cls._lang_map.items():
4921 if long_name == code:
4922 return short_name
4923
4924
4eb10f66
YCH
4925class ISO3166Utils(object):
4926 # From http://data.okfn.org/data/core/country-list
4927 _country_map = {
4928 'AF': 'Afghanistan',
4929 'AX': 'Åland Islands',
4930 'AL': 'Albania',
4931 'DZ': 'Algeria',
4932 'AS': 'American Samoa',
4933 'AD': 'Andorra',
4934 'AO': 'Angola',
4935 'AI': 'Anguilla',
4936 'AQ': 'Antarctica',
4937 'AG': 'Antigua and Barbuda',
4938 'AR': 'Argentina',
4939 'AM': 'Armenia',
4940 'AW': 'Aruba',
4941 'AU': 'Australia',
4942 'AT': 'Austria',
4943 'AZ': 'Azerbaijan',
4944 'BS': 'Bahamas',
4945 'BH': 'Bahrain',
4946 'BD': 'Bangladesh',
4947 'BB': 'Barbados',
4948 'BY': 'Belarus',
4949 'BE': 'Belgium',
4950 'BZ': 'Belize',
4951 'BJ': 'Benin',
4952 'BM': 'Bermuda',
4953 'BT': 'Bhutan',
4954 'BO': 'Bolivia, Plurinational State of',
4955 'BQ': 'Bonaire, Sint Eustatius and Saba',
4956 'BA': 'Bosnia and Herzegovina',
4957 'BW': 'Botswana',
4958 'BV': 'Bouvet Island',
4959 'BR': 'Brazil',
4960 'IO': 'British Indian Ocean Territory',
4961 'BN': 'Brunei Darussalam',
4962 'BG': 'Bulgaria',
4963 'BF': 'Burkina Faso',
4964 'BI': 'Burundi',
4965 'KH': 'Cambodia',
4966 'CM': 'Cameroon',
4967 'CA': 'Canada',
4968 'CV': 'Cape Verde',
4969 'KY': 'Cayman Islands',
4970 'CF': 'Central African Republic',
4971 'TD': 'Chad',
4972 'CL': 'Chile',
4973 'CN': 'China',
4974 'CX': 'Christmas Island',
4975 'CC': 'Cocos (Keeling) Islands',
4976 'CO': 'Colombia',
4977 'KM': 'Comoros',
4978 'CG': 'Congo',
4979 'CD': 'Congo, the Democratic Republic of the',
4980 'CK': 'Cook Islands',
4981 'CR': 'Costa Rica',
4982 'CI': 'Côte d\'Ivoire',
4983 'HR': 'Croatia',
4984 'CU': 'Cuba',
4985 'CW': 'Curaçao',
4986 'CY': 'Cyprus',
4987 'CZ': 'Czech Republic',
4988 'DK': 'Denmark',
4989 'DJ': 'Djibouti',
4990 'DM': 'Dominica',
4991 'DO': 'Dominican Republic',
4992 'EC': 'Ecuador',
4993 'EG': 'Egypt',
4994 'SV': 'El Salvador',
4995 'GQ': 'Equatorial Guinea',
4996 'ER': 'Eritrea',
4997 'EE': 'Estonia',
4998 'ET': 'Ethiopia',
4999 'FK': 'Falkland Islands (Malvinas)',
5000 'FO': 'Faroe Islands',
5001 'FJ': 'Fiji',
5002 'FI': 'Finland',
5003 'FR': 'France',
5004 'GF': 'French Guiana',
5005 'PF': 'French Polynesia',
5006 'TF': 'French Southern Territories',
5007 'GA': 'Gabon',
5008 'GM': 'Gambia',
5009 'GE': 'Georgia',
5010 'DE': 'Germany',
5011 'GH': 'Ghana',
5012 'GI': 'Gibraltar',
5013 'GR': 'Greece',
5014 'GL': 'Greenland',
5015 'GD': 'Grenada',
5016 'GP': 'Guadeloupe',
5017 'GU': 'Guam',
5018 'GT': 'Guatemala',
5019 'GG': 'Guernsey',
5020 'GN': 'Guinea',
5021 'GW': 'Guinea-Bissau',
5022 'GY': 'Guyana',
5023 'HT': 'Haiti',
5024 'HM': 'Heard Island and McDonald Islands',
5025 'VA': 'Holy See (Vatican City State)',
5026 'HN': 'Honduras',
5027 'HK': 'Hong Kong',
5028 'HU': 'Hungary',
5029 'IS': 'Iceland',
5030 'IN': 'India',
5031 'ID': 'Indonesia',
5032 'IR': 'Iran, Islamic Republic of',
5033 'IQ': 'Iraq',
5034 'IE': 'Ireland',
5035 'IM': 'Isle of Man',
5036 'IL': 'Israel',
5037 'IT': 'Italy',
5038 'JM': 'Jamaica',
5039 'JP': 'Japan',
5040 'JE': 'Jersey',
5041 'JO': 'Jordan',
5042 'KZ': 'Kazakhstan',
5043 'KE': 'Kenya',
5044 'KI': 'Kiribati',
5045 'KP': 'Korea, Democratic People\'s Republic of',
5046 'KR': 'Korea, Republic of',
5047 'KW': 'Kuwait',
5048 'KG': 'Kyrgyzstan',
5049 'LA': 'Lao People\'s Democratic Republic',
5050 'LV': 'Latvia',
5051 'LB': 'Lebanon',
5052 'LS': 'Lesotho',
5053 'LR': 'Liberia',
5054 'LY': 'Libya',
5055 'LI': 'Liechtenstein',
5056 'LT': 'Lithuania',
5057 'LU': 'Luxembourg',
5058 'MO': 'Macao',
5059 'MK': 'Macedonia, the Former Yugoslav Republic of',
5060 'MG': 'Madagascar',
5061 'MW': 'Malawi',
5062 'MY': 'Malaysia',
5063 'MV': 'Maldives',
5064 'ML': 'Mali',
5065 'MT': 'Malta',
5066 'MH': 'Marshall Islands',
5067 'MQ': 'Martinique',
5068 'MR': 'Mauritania',
5069 'MU': 'Mauritius',
5070 'YT': 'Mayotte',
5071 'MX': 'Mexico',
5072 'FM': 'Micronesia, Federated States of',
5073 'MD': 'Moldova, Republic of',
5074 'MC': 'Monaco',
5075 'MN': 'Mongolia',
5076 'ME': 'Montenegro',
5077 'MS': 'Montserrat',
5078 'MA': 'Morocco',
5079 'MZ': 'Mozambique',
5080 'MM': 'Myanmar',
5081 'NA': 'Namibia',
5082 'NR': 'Nauru',
5083 'NP': 'Nepal',
5084 'NL': 'Netherlands',
5085 'NC': 'New Caledonia',
5086 'NZ': 'New Zealand',
5087 'NI': 'Nicaragua',
5088 'NE': 'Niger',
5089 'NG': 'Nigeria',
5090 'NU': 'Niue',
5091 'NF': 'Norfolk Island',
5092 'MP': 'Northern Mariana Islands',
5093 'NO': 'Norway',
5094 'OM': 'Oman',
5095 'PK': 'Pakistan',
5096 'PW': 'Palau',
5097 'PS': 'Palestine, State of',
5098 'PA': 'Panama',
5099 'PG': 'Papua New Guinea',
5100 'PY': 'Paraguay',
5101 'PE': 'Peru',
5102 'PH': 'Philippines',
5103 'PN': 'Pitcairn',
5104 'PL': 'Poland',
5105 'PT': 'Portugal',
5106 'PR': 'Puerto Rico',
5107 'QA': 'Qatar',
5108 'RE': 'Réunion',
5109 'RO': 'Romania',
5110 'RU': 'Russian Federation',
5111 'RW': 'Rwanda',
5112 'BL': 'Saint Barthélemy',
5113 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5114 'KN': 'Saint Kitts and Nevis',
5115 'LC': 'Saint Lucia',
5116 'MF': 'Saint Martin (French part)',
5117 'PM': 'Saint Pierre and Miquelon',
5118 'VC': 'Saint Vincent and the Grenadines',
5119 'WS': 'Samoa',
5120 'SM': 'San Marino',
5121 'ST': 'Sao Tome and Principe',
5122 'SA': 'Saudi Arabia',
5123 'SN': 'Senegal',
5124 'RS': 'Serbia',
5125 'SC': 'Seychelles',
5126 'SL': 'Sierra Leone',
5127 'SG': 'Singapore',
5128 'SX': 'Sint Maarten (Dutch part)',
5129 'SK': 'Slovakia',
5130 'SI': 'Slovenia',
5131 'SB': 'Solomon Islands',
5132 'SO': 'Somalia',
5133 'ZA': 'South Africa',
5134 'GS': 'South Georgia and the South Sandwich Islands',
5135 'SS': 'South Sudan',
5136 'ES': 'Spain',
5137 'LK': 'Sri Lanka',
5138 'SD': 'Sudan',
5139 'SR': 'Suriname',
5140 'SJ': 'Svalbard and Jan Mayen',
5141 'SZ': 'Swaziland',
5142 'SE': 'Sweden',
5143 'CH': 'Switzerland',
5144 'SY': 'Syrian Arab Republic',
5145 'TW': 'Taiwan, Province of China',
5146 'TJ': 'Tajikistan',
5147 'TZ': 'Tanzania, United Republic of',
5148 'TH': 'Thailand',
5149 'TL': 'Timor-Leste',
5150 'TG': 'Togo',
5151 'TK': 'Tokelau',
5152 'TO': 'Tonga',
5153 'TT': 'Trinidad and Tobago',
5154 'TN': 'Tunisia',
5155 'TR': 'Turkey',
5156 'TM': 'Turkmenistan',
5157 'TC': 'Turks and Caicos Islands',
5158 'TV': 'Tuvalu',
5159 'UG': 'Uganda',
5160 'UA': 'Ukraine',
5161 'AE': 'United Arab Emirates',
5162 'GB': 'United Kingdom',
5163 'US': 'United States',
5164 'UM': 'United States Minor Outlying Islands',
5165 'UY': 'Uruguay',
5166 'UZ': 'Uzbekistan',
5167 'VU': 'Vanuatu',
5168 'VE': 'Venezuela, Bolivarian Republic of',
5169 'VN': 'Viet Nam',
5170 'VG': 'Virgin Islands, British',
5171 'VI': 'Virgin Islands, U.S.',
5172 'WF': 'Wallis and Futuna',
5173 'EH': 'Western Sahara',
5174 'YE': 'Yemen',
5175 'ZM': 'Zambia',
5176 'ZW': 'Zimbabwe',
5177 }
5178
5179 @classmethod
5180 def short2full(cls, code):
5181 """Convert an ISO 3166-2 country code to the corresponding full name"""
5182 return cls._country_map.get(code.upper())
5183
5184
773f291d
S
5185class GeoUtils(object):
5186 # Major IPv4 address blocks per country
5187 _country_ip_map = {
53896ca5 5188 'AD': '46.172.224.0/19',
773f291d
S
5189 'AE': '94.200.0.0/13',
5190 'AF': '149.54.0.0/17',
5191 'AG': '209.59.64.0/18',
5192 'AI': '204.14.248.0/21',
5193 'AL': '46.99.0.0/16',
5194 'AM': '46.70.0.0/15',
5195 'AO': '105.168.0.0/13',
53896ca5
S
5196 'AP': '182.50.184.0/21',
5197 'AQ': '23.154.160.0/24',
773f291d
S
5198 'AR': '181.0.0.0/12',
5199 'AS': '202.70.112.0/20',
53896ca5 5200 'AT': '77.116.0.0/14',
773f291d
S
5201 'AU': '1.128.0.0/11',
5202 'AW': '181.41.0.0/18',
53896ca5
S
5203 'AX': '185.217.4.0/22',
5204 'AZ': '5.197.0.0/16',
773f291d
S
5205 'BA': '31.176.128.0/17',
5206 'BB': '65.48.128.0/17',
5207 'BD': '114.130.0.0/16',
5208 'BE': '57.0.0.0/8',
53896ca5 5209 'BF': '102.178.0.0/15',
773f291d
S
5210 'BG': '95.42.0.0/15',
5211 'BH': '37.131.0.0/17',
5212 'BI': '154.117.192.0/18',
5213 'BJ': '137.255.0.0/16',
53896ca5 5214 'BL': '185.212.72.0/23',
773f291d
S
5215 'BM': '196.12.64.0/18',
5216 'BN': '156.31.0.0/16',
5217 'BO': '161.56.0.0/16',
5218 'BQ': '161.0.80.0/20',
53896ca5 5219 'BR': '191.128.0.0/12',
773f291d
S
5220 'BS': '24.51.64.0/18',
5221 'BT': '119.2.96.0/19',
5222 'BW': '168.167.0.0/16',
5223 'BY': '178.120.0.0/13',
5224 'BZ': '179.42.192.0/18',
5225 'CA': '99.224.0.0/11',
5226 'CD': '41.243.0.0/16',
53896ca5
S
5227 'CF': '197.242.176.0/21',
5228 'CG': '160.113.0.0/16',
773f291d 5229 'CH': '85.0.0.0/13',
53896ca5 5230 'CI': '102.136.0.0/14',
773f291d
S
5231 'CK': '202.65.32.0/19',
5232 'CL': '152.172.0.0/14',
53896ca5 5233 'CM': '102.244.0.0/14',
773f291d
S
5234 'CN': '36.128.0.0/10',
5235 'CO': '181.240.0.0/12',
5236 'CR': '201.192.0.0/12',
5237 'CU': '152.206.0.0/15',
5238 'CV': '165.90.96.0/19',
5239 'CW': '190.88.128.0/17',
53896ca5 5240 'CY': '31.153.0.0/16',
773f291d
S
5241 'CZ': '88.100.0.0/14',
5242 'DE': '53.0.0.0/8',
5243 'DJ': '197.241.0.0/17',
5244 'DK': '87.48.0.0/12',
5245 'DM': '192.243.48.0/20',
5246 'DO': '152.166.0.0/15',
5247 'DZ': '41.96.0.0/12',
5248 'EC': '186.68.0.0/15',
5249 'EE': '90.190.0.0/15',
5250 'EG': '156.160.0.0/11',
5251 'ER': '196.200.96.0/20',
5252 'ES': '88.0.0.0/11',
5253 'ET': '196.188.0.0/14',
5254 'EU': '2.16.0.0/13',
5255 'FI': '91.152.0.0/13',
5256 'FJ': '144.120.0.0/16',
53896ca5 5257 'FK': '80.73.208.0/21',
773f291d
S
5258 'FM': '119.252.112.0/20',
5259 'FO': '88.85.32.0/19',
5260 'FR': '90.0.0.0/9',
5261 'GA': '41.158.0.0/15',
5262 'GB': '25.0.0.0/8',
5263 'GD': '74.122.88.0/21',
5264 'GE': '31.146.0.0/16',
5265 'GF': '161.22.64.0/18',
5266 'GG': '62.68.160.0/19',
53896ca5
S
5267 'GH': '154.160.0.0/12',
5268 'GI': '95.164.0.0/16',
773f291d
S
5269 'GL': '88.83.0.0/19',
5270 'GM': '160.182.0.0/15',
5271 'GN': '197.149.192.0/18',
5272 'GP': '104.250.0.0/19',
5273 'GQ': '105.235.224.0/20',
5274 'GR': '94.64.0.0/13',
5275 'GT': '168.234.0.0/16',
5276 'GU': '168.123.0.0/16',
5277 'GW': '197.214.80.0/20',
5278 'GY': '181.41.64.0/18',
5279 'HK': '113.252.0.0/14',
5280 'HN': '181.210.0.0/16',
5281 'HR': '93.136.0.0/13',
5282 'HT': '148.102.128.0/17',
5283 'HU': '84.0.0.0/14',
5284 'ID': '39.192.0.0/10',
5285 'IE': '87.32.0.0/12',
5286 'IL': '79.176.0.0/13',
5287 'IM': '5.62.80.0/20',
5288 'IN': '117.192.0.0/10',
5289 'IO': '203.83.48.0/21',
5290 'IQ': '37.236.0.0/14',
5291 'IR': '2.176.0.0/12',
5292 'IS': '82.221.0.0/16',
5293 'IT': '79.0.0.0/10',
5294 'JE': '87.244.64.0/18',
5295 'JM': '72.27.0.0/17',
5296 'JO': '176.29.0.0/16',
53896ca5 5297 'JP': '133.0.0.0/8',
773f291d
S
5298 'KE': '105.48.0.0/12',
5299 'KG': '158.181.128.0/17',
5300 'KH': '36.37.128.0/17',
5301 'KI': '103.25.140.0/22',
5302 'KM': '197.255.224.0/20',
53896ca5 5303 'KN': '198.167.192.0/19',
773f291d
S
5304 'KP': '175.45.176.0/22',
5305 'KR': '175.192.0.0/10',
5306 'KW': '37.36.0.0/14',
5307 'KY': '64.96.0.0/15',
5308 'KZ': '2.72.0.0/13',
5309 'LA': '115.84.64.0/18',
5310 'LB': '178.135.0.0/16',
53896ca5 5311 'LC': '24.92.144.0/20',
773f291d
S
5312 'LI': '82.117.0.0/19',
5313 'LK': '112.134.0.0/15',
53896ca5 5314 'LR': '102.183.0.0/16',
773f291d
S
5315 'LS': '129.232.0.0/17',
5316 'LT': '78.56.0.0/13',
5317 'LU': '188.42.0.0/16',
5318 'LV': '46.109.0.0/16',
5319 'LY': '41.252.0.0/14',
5320 'MA': '105.128.0.0/11',
5321 'MC': '88.209.64.0/18',
5322 'MD': '37.246.0.0/16',
5323 'ME': '178.175.0.0/17',
5324 'MF': '74.112.232.0/21',
5325 'MG': '154.126.0.0/17',
5326 'MH': '117.103.88.0/21',
5327 'MK': '77.28.0.0/15',
5328 'ML': '154.118.128.0/18',
5329 'MM': '37.111.0.0/17',
5330 'MN': '49.0.128.0/17',
5331 'MO': '60.246.0.0/16',
5332 'MP': '202.88.64.0/20',
5333 'MQ': '109.203.224.0/19',
5334 'MR': '41.188.64.0/18',
5335 'MS': '208.90.112.0/22',
5336 'MT': '46.11.0.0/16',
5337 'MU': '105.16.0.0/12',
5338 'MV': '27.114.128.0/18',
53896ca5 5339 'MW': '102.70.0.0/15',
773f291d
S
5340 'MX': '187.192.0.0/11',
5341 'MY': '175.136.0.0/13',
5342 'MZ': '197.218.0.0/15',
5343 'NA': '41.182.0.0/16',
5344 'NC': '101.101.0.0/18',
5345 'NE': '197.214.0.0/18',
5346 'NF': '203.17.240.0/22',
5347 'NG': '105.112.0.0/12',
5348 'NI': '186.76.0.0/15',
5349 'NL': '145.96.0.0/11',
5350 'NO': '84.208.0.0/13',
5351 'NP': '36.252.0.0/15',
5352 'NR': '203.98.224.0/19',
5353 'NU': '49.156.48.0/22',
5354 'NZ': '49.224.0.0/14',
5355 'OM': '5.36.0.0/15',
5356 'PA': '186.72.0.0/15',
5357 'PE': '186.160.0.0/14',
5358 'PF': '123.50.64.0/18',
5359 'PG': '124.240.192.0/19',
5360 'PH': '49.144.0.0/13',
5361 'PK': '39.32.0.0/11',
5362 'PL': '83.0.0.0/11',
5363 'PM': '70.36.0.0/20',
5364 'PR': '66.50.0.0/16',
5365 'PS': '188.161.0.0/16',
5366 'PT': '85.240.0.0/13',
5367 'PW': '202.124.224.0/20',
5368 'PY': '181.120.0.0/14',
5369 'QA': '37.210.0.0/15',
53896ca5 5370 'RE': '102.35.0.0/16',
773f291d 5371 'RO': '79.112.0.0/13',
53896ca5 5372 'RS': '93.86.0.0/15',
773f291d 5373 'RU': '5.136.0.0/13',
53896ca5 5374 'RW': '41.186.0.0/16',
773f291d
S
5375 'SA': '188.48.0.0/13',
5376 'SB': '202.1.160.0/19',
5377 'SC': '154.192.0.0/11',
53896ca5 5378 'SD': '102.120.0.0/13',
773f291d 5379 'SE': '78.64.0.0/12',
53896ca5 5380 'SG': '8.128.0.0/10',
773f291d
S
5381 'SI': '188.196.0.0/14',
5382 'SK': '78.98.0.0/15',
53896ca5 5383 'SL': '102.143.0.0/17',
773f291d
S
5384 'SM': '89.186.32.0/19',
5385 'SN': '41.82.0.0/15',
53896ca5 5386 'SO': '154.115.192.0/18',
773f291d
S
5387 'SR': '186.179.128.0/17',
5388 'SS': '105.235.208.0/21',
5389 'ST': '197.159.160.0/19',
5390 'SV': '168.243.0.0/16',
5391 'SX': '190.102.0.0/20',
5392 'SY': '5.0.0.0/16',
5393 'SZ': '41.84.224.0/19',
5394 'TC': '65.255.48.0/20',
5395 'TD': '154.68.128.0/19',
5396 'TG': '196.168.0.0/14',
5397 'TH': '171.96.0.0/13',
5398 'TJ': '85.9.128.0/18',
5399 'TK': '27.96.24.0/21',
5400 'TL': '180.189.160.0/20',
5401 'TM': '95.85.96.0/19',
5402 'TN': '197.0.0.0/11',
5403 'TO': '175.176.144.0/21',
5404 'TR': '78.160.0.0/11',
5405 'TT': '186.44.0.0/15',
5406 'TV': '202.2.96.0/19',
5407 'TW': '120.96.0.0/11',
5408 'TZ': '156.156.0.0/14',
53896ca5
S
5409 'UA': '37.52.0.0/14',
5410 'UG': '102.80.0.0/13',
5411 'US': '6.0.0.0/8',
773f291d 5412 'UY': '167.56.0.0/13',
53896ca5 5413 'UZ': '84.54.64.0/18',
773f291d 5414 'VA': '212.77.0.0/19',
53896ca5 5415 'VC': '207.191.240.0/21',
773f291d 5416 'VE': '186.88.0.0/13',
53896ca5 5417 'VG': '66.81.192.0/20',
773f291d
S
5418 'VI': '146.226.0.0/16',
5419 'VN': '14.160.0.0/11',
5420 'VU': '202.80.32.0/20',
5421 'WF': '117.20.32.0/21',
5422 'WS': '202.4.32.0/19',
5423 'YE': '134.35.0.0/16',
5424 'YT': '41.242.116.0/22',
5425 'ZA': '41.0.0.0/11',
53896ca5
S
5426 'ZM': '102.144.0.0/13',
5427 'ZW': '102.177.192.0/18',
773f291d
S
5428 }
5429
5430 @classmethod
5f95927a
S
5431 def random_ipv4(cls, code_or_block):
5432 if len(code_or_block) == 2:
5433 block = cls._country_ip_map.get(code_or_block.upper())
5434 if not block:
5435 return None
5436 else:
5437 block = code_or_block
773f291d
S
5438 addr, preflen = block.split('/')
5439 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5440 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5441 return compat_str(socket.inet_ntoa(
4248dad9 5442 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5443
5444
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler that lets each request choose its own proxy.

    A request may carry a 'Ytdl-request-proxy' header naming the proxy to
    use (or '__noproxy__' for a direct connection); the header is removed
    before the request is passed on.
    """

    def __init__(self, proxies=None):
        # Install http_open/https_open handlers that route through
        # proxy_open with '__noproxy__' as the proxy. The values are bound
        # as lambda defaults so each handler keeps its own scheme.
        for scheme in ('http', 'https'):
            handler = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, handler)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request header overrides the proxy passed in
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            del req.headers['Ytdl-request-proxy']
            proxy = override

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only record the proxy on the request here; yt-dlp's
            # http/https handlers wrap the socket with socks
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5468
5469
0a5445dd
YCH
5470# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5471# released into Public Domain
5472# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5473
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Leading zero bytes are removed; zero (and any value that is not
    positive) is encoded as a single zero byte. If blocksize is greater than
    zero, the front of the result is padded with zero bytes so that its
    length is a multiple of blocksize.
    """
    remaining = int(n)
    words = []
    while remaining > 0:
        words.append(compat_struct_pack('>I', remaining & 0xffffffff))
        remaining >>= 32

    # The 32-bit words were collected least significant first
    packed = b''.join(reversed(words))
    # Drop leading zeros, but never return an empty string
    packed = packed.lstrip(b'\000') or b'\000'

    overhang = len(packed) % blocksize if blocksize > 0 else 0
    if overhang:
        packed = (blocksize - overhang) * b'\000' + packed
    return packed
5502
5503
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    The string is front-padded with zero bytes to a multiple of four and
    then consumed one unsigned 32-bit word at a time. This is (essentially)
    the inverse of long_to_bytes().
    """
    overhang = len(s) % 4
    if overhang:
        s = b'\000' * (4 - overhang) + s

    total = 0
    for offset in range(0, len(s), 4):
        (word,) = compat_struct_unpack('>I', s[offset:offset + 4])
        total = (total << 32) + word
    return total
5519
5520
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    The bytes of data are reversed, read as one hexadecimal integer and
    raised to exponent modulo modulus.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return '%x' % ciphertext
81bdc8fd
YCH
5536
5537
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result is laid out as [0, 2] + padding + [0] + data. Every padding
    value is non-zero: the first zero after the leading [0, 2] marks where
    the data starts, so a zero inside the padding would cut the message
    short when it is unpadded.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves room for fewer than 8 padding values
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    padding_length = length - len(data) - 3
    pseudo_random = [random.randint(1, 255) for _ in range(padding_length)]
    return [0, 2] + pseudo_random + [0] + data
5551
5552
def encode_base_n(num, n, table=None):
    """Encode the integer num as a string of base-n digits.

    @param num      integer to encode; a negative value is encoded as '-'
                    followed by the encoding of its absolute value
    @param n        base; must be at least 2 and no larger than the table
    @param table    digit characters indexed by digit value; defaults to the
                    first n characters of 0-9, a-z, A-Z
    @returns        the encoded string
    @raises ValueError  if n is below 2 or exceeds the table length
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

    # Division by a base below 2 never reduces the number, so the digit loop
    # below could not terminate
    if n < 2:
        raise ValueError('base %d is not supported, it must be at least 2' % n)

    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division of a negative number never reaches zero, so encode the
    # magnitude and add the sign back afterwards
    sign = ''
    if num < 0:
        sign, num = '-', -num

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return sign + ''.join(reversed(digits))
f52354a8
YCH
5569
5570
def decode_packed_codes(code):
    """Expand the packed code found in `code` by PACKED_CODES_RE.

    The match supplies the obfuscated code, a base, a symbol count and a
    '|'-separated symbol list. Every word of the obfuscated code is replaced
    by the symbol whose index, written in that base, equals the word; an
    empty symbol stands for the word itself.
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    radix = int(base)
    total = int(count)
    words = symbols.split('|')

    symbol_table = {}
    for index in range(total - 1, -1, -1):
        token = encode_base_n(index, radix)
        symbol_table[token] = words[index] or token

    def _substitute(match):
        return symbol_table[match.group(0)]

    return re.sub(r'\b(\w+)\b', _substitute, obfuscated_code)
e154c651 5587
5588
1ced2221
S
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions.

    The shift wraps around the end of alphabet; characters that are not in
    alphabet are kept unchanged. A shift of 0 returns s itself.
    """
    if shift == 0:
        return s

    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
5596
5597
def rot47(s):
    """Apply caesar() to s with a shift of 47 over the 94 characters '!' to '~'."""
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
5600
5601
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict.

    Values may be bare or double-quoted (a quoted value may contain commas);
    the surrounding quotes are removed from quoted values.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    info = {}
    for match in re.finditer(attribute_re, attrib):
        value = match.group('val')
        info[match.group('key')] = value[1:-1] if value.startswith('"') else value
    return info
1143535d
YCH
5609
5610
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as unsigned 32-bit."""
    if val < 0:
        # 2**32 is added so the value becomes its unsigned 32-bit equivalent
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5613
5614
5615# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5616# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into its width, height and unfiltered scanlines.

    Returns (width, height, pixels), where pixels holds one list per
    scanline with width * 3 integer byte values each. The scanline layout is
    fixed at three bytes per pixel; no other layout is handled here.

    Raises IOError if the data does not start with the PNG signature
    followed by an IHDR chunk, or if it contains no IDAT data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or png_data[12:16] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    struct_formats = {1: '>B', 2: '>H', 4: '>I'}

    def unpack_integer(raw):
        return compat_struct_unpack(struct_formats[len(raw)], raw)[0]

    # Walk the chunks that follow the 8-byte signature; each one is laid out
    # as length (4 bytes), type (4 bytes), data (length bytes), CRC (4 bytes)
    chunks = []
    pos, end = 8, len(png_data)
    while pos < end:
        length = unpack_integer(png_data[pos:pos + 4])
        chunk_type = png_data[pos + 4:pos + 8]
        data_start = pos + 8
        chunks.append((chunk_type, png_data[data_start:data_start + length]))
        pos = data_start + length + 4  # Skip CRC

    ihdr = chunks[0][1]
    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''.join(data for kind, data in chunks if kind == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Every scanline is preceded by one byte naming its filter type
        row_start = y * (1 + stride)
        filter_type = decompressed_data[row_start]

        current_row = []
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[row_start + 1 + x]
            # Neighbours are the same channel of the pixel to the left
            # (three bytes back) and of the pixel in the row above
            left = current_row[x - 3] if x > 2 else 0
            up = pixels[y - 1][x] if y > 0 else 0

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                upper_left = pixels[y - 1][x - 3] if x > 2 and y > 0 else 0

                estimate = left + up - upper_left
                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
                color = (color + predictor) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5720
5721
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file at `path` to `value`.

    The python `xattr` module is used when it can be imported (either the
    pyxattr or the xattr flavour). Without it, the value is written to an
    NTFS Alternate Data Stream when compat_os_name is 'nt', and otherwise
    through the `setfattr` or `xattr` command line tool.

    Raises XAttrUnavailableError when pyxattr is too old or no tool is
    available, and XAttrMetadataError when setting the attribute fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        user_has_setfattr = check_executable('setfattr', ['--version'])
        user_has_xattr = check_executable('xattr', ['-h'])

        if not (user_has_setfattr or user_has_xattr):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        value = value.decode('utf-8')
        # setfattr is preferred when both tools are present
        if user_has_setfattr:
            executable, opts = 'setfattr', ['-n', key, '-v', value]
        else:
            executable, opts = 'xattr', ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd.extend(encodeArgument(o) for o in opts)
        cmd.append(encodeFilename(path, True))

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        _, stderr = process_communicate_or_kill(p)
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
0c265486
YCH
5804
5805
def random_birthday(year_field, month_field, day_field):
    """Return a random date between 1950-01-01 and 1995-12-31 as form fields.

    The result maps year_field, month_field and day_field to the year, month
    and day of the chosen date, each converted with str().
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
732044af 5816
c76eb41b 5817
# Templates for internet shortcut files, which are plain text files.
# Each template is filled in with the % operator and a mapping that provides
# 'url' (and, for the desktop entry, 'filename').
DOT_URL_LINK_TEMPLATE = (
    '[InternetShortcut]\n'
    'URL=%(url)s\n'
)

DOT_WEBLOC_LINK_TEMPLATE = (
    '<?xml version="1.0" encoding="UTF-8"?>\n'
    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
    '<plist version="1.0">\n'
    '<dict>\n'
    '\t<key>URL</key>\n'
    '\t<string>%(url)s</string>\n'
    '</dict>\n'
    '</plist>\n'
)

DOT_DESKTOP_LINK_TEMPLATE = (
    '[Desktop Entry]\n'
    'Encoding=UTF-8\n'
    'Name=%(filename)s\n'
    'Type=Link\n'
    'URL=%(url)s\n'
    'Icon=text-html\n'
)
5843
5844
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept, except for port 80 on an `http` IRI, where it is the scheme's default. An IRI without a host (e.g. `file:///path`) is converted without a host part.

    Raises ValueError for IRIs with a bracketed (IPv6) host.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no network location at all
    if iri_parts.hostname is not None:
        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 is only redundant for http; dropping it for any other scheme
    # would make the URI point at that scheme's own default port instead
    port = iri_parts.port
    if port is not None and not (iri_parts.scheme == 'http' and port == 80):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5887
5888
def to_high_limit_path(path):
    r"""Return path in a form that is not subject to MAX_PATH on Windows.

    On 'win32' and 'cygwin' the absolute path is prefixed with `\\?\`; on
    every other platform path is returned unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    return r'\\?\ '.rstrip() + os.path.abspath(path)
76d321f6 5895
c76eb41b 5896
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """Format obj[field] with template, or return default for ignored values.

    The value is looked up with obj.get(field, default). If func is given
    and the value is not in ignore, func is applied to it first. The
    (possibly converted) value is then interpolated into template unless it
    is in ignore, in which case default is returned.
    """
    value = obj.get(field, default)
    if func:
        if value not in ignore:
            value = func(value)
    if value in ignore:
        return default
    return template % value
00dd0cd5 5902
5903
def clean_podcast_url(url):
    """Return url with every known tracking/redirect prefix segment removed."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
ffcb8191
THD
5919
5920
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version 4 UUID as a lower-case hexadecimal string.

    In the pattern below every 'x' becomes a random hex digit and the 'y'
    becomes one of 8, 9, a or b, so that the two most significant bits of
    that digit are 1 and 0 as the UUID variant requires.
    """
    def _random_digit(mobj):
        return random.choice(_HEX_TABLE if mobj.group(0) == 'x' else '89ab')

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 5926
5927
def make_dir(path, to_screen=None):
    """Create the parent directory of path if it does not exist yet.

    @param path         path of a file whose directory should exist
    @param to_screen    optional callable that receives the error message
                        when the directory cannot be created
    @returns            True on success (including when path has no directory
                        part or the directory already exists), False if
                        creating the directory failed
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Only report when a reporter was actually supplied
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
f74980cb 5938
5939
def get_executable_path():
    """Return the absolute path of the directory the program is run from.

    That is the directory of sys.executable when sys has a `frozen`
    attribute, and otherwise a directory relative to this file: two levels
    up when the module was loaded by a zipimporter, one level up if not.
    """
    from zipimport import zipimporter

    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        location = os.path.dirname(sys.executable)
    else:
        # Running from ZIP needs one more level up than running from source
        from_zip = isinstance(globals().get('__loader__'), zipimporter)
        parent = '../..' if from_zip else '..'
        location = os.path.join(os.path.dirname(__file__), parent)
    return os.path.abspath(location)
5949
5950
def load_plugins(name, type, namespace):
    """Load plugin classes from the `ytdlp_plugins` directory.

    The module `name` is looked up in the 'ytdlp_plugins' directory under
    get_executable_path(). Every attribute of it whose name ends with `type`
    is stored in `namespace` under that name and collected in the returned
    list. An ImportError leaves `namespace` untouched and yields an empty
    list.
    """
    plugin_info = [None]
    classes = []
    try:
        plugin_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        plugin_info = imp.find_module(name, [plugin_dir])
        plugins = imp.load_module(name, *plugin_info)
        for attr_name in dir(plugins):
            if attr_name.endswith(type):
                klass = getattr(plugins, attr_name)
                classes.append(klass)
                namespace[attr_name] = klass
    except ImportError:
        pass
    finally:
        # The first item returned by find_module may be an open file
        if plugin_info[0] is not None:
            plugin_info[0].close()
    return classes
06167fbb 5970
5971
def traverse_dict(dictn, keys, casesense=True):
    """Follow the sequence `keys` through nested dicts and return the value.

    None is returned as soon as the object being looked into is not a dict
    or does not contain the key. With casesense=False both the keys of each
    dict and the requested keys are lower-cased before the lookup.
    """
    current, pending = dictn, keys
    while True:
        if not isinstance(current, dict):
            return None
        key = pending[0]
        if not casesense:
            current = {k.lower(): v for k, v in current.items()}
            key = key.lower()
        current = current.get(key, None)
        if len(pending) < 2:
            return current
        pending = pending[1:]