#!/usr/bin/env python3
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import collections
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import hashlib
import hmac
import importlib.util
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import time
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_HTTPError,
    compat_basestring,
    compat_chr,
    compat_cookiejar,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_integer_types,
    compat_numeric_types,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_urlunparse,
    compat_urllib_parse_quote,
    compat_urllib_parse_quote_plus,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


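# Illustrative sketch (not part of the original module): callers typically
# start from std_headers and override individual fields per request. The URL
# below is only a placeholder.
#
#   headers = dict(std_headers, **{'Referer': 'https://example.com/'})
#   headers['User-Agent'] = random_user_agent()  # pick a fresh Chrome UA string
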
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'


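# Illustrative sketch: how the JSON_LD_RE pattern above can be applied to a
# downloaded page to pull out embedded JSON-LD metadata. The `webpage`
# variable is assumed to hold the HTML of a page as a string.
#
#   mobj = re.search(JSON_LD_RE, webpage)
#   if mobj:
#       metadata = json.loads(mobj.group('json_ld'))
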
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


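# Illustrative usage sketch: atomically dump a small info dict next to a
# download. The filename is only an example.
#
#   write_json_file({'id': 'abc123', 'title': 'Example'}, 'example.info.json')
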
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)


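# Illustrative sketch: expanding a namespaced path before querying an
# ElementTree document. The MRSS namespace mapping is just an example value.
#
#   path = xpath_with_ns('media:group/media:title', {'media': 'http://search.yahoo.com/mrss/'})
#   # -> '{http://search.yahoo.com/mrss/}group/{http://search.yahoo.com/mrss/}title'
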
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


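# Illustrative sketch: the xpath_* helpers combined with
# compat_etree_fromstring; passing `default` avoids raising on missing nodes.
#
#   doc = compat_etree_fromstring('<video><title>Demo</title></video>')
#   title = xpath_text(doc, './title', 'title', fatal=True)    # 'Demo'
#   duration = xpath_text(doc, './duration', default=None)     # None
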
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist


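# Illustrative sketch: pulling element contents out of raw HTML with the
# helpers above; the markup is a made-up snippet.
#
#   html = '<div class="title main">Hello</div><div class="title">World</div>'
#   get_element_by_class('title', html)      # -> 'Hello'
#   get_elements_by_class('title', html)     # -> ['Hello', 'World']
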
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


class HTMLListAttrsParser(compat_HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.items = []
        self._level = 0

    def handle_starttag(self, tag, attrs):
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs

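# Illustrative sketch: extract_attributes applied to a single opening tag.
#
#   extract_attributes('<a href="https://example.com/video" class="watch" data-id=42>')
#   # -> {'href': 'https://example.com/video', 'class': 'watch', 'data-id': '42'}
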

def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list of dictionaries of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()


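# Illustrative sketch: clean_html flattening tags and entities into plain text.
#
#   clean_html('<p>First line<br>Second &amp; last</p>')
#   # -> 'First line\nSecond & last'
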
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


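# Illustrative sketch: converting an RFC 2822 date (as found in HTTP headers)
# into a Unix timestamp; unparseable input falls through to None.
#
#   timeconvert('Wed, 02 Oct 2002 13:00:00 GMT')   # -> 1033563600
#   timeconvert('not a date')                      # -> None
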
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return ' '
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result


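# Illustrative sketch: the effect of restricted mode on an awkward title.
#
#   sanitize_filename('A/B: C?')                    # -> 'A_B - C'
#   sanitize_filename('A/B: C?', restricted=True)   # -> 'A_B_-_C'
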
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url


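# Illustrative sketch: sanitize_url papering over scheme problems.
#
#   sanitize_url('//cdn.example.com/f.mp4')      # -> 'http://cdn.example.com/f.mp4'
#   sanitize_url('httpss://example.com/watch')   # -> 'https://example.com/watch'
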
def extract_basic_auth(url):
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        parts.hostname if parts.port is None
        else '%s:%d' % (parts.hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')


def sanitized_Request(url, *args, **kwargs):
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)


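# Illustrative sketch: credentials embedded in a URL are stripped and moved
# into an Authorization header ('dXNlcjpwYXNz' is base64 of 'user:pass');
# results shown are indicative.
#
#   extract_basic_auth('https://user:pass@example.com/feed')
#   # -> ('https://example.com/feed', 'Basic dXNlcjpwYXNz')
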
def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


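# Illustrative sketch: the two directions of HTML entity handling.
#
#   unescapeHTML('R&amp;D &#x26; more')   # -> 'R&D & more'
#   escapeHTML('a < b & "c"')             # -> 'a &lt; b &amp; &quot;c&quot;'
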
def process_communicate_or_kill(p, *args, **kwargs):
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    def __init__(self, *args, **kwargs):
        super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        return process_communicate_or_kill(self, *args, **kwargs)


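# Illustrative sketch: running an external command through the Popen wrapper,
# so an interrupt during communicate() still kills the child. The command is
# only an example and is not invoked by this module.
#
#   proc = Popen(['ffprobe', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#   stdout, stderr = proc.communicate_or_kill()
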
def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')

f07b74fc
PH
2361
2362def encodeArgument(s):
2363 if not isinstance(s, compat_str):
2364 # Legacy code that uses byte strings
2365 # Uncomment the following line after fixing all post processors
7af808a5 2366 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2367 s = s.decode('ascii')
2368 return encodeFilename(s, True)
2369
2370
aa49acd1
S
2371def decodeArgument(b):
2372 return decodeFilename(b, True)
2373
2374
8271226a
PH
2375def decodeOption(optval):
2376 if optval is None:
2377 return optval
2378 if isinstance(optval, bytes):
2379 optval = optval.decode(preferredencoding())
2380
2381 assert isinstance(optval, compat_str)
2382 return optval
1c256f70 2383
5f6a1245 2384
aa7785f8 2385_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
2386
2387
2388def timetuple_from_msec(msec):
2389 secs, msec = divmod(msec, 1000)
2390 mins, secs = divmod(secs, 60)
2391 hrs, mins = divmod(mins, 60)
2392 return _timetuple(hrs, mins, secs, msec)
2393
2394
cdb19aa4 2395def formatSeconds(secs, delim=':', msec=False):
aa7785f8 2396 time = timetuple_from_msec(secs * 1000)
2397 if time.hours:
2398 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
2399 elif time.minutes:
2400 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
4539dd30 2401 else:
aa7785f8 2402 ret = '%d' % time.seconds
2403 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
4539dd30 2404
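# Illustrative usage sketch (not part of the original module): leading zero
# fields are dropped and milliseconds are appended only when msec=True.
assert timetuple_from_msec(3723004) == (1, 2, 3, 4)
assert formatSeconds(3723) == '1:02:03'
assert formatSeconds(45, msec=True) == '45.000'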
a0ddb8a2 2405
77562778 2406def _ssl_load_windows_store_certs(ssl_context, storename):
2407 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2408 try:
2409 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
2410 if encoding == 'x509_asn' and (
2411 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
2412 except PermissionError:
2413 return
2414 for cert in certs:
a2366922 2415 try:
77562778 2416 ssl_context.load_verify_locations(cadata=cert)
2417 except ssl.SSLError:
a2366922
PH
2418 pass
2419
77562778 2420
2421def make_HTTPS_handler(params, **kwargs):
2422 opts_check_certificate = not params.get('nocheckcertificate')
2423 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2424 context.check_hostname = opts_check_certificate
2425 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
2426 if opts_check_certificate:
4e3d1898 2427 try:
2428 context.load_default_certs()
2429 # Work around the issue in load_default_certs when there are bad certificates. See:
2430 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2431 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2432 except ssl.SSLError:
2433 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2434 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
2435 # Create a new context to discard any certificates that were already loaded
2436 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2437 context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
2438 for storename in ('CA', 'ROOT'):
2439 _ssl_load_windows_store_certs(context, storename)
2440 context.set_default_verify_paths()
77562778 2441 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2442
732ea2f0 2443
5873d4cc 2444def bug_reports_message(before=';'):
08f2a92c 2445 if ytdl_is_updateable():
7a5c1cfe 2446 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2447 else:
7a5c1cfe 2448 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2449 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2450 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2451 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2452
2453 before = before.rstrip()
2454 if not before or before.endswith(('.', '!', '?')):
2455 msg = msg[0].title() + msg[1:]
2456
2457 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2458
2459
bf5b9d85
PM
2460class YoutubeDLError(Exception):
2461 """Base exception for YoutubeDL errors."""
aa9369a2 2462 msg = None
2463
2464 def __init__(self, msg=None):
2465 if msg is not None:
2466 self.msg = msg
2467 elif self.msg is None:
2468 self.msg = type(self).__name__
2469 super().__init__(self.msg)
bf5b9d85
PM
2470
2471
3158150c 2472network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2473if hasattr(ssl, 'CertificateError'):
2474 network_exceptions.append(ssl.CertificateError)
2475network_exceptions = tuple(network_exceptions)
2476
2477
bf5b9d85 2478class ExtractorError(YoutubeDLError):
1c256f70 2479 """Error during info extraction."""
5f6a1245 2480
1151c407 2481 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2482 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2483 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2484 """
3158150c 2485 if sys.exc_info()[0] in network_exceptions:
9a82b238 2486 expected = True
d5979c5d 2487
526d74ec 2488 self.msg = str(msg)
1c256f70 2489 self.traceback = tb
1151c407 2490 self.expected = expected
2eabb802 2491 self.cause = cause
d11271dd 2492 self.video_id = video_id
1151c407 2493 self.ie = ie
2494 self.exc_info = sys.exc_info() # preserve original exception
2495
2496 super(ExtractorError, self).__init__(''.join((
2497 format_field(ie, template='[%s] '),
2498 format_field(video_id, template='%s: '),
526d74ec 2499 self.msg,
1151c407 2500 format_field(cause, template=' (caused by %r)'),
2501 '' if expected else bug_reports_message())))
1c256f70 2502
01951dda
PH
2503 def format_traceback(self):
2504 if self.traceback is None:
2505 return None
28e614de 2506 return ''.join(traceback.format_tb(self.traceback))
01951dda 2507
1c256f70 2508
416c7fcb
PH
2509class UnsupportedError(ExtractorError):
2510 def __init__(self, url):
2511 super(UnsupportedError, self).__init__(
2512 'Unsupported URL: %s' % url, expected=True)
2513 self.url = url
2514
2515
55b3e45b
JMF
2516class RegexNotFoundError(ExtractorError):
2517 """Error when a regex didn't match"""
2518 pass
2519
2520
773f291d
S
2521class GeoRestrictedError(ExtractorError):
2522 """Geographic restriction Error exception.
2523
2524 This exception may be thrown when a video is not available from your
2525 geographic location due to geographic restrictions imposed by a website.
2526 """
b6e0c7d2 2527
0db3bae8 2528 def __init__(self, msg, countries=None, **kwargs):
2529 kwargs['expected'] = True
2530 super(GeoRestrictedError, self).__init__(msg, **kwargs)
773f291d
S
2531 self.countries = countries
2532
2533
bf5b9d85 2534class DownloadError(YoutubeDLError):
59ae15a5 2535 """Download Error exception.
d77c3dfd 2536
59ae15a5
PH
2537 This exception may be thrown by FileDownloader objects if they are not
2538 configured to continue on errors. They will contain the appropriate
2539 error message.
2540 """
5f6a1245 2541
8cc83b8d
FV
2542 def __init__(self, msg, exc_info=None):
2543 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2544 super(DownloadError, self).__init__(msg)
2545 self.exc_info = exc_info
d77c3dfd
FV
2546
2547
498f5606 2548class EntryNotInPlaylist(YoutubeDLError):
2549 """Entry not in playlist exception.
2550
2551 This exception will be thrown by YoutubeDL when a requested entry
2552 is not found in the playlist info_dict
2553 """
aa9369a2 2554 msg = 'Entry not found in info'
498f5606 2555
2556
bf5b9d85 2557class SameFileError(YoutubeDLError):
59ae15a5 2558 """Same File exception.
d77c3dfd 2559
59ae15a5
PH
2560 This exception will be thrown by FileDownloader objects if they detect
2561 multiple files would have to be downloaded to the same file on disk.
2562 """
aa9369a2 2563 msg = 'Fixed output name but more than one file to download'
2564
2565 def __init__(self, filename=None):
2566 if filename is not None:
2567 self.msg += f': {filename}'
2568 super().__init__(self.msg)
d77c3dfd
FV
2569
2570
bf5b9d85 2571class PostProcessingError(YoutubeDLError):
59ae15a5 2572 """Post Processing exception.
d77c3dfd 2573
59ae15a5
PH
2574 This exception may be raised by PostProcessor's .run() method to
2575 indicate an error in the postprocessing task.
2576 """
5f6a1245 2577
7851b379 2578 def __init__(self, msg):
bf5b9d85 2579 super(PostProcessingError, self).__init__(msg)
7851b379 2580 self.msg = msg
d77c3dfd 2581
5f6a1245 2582
48f79687 2583class DownloadCancelled(YoutubeDLError):
2584 """ Exception raised when the download queue should be interrupted """
2585 msg = 'The download was cancelled'
8b0d7497 2586
8b0d7497 2587
48f79687 2588class ExistingVideoReached(DownloadCancelled):
2589 """ --break-on-existing triggered """
2590 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
8b0d7497 2591
48f79687 2592
2593class RejectedVideoReached(DownloadCancelled):
2594 """ --break-on-reject triggered """
2595 msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
51d9739f 2596
2597
48f79687 2598class MaxDownloadsReached(DownloadCancelled):
59ae15a5 2599 """ --max-downloads limit has been reached. """
48f79687 2600 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2601
2602
2603class ThrottledDownload(YoutubeDLError):
2604 """ Download speed below --throttled-rate. """
aa9369a2 2605 msg = 'The download speed is below throttle limit'
d77c3dfd
FV
2606
2607
bf5b9d85 2608class UnavailableVideoError(YoutubeDLError):
59ae15a5 2609 """Unavailable Format exception.
d77c3dfd 2610
59ae15a5
PH
2611 This exception will be thrown when a video is requested
2612 in a format that is not available for that video.
2613 """
aa9369a2 2614 msg = 'Unable to download video'
2615
2616 def __init__(self, err=None):
2617 if err is not None:
2618 self.msg += f': {err}'
2619 super().__init__(self.msg)
d77c3dfd
FV
2620
2621
bf5b9d85 2622class ContentTooShortError(YoutubeDLError):
59ae15a5 2623 """Content Too Short exception.
d77c3dfd 2624
59ae15a5
PH
2625 This exception may be raised by FileDownloader objects when a file they
2626 download is too small for what the server announced first, indicating
2627 the connection was probably interrupted.
2628 """
d77c3dfd 2629
59ae15a5 2630 def __init__(self, downloaded, expected):
bf5b9d85
PM
2631 super(ContentTooShortError, self).__init__(
2632 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2633 )
2c7ed247 2634 # Both in bytes
59ae15a5
PH
2635 self.downloaded = downloaded
2636 self.expected = expected
d77c3dfd 2637
5f6a1245 2638
bf5b9d85 2639class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2640 def __init__(self, code=None, msg='Unknown error'):
2641 super(XAttrMetadataError, self).__init__(msg)
2642 self.code = code
bd264412 2643 self.msg = msg
efa97bdc
YCH
2644
2645 # Parsing code and msg
3089bc74 2646 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2647 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2648 self.reason = 'NO_SPACE'
2649 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2650 self.reason = 'VALUE_TOO_LONG'
2651 else:
2652 self.reason = 'NOT_SUPPORTED'
2653
2654
bf5b9d85 2655class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2656 pass
2657
2658
c5a59d93 2659def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2660 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2661 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2662 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2663 if sys.version_info < (3, 0):
65220c3b
S
2664 kwargs['strict'] = True
2665 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2666 source_address = ydl_handler._params.get('source_address')
8959018a 2667
be4a824d 2668 if source_address is not None:
8959018a
AU
2669 # This is to workaround _create_connection() from socket where it will try all
2670 # address data from getaddrinfo() including IPv6. This filters the result from
2671 # getaddrinfo() based on the source_address value.
2672 # This is based on the cpython socket.create_connection() function.
2673 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2674 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2675 host, port = address
2676 err = None
2677 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2678 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2679 ip_addrs = [addr for addr in addrs if addr[0] == af]
2680 if addrs and not ip_addrs:
2681 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2682 raise socket.error(
2683 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2684 % (ip_version, source_address[0]))
8959018a
AU
2685 for res in ip_addrs:
2686 af, socktype, proto, canonname, sa = res
2687 sock = None
2688 try:
2689 sock = socket.socket(af, socktype, proto)
2690 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2691 sock.settimeout(timeout)
2692 sock.bind(source_address)
2693 sock.connect(sa)
2694 err = None # Explicitly break reference cycle
2695 return sock
2696 except socket.error as _:
2697 err = _
2698 if sock is not None:
2699 sock.close()
2700 if err is not None:
2701 raise err
2702 else:
9e21e6d9
S
2703 raise socket.error('getaddrinfo returns an empty list')
2704 if hasattr(hc, '_create_connection'):
2705 hc._create_connection = _create_connection
be4a824d
PH
2706 sa = (source_address, 0)
2707 if hasattr(hc, 'source_address'): # Python 2.7+
2708 hc.source_address = sa
2709 else: # Python 2.6
2710 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2711 sock = _create_connection(
be4a824d
PH
2712 (self.host, self.port), self.timeout, sa)
2713 if is_https:
d7932313
PH
2714 self.sock = ssl.wrap_socket(
2715 sock, self.key_file, self.cert_file,
2716 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2717 else:
2718 self.sock = sock
2719 hc.connect = functools.partial(_hc_connect, hc)
2720
2721 return hc
2722
2723
87f0e62d 2724def handle_youtubedl_headers(headers):
992fc9d6
YCH
2725 filtered_headers = headers
2726
2727 if 'Youtubedl-no-compression' in filtered_headers:
2728 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2729 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2730
992fc9d6 2731 return filtered_headers
87f0e62d
YCH
2732
2733
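# Illustrative usage sketch (not part of the original module): the internal
# 'Youtubedl-no-compression' marker is consumed here and turned into the
# absence of an Accept-Encoding header on the outgoing request.
assert handle_youtubedl_headers(
    {'Accept-Encoding': 'gzip, deflate', 'Youtubedl-no-compression': 'True'}) == {}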
acebc9cd 2734class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2735 """Handler for HTTP requests and responses.
2736
2737 This class, when installed with an OpenerDirector, automatically adds
2738 the standard headers to every HTTP request and handles gzipped and
2739 deflated responses from web servers. If compression is to be avoided in
2740 a particular request, the original request in the program code only has
0424ec30 2741 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2742 removed before making the real request.
2743
2744 Part of this code was copied from:
2745
2746 http://techknack.net/python-urllib2-handlers/
2747
2748 Andrew Rowls, the author of that code, agreed to release it to the
2749 public domain.
2750 """
2751
be4a824d
PH
2752 def __init__(self, params, *args, **kwargs):
2753 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2754 self._params = params
2755
2756 def http_open(self, req):
71aff188
YCH
2757 conn_class = compat_http_client.HTTPConnection
2758
2759 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2760 if socks_proxy:
2761 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2762 del req.headers['Ytdl-socks-proxy']
2763
be4a824d 2764 return self.do_open(functools.partial(
71aff188 2765 _create_http_connection, self, conn_class, False),
be4a824d
PH
2766 req)
2767
59ae15a5
PH
2768 @staticmethod
2769 def deflate(data):
fc2119f2 2770 if not data:
2771 return data
59ae15a5
PH
2772 try:
2773 return zlib.decompress(data, -zlib.MAX_WBITS)
2774 except zlib.error:
2775 return zlib.decompress(data)
2776
acebc9cd 2777 def http_request(self, req):
51f267d9
S
2778        # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is
2779        # not always respected by websites, and some tend to give out URLs with non-percent-encoded
2780        # non-ASCII characters (see telemb.py, ard.py [#3412])
2781 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2782 # To work around aforementioned issue we will replace request's original URL with
2783 # percent-encoded one
2784 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2785 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2786 url = req.get_full_url()
2787 url_escaped = escape_url(url)
2788
2789 # Substitute URL if any change after escaping
2790 if url != url_escaped:
15d260eb 2791 req = update_Request(req, url=url_escaped)
51f267d9 2792
33ac271b 2793 for h, v in std_headers.items():
3d5f7a39
JK
2794 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2795 # The dict keys are capitalized because of this bug by urllib
2796 if h.capitalize() not in req.headers:
33ac271b 2797 req.add_header(h, v)
87f0e62d
YCH
2798
2799 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2800
2801 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2802 # Python 2.6 is brain-dead when it comes to fragments
2803 req._Request__original = req._Request__original.partition('#')[0]
2804 req._Request__r_type = req._Request__r_type.partition('#')[0]
2805
59ae15a5
PH
2806 return req
2807
acebc9cd 2808 def http_response(self, req, resp):
59ae15a5
PH
2809 old_resp = resp
2810 # gzip
2811 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2812 content = resp.read()
2813 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2814 try:
2815 uncompressed = io.BytesIO(gz.read())
2816 except IOError as original_ioerror:
2817                # There may be junk at the end of the file
2818 # See http://stackoverflow.com/q/4928560/35070 for details
2819 for i in range(1, 1024):
2820 try:
2821 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2822 uncompressed = io.BytesIO(gz.read())
2823 except IOError:
2824 continue
2825 break
2826 else:
2827 raise original_ioerror
b407d853 2828 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2829 resp.msg = old_resp.msg
c047270c 2830 del resp.headers['Content-encoding']
59ae15a5
PH
2831 # deflate
2832 if resp.headers.get('Content-encoding', '') == 'deflate':
2833 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2834 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2835 resp.msg = old_resp.msg
c047270c 2836 del resp.headers['Content-encoding']
ad729172 2837 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2838 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2839 if 300 <= resp.code < 400:
2840 location = resp.headers.get('Location')
2841 if location:
2842                # As of RFC 2616, the default charset is iso-8859-1, which is respected by Python 3
2843 if sys.version_info >= (3, 0):
2844 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2845 else:
2846 location = location.decode('utf-8')
5a4d9ddb
S
2847 location_escaped = escape_url(location)
2848 if location != location_escaped:
2849 del resp.headers['Location']
9a4aec8b
YCH
2850 if sys.version_info < (3, 0):
2851 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2852 resp.headers['Location'] = location_escaped
59ae15a5 2853 return resp
0f8d03f8 2854
acebc9cd
PH
2855 https_request = http_request
2856 https_response = http_response
bf50b038 2857
5de90176 2858
71aff188
YCH
2859def make_socks_conn_class(base_class, socks_proxy):
2860 assert issubclass(base_class, (
2861 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2862
2863 url_components = compat_urlparse.urlparse(socks_proxy)
2864 if url_components.scheme.lower() == 'socks5':
2865 socks_type = ProxyType.SOCKS5
2866 elif url_components.scheme.lower() in ('socks', 'socks4'):
2867 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2868 elif url_components.scheme.lower() == 'socks4a':
2869 socks_type = ProxyType.SOCKS4A
71aff188 2870
cdd94c2e
YCH
2871 def unquote_if_non_empty(s):
2872 if not s:
2873 return s
2874 return compat_urllib_parse_unquote_plus(s)
2875
71aff188
YCH
2876 proxy_args = (
2877 socks_type,
2878 url_components.hostname, url_components.port or 1080,
2879 True, # Remote DNS
cdd94c2e
YCH
2880 unquote_if_non_empty(url_components.username),
2881 unquote_if_non_empty(url_components.password),
71aff188
YCH
2882 )
2883
2884 class SocksConnection(base_class):
2885 def connect(self):
2886 self.sock = sockssocket()
2887 self.sock.setproxy(*proxy_args)
2888 if type(self.timeout) in (int, float):
2889 self.sock.settimeout(self.timeout)
2890 self.sock.connect((self.host, self.port))
2891
2892 if isinstance(self, compat_http_client.HTTPSConnection):
2893 if hasattr(self, '_context'): # Python > 2.6
2894 self.sock = self._context.wrap_socket(
2895 self.sock, server_hostname=self.host)
2896 else:
2897 self.sock = ssl.wrap_socket(self.sock)
2898
2899 return SocksConnection
2900
2901
be4a824d
PH
2902class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2903 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2904 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2905 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2906 self._params = params
2907
2908 def https_open(self, req):
4f264c02 2909 kwargs = {}
71aff188
YCH
2910 conn_class = self._https_conn_class
2911
4f264c02
JMF
2912 if hasattr(self, '_context'): # python > 2.6
2913 kwargs['context'] = self._context
2914 if hasattr(self, '_check_hostname'): # python 3.x
2915 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2916
2917 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2918 if socks_proxy:
2919 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2920 del req.headers['Ytdl-socks-proxy']
2921
be4a824d 2922 return self.do_open(functools.partial(
71aff188 2923 _create_http_connection, self, conn_class, True),
4f264c02 2924 req, **kwargs)
be4a824d
PH
2925
2926
1bab3437 2927class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2928 """
2929 See [1] for cookie file format.
2930
2931 1. https://curl.haxx.se/docs/http-cookies.html
2932 """
e7e62441 2933 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2934 _ENTRY_LEN = 7
2935 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2936# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2937
2938'''
2939 _CookieFileEntry = collections.namedtuple(
2940 'CookieFileEntry',
2941 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2942
1bab3437 2943 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2944 """
2945 Save cookies to a file.
2946
2947 Most of the code is taken from CPython 3.8 and slightly adapted
2948 to support cookie files with UTF-8 in both python 2 and 3.
2949 """
2950 if filename is None:
2951 if self.filename is not None:
2952 filename = self.filename
2953 else:
2954 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2955
1bab3437
S
2956 # Store session cookies with `expires` set to 0 instead of an empty
2957 # string
2958 for cookie in self:
2959 if cookie.expires is None:
2960 cookie.expires = 0
c380cc28
S
2961
2962 with io.open(filename, 'w', encoding='utf-8') as f:
2963 f.write(self._HEADER)
2964 now = time.time()
2965 for cookie in self:
2966 if not ignore_discard and cookie.discard:
2967 continue
2968 if not ignore_expires and cookie.is_expired(now):
2969 continue
2970 if cookie.secure:
2971 secure = 'TRUE'
2972 else:
2973 secure = 'FALSE'
2974 if cookie.domain.startswith('.'):
2975 initial_dot = 'TRUE'
2976 else:
2977 initial_dot = 'FALSE'
2978 if cookie.expires is not None:
2979 expires = compat_str(cookie.expires)
2980 else:
2981 expires = ''
2982 if cookie.value is None:
2983 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2984 # with no name, whereas http.cookiejar regards it as a
2985 # cookie with no value.
2986 name = ''
2987 value = cookie.name
2988 else:
2989 name = cookie.name
2990 value = cookie.value
2991 f.write(
2992 '\t'.join([cookie.domain, initial_dot, cookie.path,
2993 secure, expires, name, value]) + '\n')
1bab3437
S
2994
2995 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2996 """Load cookies from a file."""
2997 if filename is None:
2998 if self.filename is not None:
2999 filename = self.filename
3000 else:
3001 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
3002
c380cc28
S
3003 def prepare_line(line):
3004 if line.startswith(self._HTTPONLY_PREFIX):
3005 line = line[len(self._HTTPONLY_PREFIX):]
3006 # comments and empty lines are fine
3007 if line.startswith('#') or not line.strip():
3008 return line
3009 cookie_list = line.split('\t')
3010 if len(cookie_list) != self._ENTRY_LEN:
3011 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
3012 cookie = self._CookieFileEntry(*cookie_list)
3013 if cookie.expires_at and not cookie.expires_at.isdigit():
3014 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
3015 return line
3016
e7e62441 3017 cf = io.StringIO()
c380cc28 3018 with io.open(filename, encoding='utf-8') as f:
e7e62441 3019 for line in f:
c380cc28
S
3020 try:
3021 cf.write(prepare_line(line))
3022 except compat_cookiejar.LoadError as e:
3023 write_string(
3024 'WARNING: skipping cookie file entry due to %s: %r\n'
3025 % (e, line), sys.stderr)
3026 continue
e7e62441 3027 cf.seek(0)
3028 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
3029 # Session cookies are denoted by either `expires` field set to
3030 # an empty string or 0. MozillaCookieJar only recognizes the former
3031        # (see [1]). So we need to force the latter to be recognized as session
3032        # cookies on our own.
3033        # Session cookies may be important for cookies-based authentication,
3034        # e.g. usually, when a user does not check the 'Remember me' check box while
3035 # logging in on a site, some important cookies are stored as session
3036 # cookies so that not recognizing them will result in failed login.
3037 # 1. https://bugs.python.org/issue17164
3038 for cookie in self:
3039 # Treat `expires=0` cookies as session cookies
3040 if cookie.expires == 0:
3041 cookie.expires = None
3042 cookie.discard = True
3043
3044
a6420bf5
S
3045class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
3046 def __init__(self, cookiejar=None):
3047 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
3048
3049 def http_response(self, request, response):
3050 # Python 2 will choke on next HTTP request in row if there are non-ASCII
3051 # characters in Set-Cookie HTTP header of last response (see
067aa17e 3052 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
3053 # In order to at least prevent crashing we will percent encode Set-Cookie
3054 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
3055 # if sys.version_info < (3, 0) and response.headers:
3056 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
3057 # set_cookie = response.headers.get(set_cookie_header)
3058 # if set_cookie:
3059 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
3060 # if set_cookie != set_cookie_escaped:
3061 # del response.headers[set_cookie_header]
3062 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
3063 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
3064
f5fa042c 3065 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
3066 https_response = http_response
3067
3068
fca6dba8 3069class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 3070 """YoutubeDL redirect handler
3071
3072 The code is based on HTTPRedirectHandler implementation from CPython [1].
3073
3074 This redirect handler solves two issues:
3075 - ensures redirect URL is always unicode under python 2
3076 - introduces support for experimental HTTP response status code
3077 308 Permanent Redirect [2] used by some sites [3]
3078
3079 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
3080 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3081 3. https://github.com/ytdl-org/youtube-dl/issues/28768
3082 """
3083
3084 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
3085
3086 def redirect_request(self, req, fp, code, msg, headers, newurl):
3087 """Return a Request or None in response to a redirect.
3088
3089 This is called by the http_error_30x methods when a
3090 redirection response is received. If a redirection should
3091 take place, return a new Request to allow http_error_30x to
3092 perform the redirect. Otherwise, raise HTTPError if no-one
3093 else should try to handle this url. Return None if you can't
3094 but another Handler might.
3095 """
3096 m = req.get_method()
3097 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3098 or code in (301, 302, 303) and m == "POST")):
3099 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3100 # Strictly (according to RFC 2616), 301 or 302 in response to
3101 # a POST MUST NOT cause a redirection without confirmation
3102 # from the user (of urllib.request, in this case). In practice,
3103 # essentially all clients do redirect in this case, so we do
3104 # the same.
3105
3106 # On python 2 urlh.geturl() may sometimes return redirect URL
3107 # as byte string instead of unicode. This workaround allows
3108 # to force it always return unicode.
3109 if sys.version_info[0] < 3:
3110 newurl = compat_str(newurl)
3111
3112 # Be conciliant with URIs containing a space. This is mainly
3113 # redundant with the more complete encoding done in http_error_302(),
3114 # but it is kept for compatibility with other callers.
3115 newurl = newurl.replace(' ', '%20')
3116
3117 CONTENT_HEADERS = ("content-length", "content-type")
3118 # NB: don't use dict comprehension for python 2.6 compatibility
3119 newheaders = dict((k, v) for k, v in req.headers.items()
3120 if k.lower() not in CONTENT_HEADERS)
3121 return compat_urllib_request.Request(
3122 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3123 unverifiable=True)
fca6dba8
S
3124
3125
46f59e89
S
3126def extract_timezone(date_str):
3127 m = re.search(
f137e4c2 3128 r'''(?x)
3129 ^.{8,}? # >=8 char non-TZ prefix, if present
3130 (?P<tz>Z| # just the UTC Z, or
3131 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3132 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3133 [ ]? # optional space
3134 (?P<sign>\+|-) # +/-
3135 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3136 $)
3137 ''', date_str)
46f59e89
S
3138 if not m:
3139 timezone = datetime.timedelta()
3140 else:
3141 date_str = date_str[:-len(m.group('tz'))]
3142 if not m.group('sign'):
3143 timezone = datetime.timedelta()
3144 else:
3145 sign = 1 if m.group('sign') == '+' else -1
3146 timezone = datetime.timedelta(
3147 hours=sign * int(m.group('hours')),
3148 minutes=sign * int(m.group('minutes')))
3149 return timezone, date_str
3150
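# Illustrative usage sketch (not part of the original module; the sample date is
# arbitrary): the offset is returned as a timedelta and stripped from the
# remaining date string.
tz, remainder = extract_timezone('2021-01-02 12:00:00 +0530')
assert tz == datetime.timedelta(hours=5, minutes=30)
assert remainder == '2021-01-02 12:00:00'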
3151
08b38d54 3152def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3153 """ Return a UNIX timestamp from the given date """
3154
3155 if date_str is None:
3156 return None
3157
52c3a6e4
S
3158 date_str = re.sub(r'\.[0-9]+', '', date_str)
3159
08b38d54 3160 if timezone is None:
46f59e89
S
3161 timezone, date_str = extract_timezone(date_str)
3162
52c3a6e4
S
3163 try:
3164 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3165 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3166 return calendar.timegm(dt.timetuple())
3167 except ValueError:
3168 pass
912b38b4
PH
3169
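# Illustrative usage sketch (not part of the original module): the embedded
# offset is taken into account, so both forms map to the same epoch second.
assert parse_iso8601('2014-03-23T23:04:26+0100') == 1395612266
assert parse_iso8601('2014-03-23T22:04:26+0000') == 1395612266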
3170
46f59e89
S
3171def date_formats(day_first=True):
3172 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3173
3174
42bdd9d0 3175def unified_strdate(date_str, day_first=True):
bf50b038 3176 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3177
3178 if date_str is None:
3179 return None
bf50b038 3180 upload_date = None
5f6a1245 3181 # Replace commas
026fcc04 3182 date_str = date_str.replace(',', ' ')
42bdd9d0 3183 # Remove AM/PM + timezone
9bb8e0a3 3184 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3185 _, date_str = extract_timezone(date_str)
42bdd9d0 3186
46f59e89 3187 for expression in date_formats(day_first):
bf50b038
JMF
3188 try:
3189 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3190 except ValueError:
bf50b038 3191 pass
42393ce2
PH
3192 if upload_date is None:
3193 timetuple = email.utils.parsedate_tz(date_str)
3194 if timetuple:
c6b9cf05
S
3195 try:
3196 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3197 except ValueError:
3198 pass
6a750402
JMF
3199 if upload_date is not None:
3200 return compat_str(upload_date)
bf50b038 3201
5f6a1245 3202
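# Illustrative usage sketch (not part of the original module): dates are
# normalized to YYYYMMDD, with day-first parsing by default.
assert unified_strdate('8/7/2009') == '20090708'
assert unified_strdate('8/7/2009', day_first=False) == '20090807'
assert unified_strdate('Dec 14, 2012') == '20121214'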
46f59e89
S
3203def unified_timestamp(date_str, day_first=True):
3204 if date_str is None:
3205 return None
3206
2ae2ffda 3207 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3208
7dc2a74e 3209 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3210 timezone, date_str = extract_timezone(date_str)
3211
3212 # Remove AM/PM + timezone
3213 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3214
deef3195
S
3215    # Remove unrecognized timezones from ISO 8601-like timestamps
3216 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3217 if m:
3218 date_str = date_str[:-len(m.group('tz'))]
3219
f226880c
PH
3220 # Python only supports microseconds, so remove nanoseconds
3221 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3222 if m:
3223 date_str = m.group(1)
3224
46f59e89
S
3225 for expression in date_formats(day_first):
3226 try:
7dc2a74e 3227 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3228 return calendar.timegm(dt.timetuple())
3229 except ValueError:
3230 pass
3231 timetuple = email.utils.parsedate_tz(date_str)
3232 if timetuple:
7dc2a74e 3233 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3234
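# Illustrative usage sketch (not part of the original module): unlike
# unified_strdate, the result is a UNIX timestamp rather than a YYYYMMDD string.
assert unified_timestamp('December 21, 2010') == 1292889600
assert unified_timestamp('8/7/2009') == 1247011200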
3235
28e614de 3236def determine_ext(url, default_ext='unknown_video'):
85750f89 3237 if url is None or '.' not in url:
f4776371 3238 return default_ext
9cb9a5df 3239 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3240 if re.match(r'^[A-Za-z0-9]+$', guess):
3241 return guess
a7aaa398
S
3242    # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
3243 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3244 return guess.rstrip('/')
73e79f2a 3245 else:
cbdbb766 3246 return default_ext
73e79f2a 3247
5f6a1245 3248
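# Illustrative usage sketch (not part of the original module; example.com URLs
# are made up): trailing slashes and query strings are tolerated, and unknown
# extensions fall back to the default.
assert determine_ext('http://example.com/foo/bar.mp4/?download') == 'mp4'
assert determine_ext('http://example.com/foo/bar.nonext/?download') == 'unknown_video'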
824fa511
S
3249def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3250 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3251
5f6a1245 3252
9e62f283 3253def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3254 """
3255 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3256 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3257
3258    format: string date format used to parse date_str
3259 precision: round the time portion of a datetime object.
3260 auto|microsecond|second|minute|hour|day.
3261 auto: round to the unit provided in date_str (if applicable).
3262 """
3263 auto_precision = False
3264 if precision == 'auto':
3265 auto_precision = True
3266 precision = 'microsecond'
3267 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3268 if date_str in ('now', 'today'):
37254abc 3269 return today
f8795e10
PH
3270 if date_str == 'yesterday':
3271 return today - datetime.timedelta(days=1)
9e62f283 3272 match = re.match(
3273 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3274 date_str)
37254abc 3275 if match is not None:
9e62f283 3276 start_time = datetime_from_str(match.group('start'), precision, format)
3277 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3278 unit = match.group('unit')
9e62f283 3279 if unit == 'month' or unit == 'year':
3280 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3281 unit = 'day'
9e62f283 3282 else:
3283 if unit == 'week':
3284 unit = 'day'
3285 time *= 7
3286 delta = datetime.timedelta(**{unit + 's': time})
3287 new_date = start_time + delta
3288 if auto_precision:
3289 return datetime_round(new_date, unit)
3290 return new_date
3291
3292 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3293
3294
3295def date_from_str(date_str, format='%Y%m%d'):
3296 """
3297 Return a datetime object from a string in the format YYYYMMDD or
3298 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3299
3300    format: string date format used to parse date_str
3301 """
3302 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3303
3304
3305def datetime_add_months(dt, months):
3306 """Increment/Decrement a datetime object by months."""
3307 month = dt.month + months - 1
3308 year = dt.year + month // 12
3309 month = month % 12 + 1
3310 day = min(dt.day, calendar.monthrange(year, month)[1])
3311 return dt.replace(year, month, day)
3312
3313
3314def datetime_round(dt, precision='day'):
3315 """
3316 Round a datetime object's time to a specific precision
3317 """
3318 if precision == 'microsecond':
3319 return dt
3320
3321 unit_seconds = {
3322 'day': 86400,
3323 'hour': 3600,
3324 'minute': 60,
3325 'second': 1,
3326 }
3327 roundto = lambda x, n: ((x + n / 2) // n) * n
3328 timestamp = calendar.timegm(dt.timetuple())
3329 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3330
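# Illustrative usage sketch (not part of the original module): relative date
# expressions are resolved against the current time, and month arithmetic is
# clamped to the last valid day of the target month.
assert date_from_str('now+7day') == date_from_str('now+1week')
assert datetime_add_months(datetime.datetime(2021, 1, 31), 1) == datetime.datetime(2021, 2, 28)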
3331
e63fc1be 3332def hyphenate_date(date_str):
3333 """
3334 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3335 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3336 if match is not None:
3337 return '-'.join(match.groups())
3338 else:
3339 return date_str
3340
5f6a1245 3341
bd558525
JMF
3342class DateRange(object):
3343 """Represents a time interval between two dates"""
5f6a1245 3344
bd558525
JMF
3345 def __init__(self, start=None, end=None):
3346 """start and end must be strings in the format accepted by date"""
3347 if start is not None:
3348 self.start = date_from_str(start)
3349 else:
3350 self.start = datetime.datetime.min.date()
3351 if end is not None:
3352 self.end = date_from_str(end)
3353 else:
3354 self.end = datetime.datetime.max.date()
37254abc 3355 if self.start > self.end:
bd558525 3356 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3357
bd558525
JMF
3358 @classmethod
3359 def day(cls, day):
3360 """Returns a range that only contains the given day"""
5f6a1245
JW
3361 return cls(day, day)
3362
bd558525
JMF
3363 def __contains__(self, date):
3364 """Check if the date is in the range"""
37254abc
JMF
3365 if not isinstance(date, datetime.date):
3366 date = date_from_str(date)
3367 return self.start <= date <= self.end
5f6a1245 3368
bd558525 3369 def __str__(self):
5f6a1245 3370 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3371
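# Illustrative usage sketch (not part of the original module; the dates are
# arbitrary): membership is inclusive at both ends and accepts YYYYMMDD strings.
assert '20210115' in DateRange('20210101', '20210131')
assert '20210201' not in DateRange('20210101', '20210131')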
3372
3373def platform_name():
3374 """ Returns the platform name as a compat_str """
3375 res = platform.platform()
3376 if isinstance(res, bytes):
3377 res = res.decode(preferredencoding())
3378
3379 assert isinstance(res, compat_str)
3380 return res
c257baff
PH
3381
3382
49fa4d9a
N
3383def get_windows_version():
3384 ''' Get Windows version. None if it's not running on Windows '''
3385 if compat_os_name == 'nt':
3386 return version_tuple(platform.win32_ver()[1])
3387 else:
3388 return None
3389
3390
b58ddb32
PH
3391def _windows_write_string(s, out):
3392 """ Returns True if the string was written using special methods,
3393 False if it has yet to be written out."""
3394 # Adapted from http://stackoverflow.com/a/3259271/35070
3395
3396 import ctypes
3397 import ctypes.wintypes
3398
3399 WIN_OUTPUT_IDS = {
3400 1: -11,
3401 2: -12,
3402 }
3403
a383a98a
PH
3404 try:
3405 fileno = out.fileno()
3406 except AttributeError:
3407 # If the output stream doesn't have a fileno, it's virtual
3408 return False
aa42e873
PH
3409 except io.UnsupportedOperation:
3410 # Some strange Windows pseudo files?
3411 return False
b58ddb32
PH
3412 if fileno not in WIN_OUTPUT_IDS:
3413 return False
3414
d7cd9a9e 3415 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3416 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3417 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3418 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3419
d7cd9a9e 3420 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3421 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3422 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3423 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3424 written = ctypes.wintypes.DWORD(0)
3425
d7cd9a9e 3426 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3427 FILE_TYPE_CHAR = 0x0002
3428 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3429 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3430 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3431 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3432 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3433 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3434
3435 def not_a_console(handle):
3436 if handle == INVALID_HANDLE_VALUE or handle is None:
3437 return True
3089bc74
S
3438 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3439 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3440
3441 if not_a_console(h):
3442 return False
3443
d1b9c912
PH
3444 def next_nonbmp_pos(s):
3445 try:
3446 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3447 except StopIteration:
3448 return len(s)
3449
3450 while s:
3451 count = min(next_nonbmp_pos(s), 1024)
3452
b58ddb32 3453 ret = WriteConsoleW(
d1b9c912 3454 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3455 if ret == 0:
3456 raise OSError('Failed to write string')
d1b9c912
PH
3457 if not count: # We just wrote a non-BMP character
3458 assert written.value == 2
3459 s = s[1:]
3460 else:
3461 assert written.value > 0
3462 s = s[written.value:]
b58ddb32
PH
3463 return True
3464
3465
734f90bb 3466def write_string(s, out=None, encoding=None):
7459e3a2
PH
3467 if out is None:
3468 out = sys.stderr
8bf48f23 3469 assert type(s) == compat_str
7459e3a2 3470
b58ddb32
PH
3471 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3472 if _windows_write_string(s, out):
3473 return
3474
3089bc74
S
3475 if ('b' in getattr(out, 'mode', '')
3476 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3477 byt = s.encode(encoding or preferredencoding(), 'ignore')
3478 out.write(byt)
3479 elif hasattr(out, 'buffer'):
3480 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3481 byt = s.encode(enc, 'ignore')
3482 out.buffer.write(byt)
3483 else:
8bf48f23 3484 out.write(s)
7459e3a2
PH
3485 out.flush()
3486
3487
48ea9cea
PH
3488def bytes_to_intlist(bs):
3489 if not bs:
3490 return []
3491 if isinstance(bs[0], int): # Python 3
3492 return list(bs)
3493 else:
3494 return [ord(c) for c in bs]
3495
c257baff 3496
cba892fa 3497def intlist_to_bytes(xs):
3498 if not xs:
3499 return b''
edaa23f8 3500 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3501
3502
c1c9a79c
PH
3503# Cross-platform file locking
3504if sys.platform == 'win32':
3505 import ctypes.wintypes
3506 import msvcrt
3507
3508 class OVERLAPPED(ctypes.Structure):
3509 _fields_ = [
3510 ('Internal', ctypes.wintypes.LPVOID),
3511 ('InternalHigh', ctypes.wintypes.LPVOID),
3512 ('Offset', ctypes.wintypes.DWORD),
3513 ('OffsetHigh', ctypes.wintypes.DWORD),
3514 ('hEvent', ctypes.wintypes.HANDLE),
3515 ]
3516
3517 kernel32 = ctypes.windll.kernel32
3518 LockFileEx = kernel32.LockFileEx
3519 LockFileEx.argtypes = [
3520 ctypes.wintypes.HANDLE, # hFile
3521 ctypes.wintypes.DWORD, # dwFlags
3522 ctypes.wintypes.DWORD, # dwReserved
3523 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3524 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3525 ctypes.POINTER(OVERLAPPED) # Overlapped
3526 ]
3527 LockFileEx.restype = ctypes.wintypes.BOOL
3528 UnlockFileEx = kernel32.UnlockFileEx
3529 UnlockFileEx.argtypes = [
3530 ctypes.wintypes.HANDLE, # hFile
3531 ctypes.wintypes.DWORD, # dwReserved
3532 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3533 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3534 ctypes.POINTER(OVERLAPPED) # Overlapped
3535 ]
3536 UnlockFileEx.restype = ctypes.wintypes.BOOL
3537 whole_low = 0xffffffff
3538 whole_high = 0x7fffffff
3539
3540 def _lock_file(f, exclusive):
3541 overlapped = OVERLAPPED()
3542 overlapped.Offset = 0
3543 overlapped.OffsetHigh = 0
3544 overlapped.hEvent = 0
3545 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3546 handle = msvcrt.get_osfhandle(f.fileno())
3547 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3548 whole_low, whole_high, f._lock_file_overlapped_p):
3549 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3550
3551 def _unlock_file(f):
3552 assert f._lock_file_overlapped_p
3553 handle = msvcrt.get_osfhandle(f.fileno())
3554 if not UnlockFileEx(handle, 0,
3555 whole_low, whole_high, f._lock_file_overlapped_p):
3556 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3557
3558else:
399a76e6
YCH
3559    # Some platforms, such as Jython, are missing fcntl
3560 try:
3561 import fcntl
c1c9a79c 3562
399a76e6
YCH
3563 def _lock_file(f, exclusive):
3564 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3565
399a76e6
YCH
3566 def _unlock_file(f):
3567 fcntl.flock(f, fcntl.LOCK_UN)
3568 except ImportError:
3569 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3570
3571 def _lock_file(f, exclusive):
3572 raise IOError(UNSUPPORTED_MSG)
3573
3574 def _unlock_file(f):
3575 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3576
3577
3578class locked_file(object):
3579 def __init__(self, filename, mode, encoding=None):
3580 assert mode in ['r', 'a', 'w']
3581 self.f = io.open(filename, mode, encoding=encoding)
3582 self.mode = mode
3583
3584 def __enter__(self):
3585 exclusive = self.mode != 'r'
3586 try:
3587 _lock_file(self.f, exclusive)
3588 except IOError:
3589 self.f.close()
3590 raise
3591 return self
3592
3593 def __exit__(self, etype, value, traceback):
3594 try:
3595 _unlock_file(self.f)
3596 finally:
3597 self.f.close()
3598
3599 def __iter__(self):
3600 return iter(self.f)
3601
3602 def write(self, *args):
3603 return self.f.write(*args)
3604
3605 def read(self, *args):
3606 return self.f.read(*args)
4eb7f1d1
JMF
3607
3608
4644ac55
S
3609def get_filesystem_encoding():
3610 encoding = sys.getfilesystemencoding()
3611 return encoding if encoding is not None else 'utf-8'
3612
3613
4eb7f1d1 3614def shell_quote(args):
a6a173c2 3615 quoted_args = []
4644ac55 3616 encoding = get_filesystem_encoding()
a6a173c2
JMF
3617 for a in args:
3618 if isinstance(a, bytes):
3619 # We may get a filename encoded with 'encodeFilename'
3620 a = a.decode(encoding)
aefce8e6 3621 quoted_args.append(compat_shlex_quote(a))
28e614de 3622 return ' '.join(quoted_args)
9d4660ca
PH
3623
3624
3625def smuggle_url(url, data):
3626 """ Pass additional data in a URL for internal use. """
3627
81953d1a
RA
3628 url, idata = unsmuggle_url(url, {})
3629 data.update(idata)
15707c7e 3630 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3631 {'__youtubedl_smuggle': json.dumps(data)})
3632 return url + '#' + sdata
9d4660ca
PH
3633
3634
79f82953 3635def unsmuggle_url(smug_url, default=None):
83e865a3 3636 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3637 return smug_url, default
28e614de
PH
3638 url, _, sdata = smug_url.rpartition('#')
3639 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3640 data = json.loads(jsond)
3641 return url, data
02dbf93f
PH
3642
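# Illustrative usage sketch (not part of the original module; the URL and dict
# are made up): data smuggled into the URL fragment survives a round trip.
url, data = unsmuggle_url(smuggle_url('http://example.com/video', {'source': 'test'}))
assert url == 'http://example.com/video'
assert data == {'source': 'test'}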
3643
02dbf93f
PH
3644def format_bytes(bytes):
3645 if bytes is None:
28e614de 3646 return 'N/A'
02dbf93f
PH
3647 if type(bytes) is str:
3648 bytes = float(bytes)
3649 if bytes == 0.0:
3650 exponent = 0
3651 else:
3652 exponent = int(math.log(bytes, 1024.0))
28e614de 3653 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3654 converted = float(bytes) / float(1024 ** exponent)
28e614de 3655 return '%.2f%s' % (converted, suffix)
f53c966a 3656
1c088fa8 3657
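# Illustrative usage sketch (not part of the original module): sizes are shown
# with binary (1024-based) suffixes and two decimals.
assert format_bytes(1536) == '1.50KiB'
assert format_bytes(None) == 'N/A'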
fb47597b
S
3658def lookup_unit_table(unit_table, s):
3659 units_re = '|'.join(re.escape(u) for u in unit_table)
3660 m = re.match(
782b1b5b 3661 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3662 if not m:
3663 return None
3664 num_str = m.group('num').replace(',', '.')
3665 mult = unit_table[m.group('unit')]
3666 return int(float(num_str) * mult)
3667
3668
be64b5b0
PH
3669def parse_filesize(s):
3670 if s is None:
3671 return None
3672
dfb1b146 3673 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3674 # but we support those too
3675 _UNIT_TABLE = {
3676 'B': 1,
3677 'b': 1,
70852b47 3678 'bytes': 1,
be64b5b0
PH
3679 'KiB': 1024,
3680 'KB': 1000,
3681 'kB': 1024,
3682 'Kb': 1000,
13585d76 3683 'kb': 1000,
70852b47
YCH
3684 'kilobytes': 1000,
3685 'kibibytes': 1024,
be64b5b0
PH
3686 'MiB': 1024 ** 2,
3687 'MB': 1000 ** 2,
3688 'mB': 1024 ** 2,
3689 'Mb': 1000 ** 2,
13585d76 3690 'mb': 1000 ** 2,
70852b47
YCH
3691 'megabytes': 1000 ** 2,
3692 'mebibytes': 1024 ** 2,
be64b5b0
PH
3693 'GiB': 1024 ** 3,
3694 'GB': 1000 ** 3,
3695 'gB': 1024 ** 3,
3696 'Gb': 1000 ** 3,
13585d76 3697 'gb': 1000 ** 3,
70852b47
YCH
3698 'gigabytes': 1000 ** 3,
3699 'gibibytes': 1024 ** 3,
be64b5b0
PH
3700 'TiB': 1024 ** 4,
3701 'TB': 1000 ** 4,
3702 'tB': 1024 ** 4,
3703 'Tb': 1000 ** 4,
13585d76 3704 'tb': 1000 ** 4,
70852b47
YCH
3705 'terabytes': 1000 ** 4,
3706 'tebibytes': 1024 ** 4,
be64b5b0
PH
3707 'PiB': 1024 ** 5,
3708 'PB': 1000 ** 5,
3709 'pB': 1024 ** 5,
3710 'Pb': 1000 ** 5,
13585d76 3711 'pb': 1000 ** 5,
70852b47
YCH
3712 'petabytes': 1000 ** 5,
3713 'pebibytes': 1024 ** 5,
be64b5b0
PH
3714 'EiB': 1024 ** 6,
3715 'EB': 1000 ** 6,
3716 'eB': 1024 ** 6,
3717 'Eb': 1000 ** 6,
13585d76 3718 'eb': 1000 ** 6,
70852b47
YCH
3719 'exabytes': 1000 ** 6,
3720 'exbibytes': 1024 ** 6,
be64b5b0
PH
3721 'ZiB': 1024 ** 7,
3722 'ZB': 1000 ** 7,
3723 'zB': 1024 ** 7,
3724 'Zb': 1000 ** 7,
13585d76 3725 'zb': 1000 ** 7,
70852b47
YCH
3726 'zettabytes': 1000 ** 7,
3727 'zebibytes': 1024 ** 7,
be64b5b0
PH
3728 'YiB': 1024 ** 8,
3729 'YB': 1000 ** 8,
3730 'yB': 1024 ** 8,
3731 'Yb': 1000 ** 8,
13585d76 3732 'yb': 1000 ** 8,
70852b47
YCH
3733 'yottabytes': 1000 ** 8,
3734 'yobibytes': 1024 ** 8,
be64b5b0
PH
3735 }
3736
fb47597b
S
3737 return lookup_unit_table(_UNIT_TABLE, s)
3738
3739
3740def parse_count(s):
3741 if s is None:
be64b5b0
PH
3742 return None
3743
fb47597b
S
3744 s = s.strip()
3745
3746 if re.match(r'^[\d,.]+$', s):
3747 return str_to_int(s)
3748
3749 _UNIT_TABLE = {
3750 'k': 1000,
3751 'K': 1000,
3752 'm': 1000 ** 2,
3753 'M': 1000 ** 2,
3754 'kk': 1000 ** 2,
3755 'KK': 1000 ** 2,
3756 }
be64b5b0 3757
fb47597b 3758 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3759
2f7ae819 3760
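# Illustrative usage sketch (not part of the original module): parse_filesize
# understands both SI and binary suffixes, while parse_count handles the
# K/M shorthands used for view and like counts.
assert parse_filesize('1.2Tb') == 1200000000000
assert parse_filesize('5 KiB') == 5120
assert parse_count('1.8M') == 1800000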
b871d7e9
S
3761def parse_resolution(s):
3762 if s is None:
3763 return {}
3764
17ec8bcf 3765 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
b871d7e9
S
3766 if mobj:
3767 return {
3768 'width': int(mobj.group('w')),
3769 'height': int(mobj.group('h')),
3770 }
3771
17ec8bcf 3772 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
b871d7e9
S
3773 if mobj:
3774 return {'height': int(mobj.group(1))}
3775
3776 mobj = re.search(r'\b([48])[kK]\b', s)
3777 if mobj:
3778 return {'height': int(mobj.group(1)) * 540}
3779
3780 return {}
3781
3782
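# Illustrative usage sketch (not part of the original module): the three
# recognized notations are WxH, NNNp/NNNi and 4k/8k.
assert parse_resolution('1920x1080') == {'width': 1920, 'height': 1080}
assert parse_resolution('720p') == {'height': 720}
assert parse_resolution('4k') == {'height': 2160}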
0dc41787
S
3783def parse_bitrate(s):
3784 if not isinstance(s, compat_str):
3785 return
3786 mobj = re.search(r'\b(\d+)\s*kbps', s)
3787 if mobj:
3788 return int(mobj.group(1))
3789
3790
a942d6cb 3791def month_by_name(name, lang='en'):
caefb1de
PH
3792 """ Return the number of a month by (locale-independently) English name """
3793
f6717dec 3794 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3795
caefb1de 3796 try:
f6717dec 3797 return month_names.index(name) + 1
7105440c
YCH
3798 except ValueError:
3799 return None
3800
3801
3802def month_by_abbreviation(abbrev):
3803 """ Return the number of a month by (locale-independently) English
3804 abbreviations """
3805
3806 try:
3807 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3808 except ValueError:
3809 return None
18258362
JMF
3810
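# Illustrative usage sketch (not part of the original module): both helpers
# return None instead of raising when the name is not recognized.
assert month_by_name('December') == 12
assert month_by_name('notamonth') is None
assert month_by_abbreviation('Mar') == 3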
3811
5aafe895 3812def fix_xml_ampersands(xml_str):
18258362 3813 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3814 return re.sub(
3815 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3816 '&amp;',
5aafe895 3817 xml_str)
e3946f98
PH
3818
3819
3820def setproctitle(title):
8bf48f23 3821 assert isinstance(title, compat_str)
c1c05c67
YCH
3822
3823 # ctypes in Jython is not complete
3824 # http://bugs.jython.org/issue2148
3825 if sys.platform.startswith('java'):
3826 return
3827
e3946f98 3828 try:
611c1dd9 3829 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3830 except OSError:
3831 return
2f49bcd6
RC
3832 except TypeError:
3833 # LoadLibrary in Windows Python 2.7.13 only expects
3834 # a bytestring, but since unicode_literals turns
3835 # every string into a unicode string, it fails.
3836 return
6eefe533
PH
3837 title_bytes = title.encode('utf-8')
3838 buf = ctypes.create_string_buffer(len(title_bytes))
3839 buf.value = title_bytes
e3946f98 3840 try:
6eefe533 3841 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3842 except AttributeError:
3843 return # Strange libc, just skip this
d7dda168
PH
3844
3845
3846def remove_start(s, start):
46bc9b7d 3847 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3848
3849
2b9faf55 3850def remove_end(s, end):
46bc9b7d 3851 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3852
3853
31b2051e
S
3854def remove_quotes(s):
3855 if s is None or len(s) < 2:
3856 return s
3857 for quote in ('"', "'", ):
3858 if s[0] == quote and s[-1] == quote:
3859 return s[1:-1]
3860 return s
3861
3862
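# Illustrative usage sketch (not part of the original module; sample strings
# are made up): prefixes, suffixes and surrounding quotes are only stripped
# when actually present.
assert remove_start('www.example.com', 'www.') == 'example.com'
assert remove_end('video.mp4', '.mp4') == 'video'
assert remove_quotes('"quoted"') == 'quoted'
assert remove_quotes('unquoted') == 'unquoted'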
b6e0c7d2
U
3863def get_domain(url):
3864 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3865 return domain.group('domain') if domain else None
3866
3867
29eb5174 3868def url_basename(url):
9b8aaeed 3869 path = compat_urlparse.urlparse(url).path
28e614de 3870 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3871
3872
02dc0a36
S
3873def base_url(url):
3874 return re.match(r'https?://[^?#&]+/', url).group()
3875
3876
e34c3361 3877def urljoin(base, path):
4b5de77b
S
3878 if isinstance(path, bytes):
3879 path = path.decode('utf-8')
e34c3361
S
3880 if not isinstance(path, compat_str) or not path:
3881 return None
fad4ceb5 3882 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3883 return path
4b5de77b
S
3884 if isinstance(base, bytes):
3885 base = base.decode('utf-8')
3886 if not isinstance(base, compat_str) or not re.match(
3887 r'^(?:https?:)?//', base):
e34c3361
S
3888 return None
3889 return compat_urlparse.urljoin(base, path)
3890
3891
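# Illustrative usage sketch (hypothetical examples, not in the original file)
# for the URL helpers above:
def _example_url_helpers():
    assert get_domain('https://www.example.com/watch?v=abc') == 'example.com'
    assert url_basename('https://example.com/a/b/c.mp4?x=1') == 'c.mp4'
    assert base_url('https://example.com/a/b/c.mp4') == 'https://example.com/a/b/'
    assert urljoin('https://example.com/a/', 'b.mp4') == 'https://example.com/a/b.mp4'
    assert urljoin('https://example.com/a/', '//cdn.example.com/c.mp4') == '//cdn.example.com/c.mp4'
    assert urljoin('not a base', 'b.mp4') is None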
aa94a6d3
PH
3892class HEADRequest(compat_urllib_request.Request):
3893 def get_method(self):
611c1dd9 3894 return 'HEAD'
7217e148
PH
3895
3896
95cf60e8
S
3897class PUTRequest(compat_urllib_request.Request):
3898 def get_method(self):
3899 return 'PUT'
3900
3901
9732d77e 3902def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3903 if get_attr:
3904 if v is not None:
3905 v = getattr(v, get_attr, None)
9572013d
PH
3906 if v == '':
3907 v = None
1812afb7
S
3908 if v is None:
3909 return default
3910 try:
3911 return int(v) * invscale // scale
31c49255 3912 except (ValueError, TypeError, OverflowError):
af98f8ff 3913 return default
9732d77e 3914
9572013d 3915
40a90862
JMF
3916def str_or_none(v, default=None):
3917 return default if v is None else compat_str(v)
3918
9732d77e
PH
3919
3920def str_to_int(int_str):
48d4681e 3921 """ A more relaxed version of int_or_none """
42db58ec 3922 if isinstance(int_str, compat_integer_types):
348c6bf1 3923 return int_str
42db58ec
S
3924 elif isinstance(int_str, compat_str):
3925 int_str = re.sub(r'[,\.\+]', '', int_str)
3926 return int_or_none(int_str)
608d11f5
PH
3927
3928
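# Illustrative usage sketch (hypothetical examples, not in the original file):
# int_or_none() tolerates None/'' and applies scale/invscale, while
# str_to_int() additionally strips thousands separators.
def _example_int_helpers():
    assert int_or_none('1024') == 1024
    assert int_or_none('') is None
    assert int_or_none(None, default=0) == 0
    assert int_or_none(5, invscale=1000) == 5000
    assert str_to_int('1,234,567') == 1234567
    assert str_or_none(42) == '42'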
9732d77e 3929def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3930 if v is None:
3931 return default
3932 try:
3933 return float(v) * invscale / scale
5e1271c5 3934 except (ValueError, TypeError):
caf80631 3935 return default
43f775e4
PH
3936
3937
c7e327c4
S
3938def bool_or_none(v, default=None):
3939 return v if isinstance(v, bool) else default
3940
3941
53cd37ba
S
3942def strip_or_none(v, default=None):
3943 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3944
3945
af03000a
S
3946def url_or_none(url):
3947 if not url or not isinstance(url, compat_str):
3948 return None
3949 url = url.strip()
29f7c58a 3950 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3951
3952
e29663c6 3953def strftime_or_none(timestamp, date_format, default=None):
3954 datetime_object = None
3955 try:
3956 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3957 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3958 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3959 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3960 return datetime_object.strftime(date_format)
3961 except (ValueError, TypeError, AttributeError):
3962 return default
3963
3964
608d11f5 3965def parse_duration(s):
8f9312c3 3966 if not isinstance(s, compat_basestring):
608d11f5
PH
3967 return None
3968
ca7b3246
S
3969 s = s.strip()
3970
acaff495 3971 days, hours, mins, secs, ms = [None] * 5
15846398 3972 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3973 if m:
3974 days, hours, mins, secs, ms = m.groups()
3975 else:
3976 m = re.match(
056653bb
S
3977 r'''(?ix)(?:P?
3978 (?:
3979 [0-9]+\s*y(?:ears?)?\s*
3980 )?
3981 (?:
3982 [0-9]+\s*m(?:onths?)?\s*
3983 )?
3984 (?:
3985 [0-9]+\s*w(?:eeks?)?\s*
3986 )?
8f4b58d7 3987 (?:
acaff495 3988 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3989 )?
056653bb 3990 T)?
acaff495 3991 (?:
3992 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3993 )?
3994 (?:
3995 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3996 )?
3997 (?:
3998 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3999 )?Z?$''', s)
acaff495 4000 if m:
4001 days, hours, mins, secs, ms = m.groups()
4002 else:
15846398 4003 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 4004 if m:
4005 hours, mins = m.groups()
4006 else:
4007 return None
4008
4009 duration = 0
4010 if secs:
4011 duration += float(secs)
4012 if mins:
4013 duration += float(mins) * 60
4014 if hours:
4015 duration += float(hours) * 60 * 60
4016 if days:
4017 duration += float(days) * 24 * 60 * 60
4018 if ms:
4019 duration += float(ms)
4020 return duration
91d7d0b3
JMF
4021
4022
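# Illustrative usage sketch (hypothetical examples, not in the original file):
# parse_duration() accepts clock-style, ISO 8601-style and plain-English
# durations and returns seconds as a float (or None if nothing matches).
def _example_parse_duration():
    assert parse_duration('1:02:03') == 3723
    assert parse_duration('90.5') == 90.5
    assert parse_duration('PT1H30M') == 5400
    assert parse_duration('3 min') == 180
    assert parse_duration('not a duration') is None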
e65e4c88 4023def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 4024 name, real_ext = os.path.splitext(filename)
e65e4c88
S
4025 return (
4026 '{0}.{1}{2}'.format(name, ext, real_ext)
4027 if not expected_real_ext or real_ext[1:] == expected_real_ext
4028 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
4029
4030
b3ed15b7
S
4031def replace_extension(filename, ext, expected_real_ext=None):
4032 name, real_ext = os.path.splitext(filename)
4033 return '{0}.{1}'.format(
4034 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
4035 ext)
4036
4037
d70ad093
PH
4038def check_executable(exe, args=[]):
4039 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
4040 args can be a list of arguments for a short output (like -version) """
4041 try:
d3c93ec2 4042 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
d70ad093
PH
4043 except OSError:
4044 return False
4045 return exe
b7ab0590
PH
4046
4047
9af98e17 4048def _get_exe_version_output(exe, args):
95807118 4049 try:
b64d04c1 4050 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 4051 # SIGTTOU if yt-dlp is run in the background.
067aa17e 4052 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
d3c93ec2 4053 out, _ = Popen(
4054 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
4055 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
95807118
PH
4056 except OSError:
4057 return False
cae97f65
PH
4058 if isinstance(out, bytes): # Python 2.x
4059 out = out.decode('ascii', 'ignore')
9af98e17 4060 return out
cae97f65
PH
4061
4062
4063def detect_exe_version(output, version_re=None, unrecognized='present'):
4064 assert isinstance(output, compat_str)
4065 if version_re is None:
4066 version_re = r'version\s+([-0-9._a-zA-Z]+)'
4067 m = re.search(version_re, output)
95807118
PH
4068 if m:
4069 return m.group(1)
4070 else:
4071 return unrecognized
4072
4073
9af98e17 4074def get_exe_version(exe, args=['--version'],
4075 version_re=None, unrecognized='present'):
4076 """ Returns the version of the specified executable,
4077 or False if the executable is not present """
4078 out = _get_exe_version_output(exe, args)
4079 return detect_exe_version(out, version_re, unrecognized) if out else False
4080
4081
cb89cfc1 4082class LazyList(collections.abc.Sequence):
483336e7 4083 ''' Lazy immutable list from an iterable
4084 Note that slices of a LazyList are lists and not LazyList'''
4085
8e5fecc8 4086 class IndexError(IndexError):
4087 pass
4088
483336e7 4089 def __init__(self, iterable):
4090 self.__iterable = iter(iterable)
4091 self.__cache = []
28419ca2 4092 self.__reversed = False
483336e7 4093
4094 def __iter__(self):
28419ca2 4095 if self.__reversed:
4096 # We need to consume the entire iterable to iterate in reverse
981052c9 4097 yield from self.exhaust()
28419ca2 4098 return
4099 yield from self.__cache
483336e7 4100 for item in self.__iterable:
4101 self.__cache.append(item)
4102 yield item
4103
981052c9 4104 def __exhaust(self):
483336e7 4105 self.__cache.extend(self.__iterable)
9f1a1c36 4106 # Discard the emptied iterable to make it pickle-able
4107 self.__iterable = []
28419ca2 4108 return self.__cache
4109
981052c9 4110 def exhaust(self):
4111 ''' Evaluate the entire iterable '''
4112 return self.__exhaust()[::-1 if self.__reversed else 1]
4113
28419ca2 4114 @staticmethod
981052c9 4115 def __reverse_index(x):
e0f2b4b4 4116 return None if x is None else -(x + 1)
483336e7 4117
4118 def __getitem__(self, idx):
4119 if isinstance(idx, slice):
28419ca2 4120 if self.__reversed:
e0f2b4b4 4121 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4122 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4123 elif isinstance(idx, int):
28419ca2 4124 if self.__reversed:
981052c9 4125 idx = self.__reverse_index(idx)
e0f2b4b4 4126 start, stop, step = idx, idx, 0
483336e7 4127 else:
4128 raise TypeError('indices must be integers or slices')
e0f2b4b4 4129 if ((start or 0) < 0 or (stop or 0) < 0
4130 or (start is None and step < 0)
4131 or (stop is None and step > 0)):
483336e7 4132 # We need to consume the entire iterable to be able to slice from the end
4133 # Obviously, never use this with infinite iterables
8e5fecc8 4134 self.__exhaust()
4135 try:
4136 return self.__cache[idx]
4137 except IndexError as e:
4138 raise self.IndexError(e) from e
e0f2b4b4 4139 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4140 if n > 0:
4141 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4142 try:
4143 return self.__cache[idx]
4144 except IndexError as e:
4145 raise self.IndexError(e) from e
483336e7 4146
4147 def __bool__(self):
4148 try:
28419ca2 4149 self[-1] if self.__reversed else self[0]
8e5fecc8 4150 except self.IndexError:
483336e7 4151 return False
4152 return True
4153
4154 def __len__(self):
8e5fecc8 4155 self.__exhaust()
483336e7 4156 return len(self.__cache)
4157
981052c9 4158 def reverse(self):
28419ca2 4159 self.__reversed = not self.__reversed
4160 return self
4161
4162 def __repr__(self):
4163 # repr and str should mimic a list. So we exhaust the iterable
4164 return repr(self.exhaust())
4165
4166 def __str__(self):
4167 return repr(self.exhaust())
4168
483336e7 4169
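# Illustrative usage sketch (hypothetical examples, not in the original file):
# LazyList only pulls as many items from the underlying iterable as needed;
# negative indexing, len() and reverse()-then-index force full evaluation, so
# avoid those on infinite iterables.
def _example_lazy_list():
    squares = LazyList(n * n for n in range(5))
    assert squares[2] == 4            # consumes only the first three items
    assert squares[:3] == [0, 1, 4]   # slices are plain lists
    assert list(squares) == [0, 1, 4, 9, 16]
    assert squares.reverse()[0] == 16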
7be9ccff 4170class PagedList:
dd26ced1
PH
4171 def __len__(self):
4172 # This is only useful for tests
4173 return len(self.getslice())
4174
7be9ccff 4175 def __init__(self, pagefunc, pagesize, use_cache=True):
4176 self._pagefunc = pagefunc
4177 self._pagesize = pagesize
4178 self._use_cache = use_cache
4179 self._cache = {}
4180
4181 def getpage(self, pagenum):
d8cf8d97 4182 page_results = self._cache.get(pagenum)
4183 if page_results is None:
4184 page_results = list(self._pagefunc(pagenum))
7be9ccff 4185 if self._use_cache:
4186 self._cache[pagenum] = page_results
4187 return page_results
4188
4189 def getslice(self, start=0, end=None):
4190 return list(self._getslice(start, end))
4191
4192 def _getslice(self, start, end):
55575225 4193 raise NotImplementedError('This method must be implemented by subclasses')
4194
4195 def __getitem__(self, idx):
7be9ccff 4196 # NOTE: cache must be enabled if this is used
55575225 4197 if not isinstance(idx, int) or idx < 0:
4198 raise TypeError('indices must be non-negative integers')
4199 entries = self.getslice(idx, idx + 1)
d8cf8d97 4200 if not entries:
4201 raise IndexError()
4202 return entries[0]
55575225 4203
9c44d242
PH
4204
4205class OnDemandPagedList(PagedList):
7be9ccff 4206 def _getslice(self, start, end):
b7ab0590
PH
4207 for pagenum in itertools.count(start // self._pagesize):
4208 firstid = pagenum * self._pagesize
4209 nextfirstid = pagenum * self._pagesize + self._pagesize
4210 if start >= nextfirstid:
4211 continue
4212
b7ab0590
PH
4213 startv = (
4214 start % self._pagesize
4215 if firstid <= start < nextfirstid
4216 else 0)
b7ab0590
PH
4217 endv = (
4218 ((end - 1) % self._pagesize) + 1
4219 if (end is not None and firstid <= end <= nextfirstid)
4220 else None)
4221
7be9ccff 4222 page_results = self.getpage(pagenum)
b7ab0590
PH
4223 if startv != 0 or endv is not None:
4224 page_results = page_results[startv:endv]
7be9ccff 4225 yield from page_results
b7ab0590
PH
4226
4227 # A little optimization - if the current page is not "full", i.e. does
4228 # not contain page_size videos, then we can assume that this page
4229 # is the last one - there are no more ids on further pages -
4230 # i.e. no need to query again.
4231 if len(page_results) + startv < self._pagesize:
4232 break
4233
4234 # If we got the whole page, but the next page is not interesting,
4235 # break out early as well
4236 if end == nextfirstid:
4237 break
81c2f20b
PH
4238
4239
9c44d242
PH
4240class InAdvancePagedList(PagedList):
4241 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4242 self._pagecount = pagecount
7be9ccff 4243 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4244
7be9ccff 4245 def _getslice(self, start, end):
9c44d242
PH
4246 start_page = start // self._pagesize
4247 end_page = (
4248 self._pagecount if end is None else (end // self._pagesize + 1))
4249 skip_elems = start - start_page * self._pagesize
4250 only_more = None if end is None else end - start
4251 for pagenum in range(start_page, end_page):
7be9ccff 4252 page_results = self.getpage(pagenum)
9c44d242 4253 if skip_elems:
7be9ccff 4254 page_results = page_results[skip_elems:]
9c44d242
PH
4255 skip_elems = None
4256 if only_more is not None:
7be9ccff 4257 if len(page_results) < only_more:
4258 only_more -= len(page_results)
9c44d242 4259 else:
7be9ccff 4260 yield from page_results[:only_more]
9c44d242 4261 break
7be9ccff 4262 yield from page_results
9c44d242
PH
4263
4264
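# Illustrative usage sketch (hypothetical examples, not in the original file):
# with a trivial page function, OnDemandPagedList fetches (and caches) only the
# pages needed for the requested slice or index.
def _example_paged_list():
    def fetch_page(pagenum):
        return list(range(pagenum * 3, pagenum * 3 + 3))

    pages = OnDemandPagedList(fetch_page, 3)
    assert pages.getslice(2, 7) == [2, 3, 4, 5, 6]
    assert pages[4] == 4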
81c2f20b 4265def uppercase_escape(s):
676eb3f2 4266 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4267 return re.sub(
a612753d 4268 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4269 lambda m: unicode_escape(m.group(0))[0],
4270 s)
0fe2ff78
YCH
4271
4272
4273def lowercase_escape(s):
4274 unicode_escape = codecs.getdecoder('unicode_escape')
4275 return re.sub(
4276 r'\\u[0-9a-fA-F]{4}',
4277 lambda m: unicode_escape(m.group(0))[0],
4278 s)
b53466e1 4279
d05cfe06
S
4280
4281def escape_rfc3986(s):
4282 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4283 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4284 s = s.encode('utf-8')
ecc0c5ee 4285 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4286
4287
4288def escape_url(url):
4289 """Escape URL as suggested by RFC 3986"""
4290 url_parsed = compat_urllib_parse_urlparse(url)
4291 return url_parsed._replace(
efbed08d 4292 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4293 path=escape_rfc3986(url_parsed.path),
4294 params=escape_rfc3986(url_parsed.params),
4295 query=escape_rfc3986(url_parsed.query),
4296 fragment=escape_rfc3986(url_parsed.fragment)
4297 ).geturl()
4298
62e609ab 4299
4dfbf869 4300def parse_qs(url):
4301 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4302
4303
62e609ab
PH
4304def read_batch_urls(batch_fd):
4305 def fixup(url):
4306 if not isinstance(url, compat_str):
4307 url = url.decode('utf-8', 'replace')
8c04f0be 4308 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4309 for bom in BOM_UTF8:
4310 if url.startswith(bom):
4311 url = url[len(bom):]
4312 url = url.lstrip()
4313 if not url or url.startswith(('#', ';', ']')):
62e609ab 4314 return False
8c04f0be 4315 # "#" cannot be stripped out since it is part of the URI
4316 # However, it can be safely stripped out if it follows a whitespace
4317 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4318
4319 with contextlib.closing(batch_fd) as fd:
4320 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4321
4322
4323def urlencode_postdata(*args, **kargs):
15707c7e 4324 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4325
4326
38f9ef31 4327def update_url_query(url, query):
cacd9966
YCH
4328 if not query:
4329 return url
38f9ef31 4330 parsed_url = compat_urlparse.urlparse(url)
4331 qs = compat_parse_qs(parsed_url.query)
4332 qs.update(query)
4333 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4334 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4335
8e60dc75 4336
ed0291d1
S
4337def update_Request(req, url=None, data=None, headers={}, query={}):
4338 req_headers = req.headers.copy()
4339 req_headers.update(headers)
4340 req_data = data or req.data
4341 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4342 req_get_method = req.get_method()
4343 if req_get_method == 'HEAD':
4344 req_type = HEADRequest
4345 elif req_get_method == 'PUT':
4346 req_type = PUTRequest
4347 else:
4348 req_type = compat_urllib_request.Request
ed0291d1
S
4349 new_req = req_type(
4350 req_url, data=req_data, headers=req_headers,
4351 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4352 if hasattr(req, 'timeout'):
4353 new_req.timeout = req.timeout
4354 return new_req
4355
4356
10c87c15 4357def _multipart_encode_impl(data, boundary):
0c265486
YCH
4358 content_type = 'multipart/form-data; boundary=%s' % boundary
4359
4360 out = b''
4361 for k, v in data.items():
4362 out += b'--' + boundary.encode('ascii') + b'\r\n'
4363 if isinstance(k, compat_str):
4364 k = k.encode('utf-8')
4365 if isinstance(v, compat_str):
4366 v = v.encode('utf-8')
4367 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4368 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4369 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4370 if boundary.encode('ascii') in content:
4371 raise ValueError('Boundary overlaps with data')
4372 out += content
4373
4374 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4375
4376 return out, content_type
4377
4378
4379def multipart_encode(data, boundary=None):
4380 '''
4381 Encode a dict to RFC 7578-compliant form-data
4382
4383 data:
4384 A dict where keys and values can be either Unicode or bytes-like
4385 objects.
4386 boundary:
4387 If specified, it must be a Unicode object and is used as the boundary. Otherwise
4388 a random boundary is generated.
4389
4390 Reference: https://tools.ietf.org/html/rfc7578
4391 '''
4392 has_specified_boundary = boundary is not None
4393
4394 while True:
4395 if boundary is None:
4396 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4397
4398 try:
10c87c15 4399 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4400 break
4401 except ValueError:
4402 if has_specified_boundary:
4403 raise
4404 boundary = None
4405
4406 return out, content_type
4407
4408
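# Illustrative usage sketch (hypothetical examples, not in the original file):
# multipart_encode() returns the encoded body (bytes) together with the
# matching Content-Type header value.
def _example_multipart_encode():
    body, content_type = multipart_encode({'field': 'value'}, boundary='BOUNDARY')
    assert content_type == 'multipart/form-data; boundary=BOUNDARY'
    assert body.startswith(b'--BOUNDARY\r\n')
    assert body.endswith(b'--BOUNDARY--\r\n')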
86296ad2 4409def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4410 if isinstance(key_or_keys, (list, tuple)):
4411 for key in key_or_keys:
86296ad2
S
4412 if key not in d or d[key] is None or skip_false_values and not d[key]:
4413 continue
4414 return d[key]
cbecc9b9
S
4415 return default
4416 return d.get(key_or_keys, default)
4417
4418
329ca3be 4419def try_get(src, getter, expected_type=None):
6606817a 4420 for get in variadic(getter):
a32a9a7e
S
4421 try:
4422 v = get(src)
4423 except (AttributeError, KeyError, TypeError, IndexError):
4424 pass
4425 else:
4426 if expected_type is None or isinstance(v, expected_type):
4427 return v
329ca3be
S
4428
4429
6cc62232
S
4430def merge_dicts(*dicts):
4431 merged = {}
4432 for a_dict in dicts:
4433 for k, v in a_dict.items():
4434 if v is None:
4435 continue
3089bc74
S
4436 if (k not in merged
4437 or (isinstance(v, compat_str) and v
4438 and isinstance(merged[k], compat_str)
4439 and not merged[k])):
6cc62232
S
4440 merged[k] = v
4441 return merged
4442
4443
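# Illustrative usage sketch (hypothetical examples, not in the original file):
# dict_get() returns the first truthy value among the given keys, try_get()
# swallows lookup errors, and merge_dicts() keeps the first non-None value
# unless it is an empty string and a later dict has a non-empty one.
def _example_dict_helpers():
    meta = {'title': '', 'fulltitle': 'A Title', 'duration': None}
    assert dict_get(meta, ('title', 'fulltitle')) == 'A Title'
    assert try_get(meta, lambda x: x['fulltitle'][0], compat_str) == 'A'
    assert try_get(meta, lambda x: x['missing']['key']) is None
    assert merge_dicts({'a': None, 'b': ''}, {'a': 1, 'b': 'x'}) == {'a': 1, 'b': 'x'}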
8e60dc75
S
4444def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4445 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4446
16392824 4447
a1a530b0
PH
4448US_RATINGS = {
4449 'G': 0,
4450 'PG': 10,
4451 'PG-13': 13,
4452 'R': 16,
4453 'NC': 18,
4454}
fac55558
PH
4455
4456
a8795327 4457TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4458 'TV-Y': 0,
4459 'TV-Y7': 7,
4460 'TV-G': 0,
4461 'TV-PG': 0,
4462 'TV-14': 14,
4463 'TV-MA': 17,
a8795327
S
4464}
4465
4466
146c80e2 4467def parse_age_limit(s):
a8795327
S
4468 if type(s) == int:
4469 return s if 0 <= s <= 21 else None
4470 if not isinstance(s, compat_basestring):
d838b1bd 4471 return None
146c80e2 4472 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4473 if m:
4474 return int(m.group('age'))
5c5fae6d 4475 s = s.upper()
a8795327
S
4476 if s in US_RATINGS:
4477 return US_RATINGS[s]
5a16c9d9 4478 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4479 if m:
5a16c9d9 4480 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4481 return None
146c80e2
S
4482
4483
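# Illustrative usage sketch (hypothetical examples, not in the original file):
# parse_age_limit() normalizes several rating conventions to a numeric age.
def _example_parse_age_limit():
    assert parse_age_limit(18) == 18
    assert parse_age_limit('16+') == 16
    assert parse_age_limit('PG-13') == 13
    assert parse_age_limit('TV-MA') == 17
    assert parse_age_limit('unknown') is None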
fac55558 4484def strip_jsonp(code):
609a61e3 4485 return re.sub(
5552c9eb 4486 r'''(?sx)^
e9c671d5 4487 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4488 (?:\s*&&\s*(?P=func_name))?
4489 \s*\(\s*(?P<callback_data>.*)\);?
4490 \s*?(?://[^\n]*)*$''',
4491 r'\g<callback_data>', code)
478c2c61
PH
4492
4493
5c610515 4494def js_to_json(code, vars={}):
4495 # vars is a dict of var, val pairs to substitute
c843e685 4496 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4497 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4498 INTEGER_TABLE = (
4499 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4500 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4501 )
4502
e05f6939 4503 def fix_kv(m):
e7b6d122
PH
4504 v = m.group(0)
4505 if v in ('true', 'false', 'null'):
4506 return v
421ddcb8
C
4507 elif v in ('undefined', 'void 0'):
4508 return 'null'
8bdd16b4 4509 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4510 return ""
4511
4512 if v[0] in ("'", '"'):
4513 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4514 '"': '\\"',
bd1e4844 4515 "\\'": "'",
4516 '\\\n': '',
4517 '\\x': '\\u00',
4518 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4519 else:
4520 for regex, base in INTEGER_TABLE:
4521 im = re.match(regex, v)
4522 if im:
4523 i = int(im.group(1), base)
4524 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4525
5c610515 4526 if v in vars:
4527 return vars[v]
4528
e7b6d122 4529 return '"%s"' % v
e05f6939 4530
bd1e4844 4531 return re.sub(r'''(?sx)
4532 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4533 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4534 {comment}|,(?={skip}[\]}}])|
421ddcb8 4535 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4536 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4537 [0-9]+(?={skip}:)|
4538 !+
4195096e 4539 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4540
4541
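# Illustrative usage sketch (hypothetical examples, not in the original file):
# js_to_json() turns JavaScript object literals (unquoted keys, single quotes,
# hex numbers, trailing commas, undefined) into strict JSON.
def _example_js_to_json():
    assert json.loads(js_to_json("{abc: 'def', xyz: undefined, n: 0x1a}")) == {
        'abc': 'def', 'xyz': None, 'n': 26}
    assert json.loads(js_to_json('[1, 2, 3, ]')) == [1, 2, 3]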
478c2c61
PH
4542def qualities(quality_ids):
4543 """ Get a numeric quality value out of a list of possible values """
4544 def q(qid):
4545 try:
4546 return quality_ids.index(qid)
4547 except ValueError:
4548 return -1
4549 return q
4550
acd69589 4551
de6000d9 4552DEFAULT_OUTTMPL = {
4553 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4554 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4555}
4556OUTTMPL_TYPES = {
72755351 4557 'chapter': None,
de6000d9 4558 'subtitle': None,
4559 'thumbnail': None,
4560 'description': 'description',
4561 'annotation': 'annotations.xml',
4562 'infojson': 'info.json',
08438d2c 4563 'link': None,
5112f26a 4564 'pl_thumbnail': None,
de6000d9 4565 'pl_description': 'description',
4566 'pl_infojson': 'info.json',
4567}
0a871f68 4568
143db31d 4569# As of [1] format syntax is:
4570# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4571# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4572STR_FORMAT_RE_TMPL = r'''(?x)
4573 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4574 %
524e2e4f 4575 (?P<has_key>\((?P<key>{0})\))?
752cda38 4576 (?P<format>
524e2e4f 4577 (?P<conversion>[#0\-+ ]+)?
4578 (?P<min_width>\d+)?
4579 (?P<precision>\.\d+)?
4580 (?P<len_mod>[hlL])? # unused in python
901130bb 4581 {1} # conversion type
752cda38 4582 )
143db31d 4583'''
4584
7d1eb38a 4585
901130bb 4586STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4587
7d1eb38a 4588
a020a0dc
PH
4589def limit_length(s, length):
4590 """ Add ellipses to overly long strings """
4591 if s is None:
4592 return None
4593 ELLIPSES = '...'
4594 if len(s) > length:
4595 return s[:length - len(ELLIPSES)] + ELLIPSES
4596 return s
48844745
PH
4597
4598
4599def version_tuple(v):
5f9b8394 4600 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4601
4602
4603def is_outdated_version(version, limit, assume_new=True):
4604 if not version:
4605 return not assume_new
4606 try:
4607 return version_tuple(version) < version_tuple(limit)
4608 except ValueError:
4609 return not assume_new
732ea2f0
PH
4610
4611
4612def ytdl_is_updateable():
7a5c1cfe 4613 """ Returns if yt-dlp can be updated with -U """
735d865e 4614
5d535b4a 4615 from .update import is_non_updateable
732ea2f0 4616
5d535b4a 4617 return not is_non_updateable()
7d4111ed
PH
4618
4619
4620def args_to_str(args):
4621 # Get a short string representation for a subprocess command
702ccf2d 4622 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4623
4624
9b9c5355 4625def error_to_compat_str(err):
fdae2358
S
4626 err_str = str(err)
4627 # On python 2 error byte string must be decoded with proper
4628 # encoding rather than ascii
4629 if sys.version_info[0] < 3:
4630 err_str = err_str.decode(preferredencoding())
4631 return err_str
4632
4633
c460bdd5 4634def mimetype2ext(mt):
eb9ee194
S
4635 if mt is None:
4636 return None
4637
9359f3d4
F
4638 mt, _, params = mt.partition(';')
4639 mt = mt.strip()
4640
4641 FULL_MAP = {
765ac263 4642 'audio/mp4': 'm4a',
6c33d24b
YCH
4643 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4644 # it's the most popular one
4645 'audio/mpeg': 'mp3',
ba39289d 4646 'audio/x-wav': 'wav',
9359f3d4
F
4647 'audio/wav': 'wav',
4648 'audio/wave': 'wav',
4649 }
4650
4651 ext = FULL_MAP.get(mt)
765ac263
JMF
4652 if ext is not None:
4653 return ext
4654
9359f3d4 4655 SUBTYPE_MAP = {
f6861ec9 4656 '3gpp': '3gp',
cafcf657 4657 'smptett+xml': 'tt',
cafcf657 4658 'ttaf+xml': 'dfxp',
a0d8d704 4659 'ttml+xml': 'ttml',
f6861ec9 4660 'x-flv': 'flv',
a0d8d704 4661 'x-mp4-fragmented': 'mp4',
d4f05d47 4662 'x-ms-sami': 'sami',
a0d8d704 4663 'x-ms-wmv': 'wmv',
b4173f15
RA
4664 'mpegurl': 'm3u8',
4665 'x-mpegurl': 'm3u8',
4666 'vnd.apple.mpegurl': 'm3u8',
4667 'dash+xml': 'mpd',
b4173f15 4668 'f4m+xml': 'f4m',
f164b971 4669 'hds+xml': 'f4m',
e910fe2f 4670 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4671 'quicktime': 'mov',
98ce1a3f 4672 'mp2t': 'ts',
39e7107d 4673 'x-wav': 'wav',
9359f3d4
F
4674 'filmstrip+json': 'fs',
4675 'svg+xml': 'svg',
4676 }
4677
4678 _, _, subtype = mt.rpartition('/')
4679 ext = SUBTYPE_MAP.get(subtype.lower())
4680 if ext is not None:
4681 return ext
4682
4683 SUFFIX_MAP = {
4684 'json': 'json',
4685 'xml': 'xml',
4686 'zip': 'zip',
4687 'gzip': 'gz',
4688 }
4689
4690 _, _, suffix = subtype.partition('+')
4691 ext = SUFFIX_MAP.get(suffix)
4692 if ext is not None:
4693 return ext
4694
4695 return subtype.replace('+', '.')
c460bdd5
PH
4696
4697
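# Illustrative usage sketch (hypothetical examples, not in the original file):
# mimetype2ext() tries the full type, then the subtype, then the '+suffix',
# and finally falls back to the subtype itself.
def _example_mimetype2ext():
    assert mimetype2ext('audio/mp4') == 'm4a'
    assert mimetype2ext('application/x-mpegURL') == 'm3u8'
    assert mimetype2ext('application/dash+xml') == 'mpd'
    assert mimetype2ext('image/svg+xml') == 'svg'
    assert mimetype2ext('video/mp4; codecs="avc1.42E01E"') == 'mp4'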
4f3c5e06 4698def parse_codecs(codecs_str):
4699 # http://tools.ietf.org/html/rfc6381
4700 if not codecs_str:
4701 return {}
a0566bbf 4702 split_codecs = list(filter(None, map(
dbf5416a 4703 str.strip, codecs_str.strip().strip(',').split(','))))
176f1866 4704 vcodec, acodec, hdr = None, None, None
a0566bbf 4705 for full_codec in split_codecs:
9bd979ca 4706 parts = full_codec.split('.')
4707 codec = parts[0].replace('0', '')
4708 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
4709 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
4f3c5e06 4710 if not vcodec:
9bd979ca 4711 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
176f1866 4712 if codec in ('dvh1', 'dvhe'):
4713 hdr = 'DV'
9bd979ca 4714 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
4715 hdr = 'HDR10'
4716 elif full_codec.replace('0', '').startswith('vp9.2'):
176f1866 4717 hdr = 'HDR10'
60f5c9fb 4718 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4719 if not acodec:
4720 acodec = full_codec
4721 else:
60f5c9fb 4722 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4723 if not vcodec and not acodec:
a0566bbf 4724 if len(split_codecs) == 2:
4f3c5e06 4725 return {
a0566bbf 4726 'vcodec': split_codecs[0],
4727 'acodec': split_codecs[1],
4f3c5e06 4728 }
4729 else:
4730 return {
4731 'vcodec': vcodec or 'none',
4732 'acodec': acodec or 'none',
176f1866 4733 'dynamic_range': hdr,
4f3c5e06 4734 }
4735 return {}
4736
4737
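# Illustrative usage sketch (hypothetical examples, not in the original file):
# parse_codecs() splits an RFC 6381 codecs string into vcodec/acodec fields,
# filling the missing side with 'none'.
def _example_parse_codecs():
    assert parse_codecs('avc1.64001f, mp4a.40.2') == {
        'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
    assert parse_codecs('opus') == {
        'vcodec': 'none', 'acodec': 'opus', 'dynamic_range': None}
    assert parse_codecs('') == {}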
2ccd1b10 4738def urlhandle_detect_ext(url_handle):
79298173 4739 getheader = url_handle.headers.get
2ccd1b10 4740
b55ee18f
PH
4741 cd = getheader('Content-Disposition')
4742 if cd:
4743 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4744 if m:
4745 e = determine_ext(m.group('filename'), default_ext=None)
4746 if e:
4747 return e
4748
c460bdd5 4749 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4750
4751
1e399778
YCH
4752def encode_data_uri(data, mime_type):
4753 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4754
4755
05900629 4756def age_restricted(content_limit, age_limit):
6ec6cb4e 4757 """ Returns True iff the content should be blocked """
05900629
PH
4758
4759 if age_limit is None: # No limit set
4760 return False
4761 if content_limit is None:
4762 return False # Content available for everyone
4763 return age_limit < content_limit
61ca9a80
PH
4764
4765
4766def is_html(first_bytes):
4767 """ Detect whether a file contains HTML by examining its first bytes. """
4768
4769 BOMS = [
4770 (b'\xef\xbb\xbf', 'utf-8'),
4771 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4772 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4773 (b'\xff\xfe', 'utf-16-le'),
4774 (b'\xfe\xff', 'utf-16-be'),
4775 ]
4776 for bom, enc in BOMS:
4777 if first_bytes.startswith(bom):
4778 s = first_bytes[len(bom):].decode(enc, 'replace')
4779 break
4780 else:
4781 s = first_bytes.decode('utf-8', 'replace')
4782
4783 return re.match(r'^\s*<', s)
a055469f
PH
4784
4785
4786def determine_protocol(info_dict):
4787 protocol = info_dict.get('protocol')
4788 if protocol is not None:
4789 return protocol
4790
7de837a5 4791 url = sanitize_url(info_dict['url'])
a055469f
PH
4792 if url.startswith('rtmp'):
4793 return 'rtmp'
4794 elif url.startswith('mms'):
4795 return 'mms'
4796 elif url.startswith('rtsp'):
4797 return 'rtsp'
4798
4799 ext = determine_ext(url)
4800 if ext == 'm3u8':
4801 return 'm3u8'
4802 elif ext == 'f4m':
4803 return 'f4m'
4804
4805 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4806
4807
c5e3f849 4808def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
4809 """ Render a list of rows, each as a list of values.
4810 Text after a \t will be right aligned """
ec11a9f4 4811 def width(string):
c5e3f849 4812 return len(remove_terminal_sequences(string).replace('\t', ''))
76d321f6 4813
4814 def get_max_lens(table):
ec11a9f4 4815 return [max(width(str(v)) for v in col) for col in zip(*table)]
76d321f6 4816
4817 def filter_using_list(row, filterArray):
4818 return [col for (take, col) in zip(filterArray, row) if take]
4819
c5e3f849 4820 if hide_empty:
76d321f6 4821 max_lens = get_max_lens(data)
4822 header_row = filter_using_list(header_row, max_lens)
4823 data = [filter_using_list(row, max_lens) for row in data]
4824
cfb56d1a 4825 table = [header_row] + data
76d321f6 4826 max_lens = get_max_lens(table)
c5e3f849 4827 extra_gap += 1
76d321f6 4828 if delim:
c5e3f849 4829 table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
4830 table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter
ec11a9f4 4831 for row in table:
4832 for pos, text in enumerate(map(str, row)):
c5e3f849 4833 if '\t' in text:
4834 row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
4835 else:
4836 row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
4837 ret = '\n'.join(''.join(row).rstrip() for row in table)
ec11a9f4 4838 return ret
347de493
PH
4839
4840
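# Illustrative usage sketch (hypothetical examples, not in the original file):
# render_table() pads each column to its widest cell; a value prefixed with
# '\t' is pushed to the right edge of its column.
def _example_render_table():
    table = render_table(
        ['ID', 'EXT', '\tFILESIZE'],
        [['22', 'mp4', '\t1.95MiB'], ['137', 'mp4', '\t12.42MiB']])
    lines = table.split('\n')
    assert len(lines) == 3
    assert lines[0].startswith('ID')
    assert lines[1].endswith('1.95MiB')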
8f18aca8 4841def _match_one(filter_part, dct, incomplete):
77b87f05 4842 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4843 STRING_OPERATORS = {
4844 '*=': operator.contains,
4845 '^=': lambda attr, value: attr.startswith(value),
4846 '$=': lambda attr, value: attr.endswith(value),
4847 '~=': lambda attr, value: re.search(value, attr),
4848 }
347de493 4849 COMPARISON_OPERATORS = {
a047eeb6 4850 **STRING_OPERATORS,
4851 '<=': operator.le, # "<=" must be defined above "<"
347de493 4852 '<': operator.lt,
347de493 4853 '>=': operator.ge,
a047eeb6 4854 '>': operator.gt,
347de493 4855 '=': operator.eq,
347de493 4856 }
a047eeb6 4857
347de493
PH
4858 operator_rex = re.compile(r'''(?x)\s*
4859 (?P<key>[a-z_]+)
77b87f05 4860 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493 4861 (?:
a047eeb6 4862 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4863 (?P<strval>.+?)
347de493
PH
4864 )
4865 \s*$
4866 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4867 m = operator_rex.search(filter_part)
4868 if m:
18f96d12 4869 m = m.groupdict()
4870 unnegated_op = COMPARISON_OPERATORS[m['op']]
4871 if m['negation']:
77b87f05
MT
4872 op = lambda attr, value: not unnegated_op(attr, value)
4873 else:
4874 op = unnegated_op
18f96d12 4875 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4876 if m['quote']:
4877 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4878 actual_value = dct.get(m['key'])
4879 numeric_comparison = None
4880 if isinstance(actual_value, compat_numeric_types):
e5a088dc
S
4881 # If the original field is a string and the matching comparison value is
4882 # a number, we should respect the origin of the original field
4883 # and process the comparison value as a string (see
18f96d12 4884 # https://github.com/ytdl-org/youtube-dl/issues/11082)
347de493 4885 try:
18f96d12 4886 numeric_comparison = int(comparison_value)
347de493 4887 except ValueError:
18f96d12 4888 numeric_comparison = parse_filesize(comparison_value)
4889 if numeric_comparison is None:
4890 numeric_comparison = parse_filesize(f'{comparison_value}B')
4891 if numeric_comparison is None:
4892 numeric_comparison = parse_duration(comparison_value)
4893 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4894 raise ValueError('Operator %s only supports string values!' % m['op'])
347de493 4895 if actual_value is None:
18f96d12 4896 return incomplete or m['none_inclusive']
4897 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
347de493
PH
4898
4899 UNARY_OPERATORS = {
1cc47c66
S
4900 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4901 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4902 }
4903 operator_rex = re.compile(r'''(?x)\s*
4904 (?P<op>%s)\s*(?P<key>[a-z_]+)
4905 \s*$
4906 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4907 m = operator_rex.search(filter_part)
4908 if m:
4909 op = UNARY_OPERATORS[m.group('op')]
4910 actual_value = dct.get(m.group('key'))
8f18aca8 4911 if incomplete and actual_value is None:
4912 return True
347de493
PH
4913 return op(actual_value)
4914
4915 raise ValueError('Invalid filter part %r' % filter_part)
4916
4917
8f18aca8 4918def match_str(filter_str, dct, incomplete=False):
4919 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4920 When incomplete, all conditions passes on missing fields
4921 """
347de493 4922 return all(
8f18aca8 4923 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4924 for filter_part in re.split(r'(?<!\\)&', filter_str))
347de493
PH
4925
4926
4927def match_filter_func(filter_str):
8f18aca8 4928 def _match_func(info_dict, *args, **kwargs):
4929 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4930 return None
4931 else:
4932 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4933 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4934 return _match_func
91410c9b
PH
4935
4936
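# Illustrative usage sketch (hypothetical examples, not in the original file):
# match_str() evaluates '&'-separated comparisons against an info dict; it is
# the engine used by match_filter_func() above.
def _example_match_str():
    info = {'duration': 320, 'like_count': 150, 'title': 'Official Video'}
    assert match_str('duration > 60 & like_count >= 100', info)
    assert match_str('title *= Official', info)
    assert not match_str('duration < 60', info)
    assert match_str('view_count', info) is False  # missing field fails the unary test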
bf6427d2
YCH
4937def parse_dfxp_time_expr(time_expr):
4938 if not time_expr:
d631d5f9 4939 return
bf6427d2
YCH
4940
4941 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4942 if mobj:
4943 return float(mobj.group('time_offset'))
4944
db2fe38b 4945 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4946 if mobj:
db2fe38b 4947 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4948
4949
c1c924ab 4950def srt_subtitles_timecode(seconds):
aa7785f8 4951 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4952
4953
4954def ass_subtitles_timecode(seconds):
4955 time = timetuple_from_msec(seconds * 1000)
4956 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
bf6427d2
YCH
4957
4958
4959def dfxp2srt(dfxp_data):
3869028f
YCH
4960 '''
4961 @param dfxp_data A bytes-like object containing DFXP data
4962 @returns A unicode object containing converted SRT data
4963 '''
5b995f71 4964 LEGACY_NAMESPACES = (
3869028f
YCH
4965 (b'http://www.w3.org/ns/ttml', [
4966 b'http://www.w3.org/2004/11/ttaf1',
4967 b'http://www.w3.org/2006/04/ttaf1',
4968 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4969 ]),
3869028f
YCH
4970 (b'http://www.w3.org/ns/ttml#styling', [
4971 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4972 ]),
4973 )
4974
4975 SUPPORTED_STYLING = [
4976 'color',
4977 'fontFamily',
4978 'fontSize',
4979 'fontStyle',
4980 'fontWeight',
4981 'textDecoration'
4982 ]
4983
4e335771 4984 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4985 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4986 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4987 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4988 })
bf6427d2 4989
5b995f71
RA
4990 styles = {}
4991 default_style = {}
4992
87de7069 4993 class TTMLPElementParser(object):
5b995f71
RA
4994 _out = ''
4995 _unclosed_elements = []
4996 _applied_styles = []
bf6427d2 4997
2b14cb56 4998 def start(self, tag, attrib):
5b995f71
RA
4999 if tag in (_x('ttml:br'), 'br'):
5000 self._out += '\n'
5001 else:
5002 unclosed_elements = []
5003 style = {}
5004 element_style_id = attrib.get('style')
5005 if default_style:
5006 style.update(default_style)
5007 if element_style_id:
5008 style.update(styles.get(element_style_id, {}))
5009 for prop in SUPPORTED_STYLING:
5010 prop_val = attrib.get(_x('tts:' + prop))
5011 if prop_val:
5012 style[prop] = prop_val
5013 if style:
5014 font = ''
5015 for k, v in sorted(style.items()):
5016 if self._applied_styles and self._applied_styles[-1].get(k) == v:
5017 continue
5018 if k == 'color':
5019 font += ' color="%s"' % v
5020 elif k == 'fontSize':
5021 font += ' size="%s"' % v
5022 elif k == 'fontFamily':
5023 font += ' face="%s"' % v
5024 elif k == 'fontWeight' and v == 'bold':
5025 self._out += '<b>'
5026 unclosed_elements.append('b')
5027 elif k == 'fontStyle' and v == 'italic':
5028 self._out += '<i>'
5029 unclosed_elements.append('i')
5030 elif k == 'textDecoration' and v == 'underline':
5031 self._out += '<u>'
5032 unclosed_elements.append('u')
5033 if font:
5034 self._out += '<font' + font + '>'
5035 unclosed_elements.append('font')
5036 applied_style = {}
5037 if self._applied_styles:
5038 applied_style.update(self._applied_styles[-1])
5039 applied_style.update(style)
5040 self._applied_styles.append(applied_style)
5041 self._unclosed_elements.append(unclosed_elements)
bf6427d2 5042
2b14cb56 5043 def end(self, tag):
5b995f71
RA
5044 if tag not in (_x('ttml:br'), 'br'):
5045 unclosed_elements = self._unclosed_elements.pop()
5046 for element in reversed(unclosed_elements):
5047 self._out += '</%s>' % element
5048 if unclosed_elements and self._applied_styles:
5049 self._applied_styles.pop()
bf6427d2 5050
2b14cb56 5051 def data(self, data):
5b995f71 5052 self._out += data
2b14cb56 5053
5054 def close(self):
5b995f71 5055 return self._out.strip()
2b14cb56 5056
5057 def parse_node(node):
5058 target = TTMLPElementParser()
5059 parser = xml.etree.ElementTree.XMLParser(target=target)
5060 parser.feed(xml.etree.ElementTree.tostring(node))
5061 return parser.close()
bf6427d2 5062
5b995f71
RA
5063 for k, v in LEGACY_NAMESPACES:
5064 for ns in v:
5065 dfxp_data = dfxp_data.replace(ns, k)
5066
3869028f 5067 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 5068 out = []
5b995f71 5069 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
5070
5071 if not paras:
5072 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 5073
5b995f71
RA
5074 repeat = False
5075 while True:
5076 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
5077 style_id = style.get('id') or style.get(_x('xml:id'))
5078 if not style_id:
5079 continue
5b995f71
RA
5080 parent_style_id = style.get('style')
5081 if parent_style_id:
5082 if parent_style_id not in styles:
5083 repeat = True
5084 continue
5085 styles[style_id] = styles[parent_style_id].copy()
5086 for prop in SUPPORTED_STYLING:
5087 prop_val = style.get(_x('tts:' + prop))
5088 if prop_val:
5089 styles.setdefault(style_id, {})[prop] = prop_val
5090 if repeat:
5091 repeat = False
5092 else:
5093 break
5094
5095 for p in ('body', 'div'):
5096 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5097 if ele is None:
5098 continue
5099 style = styles.get(ele.get('style'))
5100 if not style:
5101 continue
5102 default_style.update(style)
5103
bf6427d2 5104 for para, index in zip(paras, itertools.count(1)):
d631d5f9 5105 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 5106 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
5107 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5108 if begin_time is None:
5109 continue
7dff0363 5110 if not end_time:
d631d5f9
YCH
5111 if not dur:
5112 continue
5113 end_time = begin_time + dur
bf6427d2
YCH
5114 out.append('%d\n%s --> %s\n%s\n\n' % (
5115 index,
c1c924ab
YCH
5116 srt_subtitles_timecode(begin_time),
5117 srt_subtitles_timecode(end_time),
bf6427d2
YCH
5118 parse_node(para)))
5119
5120 return ''.join(out)
5121
5122
66e289ba
S
5123def cli_option(params, command_option, param):
5124 param = params.get(param)
98e698f1
RA
5125 if param:
5126 param = compat_str(param)
66e289ba
S
5127 return [command_option, param] if param is not None else []
5128
5129
5130def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
5131 param = params.get(param)
5b232f46
S
5132 if param is None:
5133 return []
66e289ba
S
5134 assert isinstance(param, bool)
5135 if separator:
5136 return [command_option + separator + (true_value if param else false_value)]
5137 return [command_option, true_value if param else false_value]
5138
5139
5140def cli_valueless_option(params, command_option, param, expected_value=True):
5141 param = params.get(param)
5142 return [command_option] if param == expected_value else []
5143
5144
e92caff5 5145def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 5146 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 5147 if use_compat:
5b1ecbb3 5148 return argdict
5149 else:
5150 argdict = None
eab9b2bc 5151 if argdict is None:
5b1ecbb3 5152 return default
eab9b2bc 5153 assert isinstance(argdict, dict)
5154
e92caff5 5155 assert isinstance(keys, (list, tuple))
5156 for key_list in keys:
e92caff5 5157 arg_list = list(filter(
5158 lambda x: x is not None,
6606817a 5159 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5160 if arg_list:
5161 return [arg for args in arg_list for arg in args]
5162 return default
66e289ba 5163
6251555f 5164
330690a2 5165def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5166 main_key, exe = main_key.lower(), exe.lower()
5167 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5168 keys = [f'{root_key}{k}' for k in (keys or [''])]
5169 if root_key in keys:
5170 if main_key != exe:
5171 keys.append((main_key, exe))
5172 keys.append('default')
5173 else:
5174 use_compat = False
5175 return cli_configuration_args(argdict, keys, default, use_compat)
5176
66e289ba 5177
39672624
YCH
5178class ISO639Utils(object):
5179 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5180 _lang_map = {
5181 'aa': 'aar',
5182 'ab': 'abk',
5183 'ae': 'ave',
5184 'af': 'afr',
5185 'ak': 'aka',
5186 'am': 'amh',
5187 'an': 'arg',
5188 'ar': 'ara',
5189 'as': 'asm',
5190 'av': 'ava',
5191 'ay': 'aym',
5192 'az': 'aze',
5193 'ba': 'bak',
5194 'be': 'bel',
5195 'bg': 'bul',
5196 'bh': 'bih',
5197 'bi': 'bis',
5198 'bm': 'bam',
5199 'bn': 'ben',
5200 'bo': 'bod',
5201 'br': 'bre',
5202 'bs': 'bos',
5203 'ca': 'cat',
5204 'ce': 'che',
5205 'ch': 'cha',
5206 'co': 'cos',
5207 'cr': 'cre',
5208 'cs': 'ces',
5209 'cu': 'chu',
5210 'cv': 'chv',
5211 'cy': 'cym',
5212 'da': 'dan',
5213 'de': 'deu',
5214 'dv': 'div',
5215 'dz': 'dzo',
5216 'ee': 'ewe',
5217 'el': 'ell',
5218 'en': 'eng',
5219 'eo': 'epo',
5220 'es': 'spa',
5221 'et': 'est',
5222 'eu': 'eus',
5223 'fa': 'fas',
5224 'ff': 'ful',
5225 'fi': 'fin',
5226 'fj': 'fij',
5227 'fo': 'fao',
5228 'fr': 'fra',
5229 'fy': 'fry',
5230 'ga': 'gle',
5231 'gd': 'gla',
5232 'gl': 'glg',
5233 'gn': 'grn',
5234 'gu': 'guj',
5235 'gv': 'glv',
5236 'ha': 'hau',
5237 'he': 'heb',
b7acc835 5238 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5239 'hi': 'hin',
5240 'ho': 'hmo',
5241 'hr': 'hrv',
5242 'ht': 'hat',
5243 'hu': 'hun',
5244 'hy': 'hye',
5245 'hz': 'her',
5246 'ia': 'ina',
5247 'id': 'ind',
b7acc835 5248 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5249 'ie': 'ile',
5250 'ig': 'ibo',
5251 'ii': 'iii',
5252 'ik': 'ipk',
5253 'io': 'ido',
5254 'is': 'isl',
5255 'it': 'ita',
5256 'iu': 'iku',
5257 'ja': 'jpn',
5258 'jv': 'jav',
5259 'ka': 'kat',
5260 'kg': 'kon',
5261 'ki': 'kik',
5262 'kj': 'kua',
5263 'kk': 'kaz',
5264 'kl': 'kal',
5265 'km': 'khm',
5266 'kn': 'kan',
5267 'ko': 'kor',
5268 'kr': 'kau',
5269 'ks': 'kas',
5270 'ku': 'kur',
5271 'kv': 'kom',
5272 'kw': 'cor',
5273 'ky': 'kir',
5274 'la': 'lat',
5275 'lb': 'ltz',
5276 'lg': 'lug',
5277 'li': 'lim',
5278 'ln': 'lin',
5279 'lo': 'lao',
5280 'lt': 'lit',
5281 'lu': 'lub',
5282 'lv': 'lav',
5283 'mg': 'mlg',
5284 'mh': 'mah',
5285 'mi': 'mri',
5286 'mk': 'mkd',
5287 'ml': 'mal',
5288 'mn': 'mon',
5289 'mr': 'mar',
5290 'ms': 'msa',
5291 'mt': 'mlt',
5292 'my': 'mya',
5293 'na': 'nau',
5294 'nb': 'nob',
5295 'nd': 'nde',
5296 'ne': 'nep',
5297 'ng': 'ndo',
5298 'nl': 'nld',
5299 'nn': 'nno',
5300 'no': 'nor',
5301 'nr': 'nbl',
5302 'nv': 'nav',
5303 'ny': 'nya',
5304 'oc': 'oci',
5305 'oj': 'oji',
5306 'om': 'orm',
5307 'or': 'ori',
5308 'os': 'oss',
5309 'pa': 'pan',
5310 'pi': 'pli',
5311 'pl': 'pol',
5312 'ps': 'pus',
5313 'pt': 'por',
5314 'qu': 'que',
5315 'rm': 'roh',
5316 'rn': 'run',
5317 'ro': 'ron',
5318 'ru': 'rus',
5319 'rw': 'kin',
5320 'sa': 'san',
5321 'sc': 'srd',
5322 'sd': 'snd',
5323 'se': 'sme',
5324 'sg': 'sag',
5325 'si': 'sin',
5326 'sk': 'slk',
5327 'sl': 'slv',
5328 'sm': 'smo',
5329 'sn': 'sna',
5330 'so': 'som',
5331 'sq': 'sqi',
5332 'sr': 'srp',
5333 'ss': 'ssw',
5334 'st': 'sot',
5335 'su': 'sun',
5336 'sv': 'swe',
5337 'sw': 'swa',
5338 'ta': 'tam',
5339 'te': 'tel',
5340 'tg': 'tgk',
5341 'th': 'tha',
5342 'ti': 'tir',
5343 'tk': 'tuk',
5344 'tl': 'tgl',
5345 'tn': 'tsn',
5346 'to': 'ton',
5347 'tr': 'tur',
5348 'ts': 'tso',
5349 'tt': 'tat',
5350 'tw': 'twi',
5351 'ty': 'tah',
5352 'ug': 'uig',
5353 'uk': 'ukr',
5354 'ur': 'urd',
5355 'uz': 'uzb',
5356 've': 'ven',
5357 'vi': 'vie',
5358 'vo': 'vol',
5359 'wa': 'wln',
5360 'wo': 'wol',
5361 'xh': 'xho',
5362 'yi': 'yid',
e9a50fba 5363 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5364 'yo': 'yor',
5365 'za': 'zha',
5366 'zh': 'zho',
5367 'zu': 'zul',
5368 }
5369
5370 @classmethod
5371 def short2long(cls, code):
5372 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5373 return cls._lang_map.get(code[:2])
5374
5375 @classmethod
5376 def long2short(cls, code):
5377 """Convert language code from ISO 639-2/T to ISO 639-1"""
5378 for short_name, long_name in cls._lang_map.items():
5379 if long_name == code:
5380 return short_name
5381
5382
4eb10f66
YCH
5383class ISO3166Utils(object):
5384 # From http://data.okfn.org/data/core/country-list
5385 _country_map = {
5386 'AF': 'Afghanistan',
5387 'AX': 'Åland Islands',
5388 'AL': 'Albania',
5389 'DZ': 'Algeria',
5390 'AS': 'American Samoa',
5391 'AD': 'Andorra',
5392 'AO': 'Angola',
5393 'AI': 'Anguilla',
5394 'AQ': 'Antarctica',
5395 'AG': 'Antigua and Barbuda',
5396 'AR': 'Argentina',
5397 'AM': 'Armenia',
5398 'AW': 'Aruba',
5399 'AU': 'Australia',
5400 'AT': 'Austria',
5401 'AZ': 'Azerbaijan',
5402 'BS': 'Bahamas',
5403 'BH': 'Bahrain',
5404 'BD': 'Bangladesh',
5405 'BB': 'Barbados',
5406 'BY': 'Belarus',
5407 'BE': 'Belgium',
5408 'BZ': 'Belize',
5409 'BJ': 'Benin',
5410 'BM': 'Bermuda',
5411 'BT': 'Bhutan',
5412 'BO': 'Bolivia, Plurinational State of',
5413 'BQ': 'Bonaire, Sint Eustatius and Saba',
5414 'BA': 'Bosnia and Herzegovina',
5415 'BW': 'Botswana',
5416 'BV': 'Bouvet Island',
5417 'BR': 'Brazil',
5418 'IO': 'British Indian Ocean Territory',
5419 'BN': 'Brunei Darussalam',
5420 'BG': 'Bulgaria',
5421 'BF': 'Burkina Faso',
5422 'BI': 'Burundi',
5423 'KH': 'Cambodia',
5424 'CM': 'Cameroon',
5425 'CA': 'Canada',
5426 'CV': 'Cape Verde',
5427 'KY': 'Cayman Islands',
5428 'CF': 'Central African Republic',
5429 'TD': 'Chad',
5430 'CL': 'Chile',
5431 'CN': 'China',
5432 'CX': 'Christmas Island',
5433 'CC': 'Cocos (Keeling) Islands',
5434 'CO': 'Colombia',
5435 'KM': 'Comoros',
5436 'CG': 'Congo',
5437 'CD': 'Congo, the Democratic Republic of the',
5438 'CK': 'Cook Islands',
5439 'CR': 'Costa Rica',
5440 'CI': 'Côte d\'Ivoire',
5441 'HR': 'Croatia',
5442 'CU': 'Cuba',
5443 'CW': 'Curaçao',
5444 'CY': 'Cyprus',
5445 'CZ': 'Czech Republic',
5446 'DK': 'Denmark',
5447 'DJ': 'Djibouti',
5448 'DM': 'Dominica',
5449 'DO': 'Dominican Republic',
5450 'EC': 'Ecuador',
5451 'EG': 'Egypt',
5452 'SV': 'El Salvador',
5453 'GQ': 'Equatorial Guinea',
5454 'ER': 'Eritrea',
5455 'EE': 'Estonia',
5456 'ET': 'Ethiopia',
5457 'FK': 'Falkland Islands (Malvinas)',
5458 'FO': 'Faroe Islands',
5459 'FJ': 'Fiji',
5460 'FI': 'Finland',
5461 'FR': 'France',
5462 'GF': 'French Guiana',
5463 'PF': 'French Polynesia',
5464 'TF': 'French Southern Territories',
5465 'GA': 'Gabon',
5466 'GM': 'Gambia',
5467 'GE': 'Georgia',
5468 'DE': 'Germany',
5469 'GH': 'Ghana',
5470 'GI': 'Gibraltar',
5471 'GR': 'Greece',
5472 'GL': 'Greenland',
5473 'GD': 'Grenada',
5474 'GP': 'Guadeloupe',
5475 'GU': 'Guam',
5476 'GT': 'Guatemala',
5477 'GG': 'Guernsey',
5478 'GN': 'Guinea',
5479 'GW': 'Guinea-Bissau',
5480 'GY': 'Guyana',
5481 'HT': 'Haiti',
5482 'HM': 'Heard Island and McDonald Islands',
5483 'VA': 'Holy See (Vatican City State)',
5484 'HN': 'Honduras',
5485 'HK': 'Hong Kong',
5486 'HU': 'Hungary',
5487 'IS': 'Iceland',
5488 'IN': 'India',
5489 'ID': 'Indonesia',
5490 'IR': 'Iran, Islamic Republic of',
5491 'IQ': 'Iraq',
5492 'IE': 'Ireland',
5493 'IM': 'Isle of Man',
5494 'IL': 'Israel',
5495 'IT': 'Italy',
5496 'JM': 'Jamaica',
5497 'JP': 'Japan',
5498 'JE': 'Jersey',
5499 'JO': 'Jordan',
5500 'KZ': 'Kazakhstan',
5501 'KE': 'Kenya',
5502 'KI': 'Kiribati',
5503 'KP': 'Korea, Democratic People\'s Republic of',
5504 'KR': 'Korea, Republic of',
5505 'KW': 'Kuwait',
5506 'KG': 'Kyrgyzstan',
5507 'LA': 'Lao People\'s Democratic Republic',
5508 'LV': 'Latvia',
5509 'LB': 'Lebanon',
5510 'LS': 'Lesotho',
5511 'LR': 'Liberia',
5512 'LY': 'Libya',
5513 'LI': 'Liechtenstein',
5514 'LT': 'Lithuania',
5515 'LU': 'Luxembourg',
5516 'MO': 'Macao',
5517 'MK': 'Macedonia, the Former Yugoslav Republic of',
5518 'MG': 'Madagascar',
5519 'MW': 'Malawi',
5520 'MY': 'Malaysia',
5521 'MV': 'Maldives',
5522 'ML': 'Mali',
5523 'MT': 'Malta',
5524 'MH': 'Marshall Islands',
5525 'MQ': 'Martinique',
5526 'MR': 'Mauritania',
5527 'MU': 'Mauritius',
5528 'YT': 'Mayotte',
5529 'MX': 'Mexico',
5530 'FM': 'Micronesia, Federated States of',
5531 'MD': 'Moldova, Republic of',
5532 'MC': 'Monaco',
5533 'MN': 'Mongolia',
5534 'ME': 'Montenegro',
5535 'MS': 'Montserrat',
5536 'MA': 'Morocco',
5537 'MZ': 'Mozambique',
5538 'MM': 'Myanmar',
5539 'NA': 'Namibia',
5540 'NR': 'Nauru',
5541 'NP': 'Nepal',
5542 'NL': 'Netherlands',
5543 'NC': 'New Caledonia',
5544 'NZ': 'New Zealand',
5545 'NI': 'Nicaragua',
5546 'NE': 'Niger',
5547 'NG': 'Nigeria',
5548 'NU': 'Niue',
5549 'NF': 'Norfolk Island',
5550 'MP': 'Northern Mariana Islands',
5551 'NO': 'Norway',
5552 'OM': 'Oman',
5553 'PK': 'Pakistan',
5554 'PW': 'Palau',
5555 'PS': 'Palestine, State of',
5556 'PA': 'Panama',
5557 'PG': 'Papua New Guinea',
5558 'PY': 'Paraguay',
5559 'PE': 'Peru',
5560 'PH': 'Philippines',
5561 'PN': 'Pitcairn',
5562 'PL': 'Poland',
5563 'PT': 'Portugal',
5564 'PR': 'Puerto Rico',
5565 'QA': 'Qatar',
5566 'RE': 'Réunion',
5567 'RO': 'Romania',
5568 'RU': 'Russian Federation',
5569 'RW': 'Rwanda',
5570 'BL': 'Saint Barthélemy',
5571 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5572 'KN': 'Saint Kitts and Nevis',
5573 'LC': 'Saint Lucia',
5574 'MF': 'Saint Martin (French part)',
5575 'PM': 'Saint Pierre and Miquelon',
5576 'VC': 'Saint Vincent and the Grenadines',
5577 'WS': 'Samoa',
5578 'SM': 'San Marino',
5579 'ST': 'Sao Tome and Principe',
5580 'SA': 'Saudi Arabia',
5581 'SN': 'Senegal',
5582 'RS': 'Serbia',
5583 'SC': 'Seychelles',
5584 'SL': 'Sierra Leone',
5585 'SG': 'Singapore',
5586 'SX': 'Sint Maarten (Dutch part)',
5587 'SK': 'Slovakia',
5588 'SI': 'Slovenia',
5589 'SB': 'Solomon Islands',
5590 'SO': 'Somalia',
5591 'ZA': 'South Africa',
5592 'GS': 'South Georgia and the South Sandwich Islands',
5593 'SS': 'South Sudan',
5594 'ES': 'Spain',
5595 'LK': 'Sri Lanka',
5596 'SD': 'Sudan',
5597 'SR': 'Suriname',
5598 'SJ': 'Svalbard and Jan Mayen',
5599 'SZ': 'Swaziland',
5600 'SE': 'Sweden',
5601 'CH': 'Switzerland',
5602 'SY': 'Syrian Arab Republic',
5603 'TW': 'Taiwan, Province of China',
5604 'TJ': 'Tajikistan',
5605 'TZ': 'Tanzania, United Republic of',
5606 'TH': 'Thailand',
5607 'TL': 'Timor-Leste',
5608 'TG': 'Togo',
5609 'TK': 'Tokelau',
5610 'TO': 'Tonga',
5611 'TT': 'Trinidad and Tobago',
5612 'TN': 'Tunisia',
5613 'TR': 'Turkey',
5614 'TM': 'Turkmenistan',
5615 'TC': 'Turks and Caicos Islands',
5616 'TV': 'Tuvalu',
5617 'UG': 'Uganda',
5618 'UA': 'Ukraine',
5619 'AE': 'United Arab Emirates',
5620 'GB': 'United Kingdom',
5621 'US': 'United States',
5622 'UM': 'United States Minor Outlying Islands',
5623 'UY': 'Uruguay',
5624 'UZ': 'Uzbekistan',
5625 'VU': 'Vanuatu',
5626 'VE': 'Venezuela, Bolivarian Republic of',
5627 'VN': 'Viet Nam',
5628 'VG': 'Virgin Islands, British',
5629 'VI': 'Virgin Islands, U.S.',
5630 'WF': 'Wallis and Futuna',
5631 'EH': 'Western Sahara',
5632 'YE': 'Yemen',
5633 'ZM': 'Zambia',
5634 'ZW': 'Zimbabwe',
5635 }
5636
5637 @classmethod
5638 def short2full(cls, code):
5639 """Convert an ISO 3166-2 country code to the corresponding full name"""
5640 return cls._country_map.get(code.upper())
5641
5642
773f291d
S
5643class GeoUtils(object):
5644 # Major IPv4 address blocks per country
5645 _country_ip_map = {
53896ca5 5646 'AD': '46.172.224.0/19',
773f291d
S
5647 'AE': '94.200.0.0/13',
5648 'AF': '149.54.0.0/17',
5649 'AG': '209.59.64.0/18',
5650 'AI': '204.14.248.0/21',
5651 'AL': '46.99.0.0/16',
5652 'AM': '46.70.0.0/15',
5653 'AO': '105.168.0.0/13',
53896ca5
S
5654 'AP': '182.50.184.0/21',
5655 'AQ': '23.154.160.0/24',
773f291d
S
5656 'AR': '181.0.0.0/12',
5657 'AS': '202.70.112.0/20',
53896ca5 5658 'AT': '77.116.0.0/14',
773f291d
S
5659 'AU': '1.128.0.0/11',
5660 'AW': '181.41.0.0/18',
53896ca5
S
5661 'AX': '185.217.4.0/22',
5662 'AZ': '5.197.0.0/16',
773f291d
S
5663 'BA': '31.176.128.0/17',
5664 'BB': '65.48.128.0/17',
5665 'BD': '114.130.0.0/16',
5666 'BE': '57.0.0.0/8',
53896ca5 5667 'BF': '102.178.0.0/15',
773f291d
S
5668 'BG': '95.42.0.0/15',
5669 'BH': '37.131.0.0/17',
5670 'BI': '154.117.192.0/18',
5671 'BJ': '137.255.0.0/16',
53896ca5 5672 'BL': '185.212.72.0/23',
773f291d
S
5673 'BM': '196.12.64.0/18',
5674 'BN': '156.31.0.0/16',
5675 'BO': '161.56.0.0/16',
5676 'BQ': '161.0.80.0/20',
53896ca5 5677 'BR': '191.128.0.0/12',
773f291d
S
5678 'BS': '24.51.64.0/18',
5679 'BT': '119.2.96.0/19',
5680 'BW': '168.167.0.0/16',
5681 'BY': '178.120.0.0/13',
5682 'BZ': '179.42.192.0/18',
5683 'CA': '99.224.0.0/11',
5684 'CD': '41.243.0.0/16',
53896ca5
S
5685 'CF': '197.242.176.0/21',
5686 'CG': '160.113.0.0/16',
773f291d 5687 'CH': '85.0.0.0/13',
53896ca5 5688 'CI': '102.136.0.0/14',
773f291d
S
5689 'CK': '202.65.32.0/19',
5690 'CL': '152.172.0.0/14',
53896ca5 5691 'CM': '102.244.0.0/14',
773f291d
S
5692 'CN': '36.128.0.0/10',
5693 'CO': '181.240.0.0/12',
5694 'CR': '201.192.0.0/12',
5695 'CU': '152.206.0.0/15',
5696 'CV': '165.90.96.0/19',
5697 'CW': '190.88.128.0/17',
53896ca5 5698 'CY': '31.153.0.0/16',
773f291d
S
5699 'CZ': '88.100.0.0/14',
5700 'DE': '53.0.0.0/8',
5701 'DJ': '197.241.0.0/17',
5702 'DK': '87.48.0.0/12',
5703 'DM': '192.243.48.0/20',
5704 'DO': '152.166.0.0/15',
5705 'DZ': '41.96.0.0/12',
5706 'EC': '186.68.0.0/15',
5707 'EE': '90.190.0.0/15',
5708 'EG': '156.160.0.0/11',
5709 'ER': '196.200.96.0/20',
5710 'ES': '88.0.0.0/11',
5711 'ET': '196.188.0.0/14',
5712 'EU': '2.16.0.0/13',
5713 'FI': '91.152.0.0/13',
5714 'FJ': '144.120.0.0/16',
53896ca5 5715 'FK': '80.73.208.0/21',
773f291d
S
5716 'FM': '119.252.112.0/20',
5717 'FO': '88.85.32.0/19',
5718 'FR': '90.0.0.0/9',
5719 'GA': '41.158.0.0/15',
5720 'GB': '25.0.0.0/8',
5721 'GD': '74.122.88.0/21',
5722 'GE': '31.146.0.0/16',
5723 'GF': '161.22.64.0/18',
5724 'GG': '62.68.160.0/19',
53896ca5
S
5725 'GH': '154.160.0.0/12',
5726 'GI': '95.164.0.0/16',
773f291d
S
5727 'GL': '88.83.0.0/19',
5728 'GM': '160.182.0.0/15',
5729 'GN': '197.149.192.0/18',
5730 'GP': '104.250.0.0/19',
5731 'GQ': '105.235.224.0/20',
5732 'GR': '94.64.0.0/13',
5733 'GT': '168.234.0.0/16',
5734 'GU': '168.123.0.0/16',
5735 'GW': '197.214.80.0/20',
5736 'GY': '181.41.64.0/18',
5737 'HK': '113.252.0.0/14',
5738 'HN': '181.210.0.0/16',
5739 'HR': '93.136.0.0/13',
5740 'HT': '148.102.128.0/17',
5741 'HU': '84.0.0.0/14',
5742 'ID': '39.192.0.0/10',
5743 'IE': '87.32.0.0/12',
5744 'IL': '79.176.0.0/13',
5745 'IM': '5.62.80.0/20',
5746 'IN': '117.192.0.0/10',
5747 'IO': '203.83.48.0/21',
5748 'IQ': '37.236.0.0/14',
5749 'IR': '2.176.0.0/12',
5750 'IS': '82.221.0.0/16',
5751 'IT': '79.0.0.0/10',
5752 'JE': '87.244.64.0/18',
5753 'JM': '72.27.0.0/17',
5754 'JO': '176.29.0.0/16',
53896ca5 5755 'JP': '133.0.0.0/8',
773f291d
S
5756 'KE': '105.48.0.0/12',
5757 'KG': '158.181.128.0/17',
5758 'KH': '36.37.128.0/17',
5759 'KI': '103.25.140.0/22',
5760 'KM': '197.255.224.0/20',
53896ca5 5761 'KN': '198.167.192.0/19',
773f291d
S
5762 'KP': '175.45.176.0/22',
5763 'KR': '175.192.0.0/10',
5764 'KW': '37.36.0.0/14',
5765 'KY': '64.96.0.0/15',
5766 'KZ': '2.72.0.0/13',
5767 'LA': '115.84.64.0/18',
5768 'LB': '178.135.0.0/16',
53896ca5 5769 'LC': '24.92.144.0/20',
773f291d
S
5770 'LI': '82.117.0.0/19',
5771 'LK': '112.134.0.0/15',
53896ca5 5772 'LR': '102.183.0.0/16',
773f291d
S
5773 'LS': '129.232.0.0/17',
5774 'LT': '78.56.0.0/13',
5775 'LU': '188.42.0.0/16',
5776 'LV': '46.109.0.0/16',
5777 'LY': '41.252.0.0/14',
5778 'MA': '105.128.0.0/11',
5779 'MC': '88.209.64.0/18',
5780 'MD': '37.246.0.0/16',
5781 'ME': '178.175.0.0/17',
5782 'MF': '74.112.232.0/21',
5783 'MG': '154.126.0.0/17',
5784 'MH': '117.103.88.0/21',
5785 'MK': '77.28.0.0/15',
5786 'ML': '154.118.128.0/18',
5787 'MM': '37.111.0.0/17',
5788 'MN': '49.0.128.0/17',
5789 'MO': '60.246.0.0/16',
5790 'MP': '202.88.64.0/20',
5791 'MQ': '109.203.224.0/19',
5792 'MR': '41.188.64.0/18',
5793 'MS': '208.90.112.0/22',
5794 'MT': '46.11.0.0/16',
5795 'MU': '105.16.0.0/12',
5796 'MV': '27.114.128.0/18',
53896ca5 5797 'MW': '102.70.0.0/15',
773f291d
S
5798 'MX': '187.192.0.0/11',
5799 'MY': '175.136.0.0/13',
5800 'MZ': '197.218.0.0/15',
5801 'NA': '41.182.0.0/16',
5802 'NC': '101.101.0.0/18',
5803 'NE': '197.214.0.0/18',
5804 'NF': '203.17.240.0/22',
5805 'NG': '105.112.0.0/12',
5806 'NI': '186.76.0.0/15',
5807 'NL': '145.96.0.0/11',
5808 'NO': '84.208.0.0/13',
5809 'NP': '36.252.0.0/15',
5810 'NR': '203.98.224.0/19',
5811 'NU': '49.156.48.0/22',
5812 'NZ': '49.224.0.0/14',
5813 'OM': '5.36.0.0/15',
5814 'PA': '186.72.0.0/15',
5815 'PE': '186.160.0.0/14',
5816 'PF': '123.50.64.0/18',
5817 'PG': '124.240.192.0/19',
5818 'PH': '49.144.0.0/13',
5819 'PK': '39.32.0.0/11',
5820 'PL': '83.0.0.0/11',
5821 'PM': '70.36.0.0/20',
5822 'PR': '66.50.0.0/16',
5823 'PS': '188.161.0.0/16',
5824 'PT': '85.240.0.0/13',
5825 'PW': '202.124.224.0/20',
5826 'PY': '181.120.0.0/14',
5827 'QA': '37.210.0.0/15',
53896ca5 5828 'RE': '102.35.0.0/16',
773f291d 5829 'RO': '79.112.0.0/13',
53896ca5 5830 'RS': '93.86.0.0/15',
773f291d 5831 'RU': '5.136.0.0/13',
53896ca5 5832 'RW': '41.186.0.0/16',
773f291d
S
5833 'SA': '188.48.0.0/13',
5834 'SB': '202.1.160.0/19',
5835 'SC': '154.192.0.0/11',
53896ca5 5836 'SD': '102.120.0.0/13',
773f291d 5837 'SE': '78.64.0.0/12',
53896ca5 5838 'SG': '8.128.0.0/10',
773f291d
S
5839 'SI': '188.196.0.0/14',
5840 'SK': '78.98.0.0/15',
53896ca5 5841 'SL': '102.143.0.0/17',
773f291d
S
5842 'SM': '89.186.32.0/19',
5843 'SN': '41.82.0.0/15',
53896ca5 5844 'SO': '154.115.192.0/18',
773f291d
S
5845 'SR': '186.179.128.0/17',
5846 'SS': '105.235.208.0/21',
5847 'ST': '197.159.160.0/19',
5848 'SV': '168.243.0.0/16',
5849 'SX': '190.102.0.0/20',
5850 'SY': '5.0.0.0/16',
5851 'SZ': '41.84.224.0/19',
5852 'TC': '65.255.48.0/20',
5853 'TD': '154.68.128.0/19',
5854 'TG': '196.168.0.0/14',
5855 'TH': '171.96.0.0/13',
5856 'TJ': '85.9.128.0/18',
5857 'TK': '27.96.24.0/21',
5858 'TL': '180.189.160.0/20',
5859 'TM': '95.85.96.0/19',
5860 'TN': '197.0.0.0/11',
5861 'TO': '175.176.144.0/21',
5862 'TR': '78.160.0.0/11',
5863 'TT': '186.44.0.0/15',
5864 'TV': '202.2.96.0/19',
5865 'TW': '120.96.0.0/11',
5866 'TZ': '156.156.0.0/14',
53896ca5
S
5867 'UA': '37.52.0.0/14',
5868 'UG': '102.80.0.0/13',
5869 'US': '6.0.0.0/8',
773f291d 5870 'UY': '167.56.0.0/13',
53896ca5 5871 'UZ': '84.54.64.0/18',
773f291d 5872 'VA': '212.77.0.0/19',
53896ca5 5873 'VC': '207.191.240.0/21',
773f291d 5874 'VE': '186.88.0.0/13',
53896ca5 5875 'VG': '66.81.192.0/20',
773f291d
S
5876 'VI': '146.226.0.0/16',
5877 'VN': '14.160.0.0/11',
5878 'VU': '202.80.32.0/20',
5879 'WF': '117.20.32.0/21',
5880 'WS': '202.4.32.0/19',
5881 'YE': '134.35.0.0/16',
5882 'YT': '41.242.116.0/22',
5883 'ZA': '41.0.0.0/11',
53896ca5
S
5884 'ZM': '102.144.0.0/13',
5885 'ZW': '102.177.192.0/18',
773f291d
S
5886 }
5887
5888 @classmethod
5f95927a
S
5889 def random_ipv4(cls, code_or_block):
5890 if len(code_or_block) == 2:
5891 block = cls._country_ip_map.get(code_or_block.upper())
5892 if not block:
5893 return None
5894 else:
5895 block = code_or_block
773f291d
S
5896 addr, preflen = block.split('/')
5897 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5898 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5899 return compat_str(socket.inet_ntoa(
4248dad9 5900 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
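# Illustrative usage (addresses shown are placeholders; the result is random on every call):
# >>> GeoUtils.random_ipv4('DE')             # two-letter codes are looked up in _country_ip_map
# '53.187.204.11'   # some address inside 53.0.0.0/8
# >>> GeoUtils.random_ipv4('91.152.0.0/13')  # anything else is treated as a CIDR block directly
# '91.154.22.7'     # some address inside 91.152.0.0/13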
773f291d
S
5901
5902
91410c9b 5903class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5904 def __init__(self, proxies=None):
5905 # Set default handlers
5906 for type in ('http', 'https'):
5907 setattr(self, '%s_open' % type,
5908 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5909 meth(r, proxy, type))
38e87f6c 5910 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5911
91410c9b 5912 def proxy_open(self, req, proxy, type):
2461f79d 5913 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5914 if req_proxy is not None:
5915 proxy = req_proxy
2461f79d
PH
5916 del req.headers['Ytdl-request-proxy']
5917
5918 if proxy == '__noproxy__':
5919 return None # No Proxy
51fb4995 5920 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5921 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5922 # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS
71aff188 5923 return None
91410c9b
PH
5924 return compat_urllib_request.ProxyHandler.proxy_open(
5925 self, req, proxy, type)
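# Illustrative usage (a sketch; the proxy URL is a placeholder): requests normally go through
# the mapping given at construction time, while a single request can override or disable the
# proxy via the 'Ytdl-request-proxy' header.
# >>> opener = compat_urllib_request.build_opener(
# ...     PerRequestProxyHandler({'http': 'http://127.0.0.1:3128', 'https': 'http://127.0.0.1:3128'}))
# >>> req = compat_urllib_request.Request('http://example.com')
# >>> req.add_header('Ytdl-request-proxy', '__noproxy__')  # go direct for this one request
# >>> opener.open(req)  # doctest: +SKIP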
5bc880b9
YCH
5926
5927
0a5445dd
YCH
5928# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5929# released into Public Domain
5930# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5931
5932def long_to_bytes(n, blocksize=0):
5933 """long_to_bytes(n:long, blocksize:int) : string
5934 Convert a long integer to a byte string.
5935
5936 If optional blocksize is given and greater than zero, pad the front of the
5937 byte string with binary zeros so that the length is a multiple of
5938 blocksize.
5939 """
5940 # after much testing, this algorithm was deemed to be the fastest
5941 s = b''
5942 n = int(n)
5943 while n > 0:
5944 s = compat_struct_pack('>I', n & 0xffffffff) + s
5945 n = n >> 32
5946 # strip off leading zeros
5947 for i in range(len(s)):
5948 if s[i] != b'\000'[0]:
5949 break
5950 else:
5951 # only happens when n == 0
5952 s = b'\000'
5953 i = 0
5954 s = s[i:]
5955 # add back some pad bytes. this could be done more efficiently w.r.t. the
5956 # de-padding being done above, but sigh...
5957 if blocksize > 0 and len(s) % blocksize:
5958 s = (blocksize - len(s) % blocksize) * b'\000' + s
5959 return s
5960
5961
5962def bytes_to_long(s):
5963 """bytes_to_long(string) : long
5964 Convert a byte string to a long integer.
5965
5966 This is (essentially) the inverse of long_to_bytes().
5967 """
5968 acc = 0
5969 length = len(s)
5970 if length % 4:
5971 extra = (4 - length % 4)
5972 s = b'\000' * extra + s
5973 length = length + extra
5974 for i in range(0, length, 4):
5975 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5976 return acc
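# Illustrative round trip: the two helpers are inverses of each other.
# >>> long_to_bytes(0xdeadbeef, blocksize=8)
# b'\x00\x00\x00\x00\xde\xad\xbe\xef'
# >>> bytes_to_long(b'\x00\x00\x00\x00\xde\xad\xbe\xef')
# 3735928559  # == 0xdeadbeef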
5977
5978
5bc880b9
YCH
5979def ohdave_rsa_encrypt(data, exponent, modulus):
5980 '''
5981 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5982
5983 Input:
5984 data: data to encrypt, bytes-like object
5985 exponent, modulus: parameter e and N of RSA algorithm, both integer
5986 Output: hex string of encrypted data
5987
5988 Limitation: supports one block encryption only
5989 '''
5990
5991 payload = int(binascii.hexlify(data[::-1]), 16)
5992 encrypted = pow(payload, exponent, modulus)
5993 return '%x' % encrypted
81bdc8fd
YCH
5994
5995
f48409c7
YCH
5996def pkcs1pad(data, length):
5997 """
5998 Pad input data with the PKCS#1 (v1.5) scheme
5999
6000 @param {int[]} data input data
6001 @param {int} length target length
6002 @returns {int[]} padded data
6003 """
6004 if len(data) > length - 11:
6005 raise ValueError('Input data too long for PKCS#1 padding')
6006
6007 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
6008 return [0, 2] + pseudo_random + [0] + data
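# Illustrative: padding a 1-byte message into a 12-byte block. The layout is
# [0x00, 0x02, <random filler>, 0x00, <data>]; the filler values differ on every call.
# >>> pkcs1pad([0x2a], 12)
# [0, 2, 17, 203, 88, 41, 150, 9, 77, 230, 0, 42]  # the middle 8 values are random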
6009
6010
5eb6bdce 6011def encode_base_n(num, n, table=None):
59f898b7 6012 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
6013 if not table:
6014 table = FULL_TABLE[:n]
6015
5eb6bdce
YCH
6016 if n > len(table):
6017 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
6018
6019 if num == 0:
6020 return table[0]
6021
81bdc8fd
YCH
6022 ret = ''
6023 while num:
6024 ret = table[num % n] + ret
6025 num = num // n
6026 return ret
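# Illustrative:
# >>> encode_base_n(255, 16)
# 'ff'
# >>> encode_base_n(62, 62)
# '10'
# >>> encode_base_n(5, 2, table='ab')  # a custom symbol table may be supplied
# 'bab'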
f52354a8
YCH
6027
6028
6029def decode_packed_codes(code):
06b3fe29 6030 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 6031 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
6032 base = int(base)
6033 count = int(count)
6034 symbols = symbols.split('|')
6035 symbol_table = {}
6036
6037 while count:
6038 count -= 1
5eb6bdce 6039 base_n_count = encode_base_n(count, base)
f52354a8
YCH
6040 symbol_table[base_n_count] = symbols[count] or base_n_count
6041
6042 return re.sub(
6043 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 6044 obfuscated_code)
e154c651 6045
6046
1ced2221
S
6047def caesar(s, alphabet, shift):
6048 if shift == 0:
6049 return s
6050 l = len(alphabet)
6051 return ''.join(
6052 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
6053 for c in s)
6054
6055
6056def rot47(s):
6057 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
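# Illustrative: ROT47 rotates the printable ASCII range and is its own inverse.
# >>> rot47('Hello')
# 'w6==@'
# >>> rot47(rot47('Hello'))
# 'Hello'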
6058
6059
e154c651 6060def parse_m3u8_attributes(attrib):
6061 info = {}
6062 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
6063 if val.startswith('"'):
6064 val = val[1:-1]
6065 info[key] = val
6066 return info
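# Illustrative: quoted values may contain commas and are returned without the quotes.
# >>> parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
# {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'}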
1143535d
YCH
6067
6068
6069def urshift(val, n):
6070 return val >> n if val >= 0 else (val + 0x100000000) >> n
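# Illustrative: emulates a 32-bit unsigned right shift (JavaScript's >>> operator).
# >>> urshift(-1, 1)
# 2147483647  # 0x7fffffff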
d3f8e038
YCH
6071
6072
6073# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 6074# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
6075def decode_png(png_data):
6076 # Reference: https://www.w3.org/TR/PNG/
6077 header = png_data[8:]
6078
6079 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
6080 raise IOError('Not a valid PNG file.')
6081
6082 int_map = {1: '>B', 2: '>H', 4: '>I'}
6083 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
6084
6085 chunks = []
6086
6087 while header:
6088 length = unpack_integer(header[:4])
6089 header = header[4:]
6090
6091 chunk_type = header[:4]
6092 header = header[4:]
6093
6094 chunk_data = header[:length]
6095 header = header[length:]
6096
6097 header = header[4:] # Skip CRC
6098
6099 chunks.append({
6100 'type': chunk_type,
6101 'length': length,
6102 'data': chunk_data
6103 })
6104
6105 ihdr = chunks[0]['data']
6106
6107 width = unpack_integer(ihdr[:4])
6108 height = unpack_integer(ihdr[4:8])
6109
6110 idat = b''
6111
6112 for chunk in chunks:
6113 if chunk['type'] == b'IDAT':
6114 idat += chunk['data']
6115
6116 if not idat:
6117 raise IOError('Unable to read PNG data.')
6118
6119 decompressed_data = bytearray(zlib.decompress(idat))
6120
6121 stride = width * 3
6122 pixels = []
6123
6124 def _get_pixel(idx):
6125 x = idx % stride
6126 y = idx // stride
6127 return pixels[y][x]
6128
6129 for y in range(height):
6130 basePos = y * (1 + stride)
6131 filter_type = decompressed_data[basePos]
6132
6133 current_row = []
6134
6135 pixels.append(current_row)
6136
6137 for x in range(stride):
6138 color = decompressed_data[1 + basePos + x]
6139 basex = y * stride + x
6140 left = 0
6141 up = 0
6142
6143 if x > 2:
6144 left = _get_pixel(basex - 3)
6145 if y > 0:
6146 up = _get_pixel(basex - stride)
6147
6148 if filter_type == 1: # Sub
6149 color = (color + left) & 0xff
6150 elif filter_type == 2: # Up
6151 color = (color + up) & 0xff
6152 elif filter_type == 3: # Average
6153 color = (color + ((left + up) >> 1)) & 0xff
6154 elif filter_type == 4: # Paeth
6155 a = left
6156 b = up
6157 c = 0
6158
6159 if x > 2 and y > 0:
6160 c = _get_pixel(basex - stride - 3)
6161
6162 p = a + b - c
6163
6164 pa = abs(p - a)
6165 pb = abs(p - b)
6166 pc = abs(p - c)
6167
6168 if pa <= pb and pa <= pc:
6169 color = (color + a) & 0xff
6170 elif pb <= pc:
6171 color = (color + b) & 0xff
6172 else:
6173 color = (color + c) & 0xff
6174
6175 current_row.append(color)
6176
6177 return width, height, pixels
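# Illustrative usage (assumes `png_bytes` holds a complete 8-bit RGB PNG):
# width, height, rows = decode_png(png_bytes)
# r, g, b = rows[0][0:3]  # each row stores width * 3 byte values (R, G, B per pixel)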
efa97bdc
YCH
6178
6179
6180def write_xattr(path, key, value):
6181 # This mess below finds the best xattr tool for the job
6182 try:
6183 # try the pyxattr module...
6184 import xattr
6185
53a7e3d2
YCH
6186 if hasattr(xattr, 'set'): # pyxattr
6187 # Unicode arguments are not supported in python-pyxattr until
6188 # version 0.5.0
067aa17e 6189 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6190 pyxattr_required_version = '0.5.0'
6191 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6192 # TODO: fallback to CLI tools
6193 raise XAttrUnavailableError(
6194 'python-pyxattr is detected but is too old. '
7a5c1cfe 6195 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6196 'Falling back to other xattr implementations' % (
6197 pyxattr_required_version, xattr.__version__))
6198
6199 setxattr = xattr.set
6200 else: # xattr
6201 setxattr = xattr.setxattr
efa97bdc
YCH
6202
6203 try:
53a7e3d2 6204 setxattr(path, key, value)
efa97bdc
YCH
6205 except EnvironmentError as e:
6206 raise XAttrMetadataError(e.errno, e.strerror)
6207
6208 except ImportError:
6209 if compat_os_name == 'nt':
6210 # Write xattrs to NTFS Alternate Data Streams:
6211 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6212 assert ':' not in key
6213 assert os.path.exists(path)
6214
6215 ads_fn = path + ':' + key
6216 try:
6217 with open(ads_fn, 'wb') as f:
6218 f.write(value)
6219 except EnvironmentError as e:
6220 raise XAttrMetadataError(e.errno, e.strerror)
6221 else:
6222 user_has_setfattr = check_executable('setfattr', ['--version'])
6223 user_has_xattr = check_executable('xattr', ['-h'])
6224
6225 if user_has_setfattr or user_has_xattr:
6226
6227 value = value.decode('utf-8')
6228 if user_has_setfattr:
6229 executable = 'setfattr'
6230 opts = ['-n', key, '-v', value]
6231 elif user_has_xattr:
6232 executable = 'xattr'
6233 opts = ['-w', key, value]
6234
3089bc74
S
6235 cmd = ([encodeFilename(executable, True)]
6236 + [encodeArgument(o) for o in opts]
6237 + [encodeFilename(path, True)])
efa97bdc
YCH
6238
6239 try:
d3c93ec2 6240 p = Popen(
efa97bdc
YCH
6241 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6242 except EnvironmentError as e:
6243 raise XAttrMetadataError(e.errno, e.strerror)
d3c93ec2 6244 stdout, stderr = p.communicate_or_kill()
efa97bdc
YCH
6245 stderr = stderr.decode('utf-8', 'replace')
6246 if p.returncode != 0:
6247 raise XAttrMetadataError(p.returncode, stderr)
6248
6249 else:
6250 # On Unix, but we couldn't find pyxattr, setfattr, or xattr.
6251 if sys.platform.startswith('linux'):
6252 raise XAttrUnavailableError(
6253 "Couldn't find a tool to set the xattrs. "
6254 "Install either the python 'pyxattr' or 'xattr' "
6255 "modules, or the GNU 'attr' package "
6256 "(which contains the 'setfattr' tool).")
6257 else:
6258 raise XAttrUnavailableError(
6259 "Couldn't find a tool to set the xattrs. "
6260 "Install either the python 'xattr' module, "
6261 "or the 'xattr' binary.")
0c265486
YCH
6262
6263
6264def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6265 start_date = datetime.date(1950, 1, 1)
6266 end_date = datetime.date(1995, 12, 31)
6267 offset = random.randint(0, (end_date - start_date).days)
6268 random_date = start_date + datetime.timedelta(offset)
0c265486 6269 return {
aa374bc7
AS
6270 year_field: str(random_date.year),
6271 month_field: str(random_date.month),
6272 day_field: str(random_date.day),
0c265486 6273 }
732044af 6274
c76eb41b 6275
732044af 6276# Templates for internet shortcut files, which are plain text files.
6277DOT_URL_LINK_TEMPLATE = '''
6278[InternetShortcut]
6279URL=%(url)s
6280'''.lstrip()
6281
6282DOT_WEBLOC_LINK_TEMPLATE = '''
6283<?xml version="1.0" encoding="UTF-8"?>
6284<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6285<plist version="1.0">
6286<dict>
6287\t<key>URL</key>
6288\t<string>%(url)s</string>
6289</dict>
6290</plist>
6291'''.lstrip()
6292
6293DOT_DESKTOP_LINK_TEMPLATE = '''
6294[Desktop Entry]
6295Encoding=UTF-8
6296Name=%(filename)s
6297Type=Link
6298URL=%(url)s
6299Icon=text-html
6300'''.lstrip()
6301
08438d2c 6302LINK_TEMPLATES = {
6303 'url': DOT_URL_LINK_TEMPLATE,
6304 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
6305 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
6306}
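# Illustrative: rendering a '.url' shortcut (only the 'desktop' template also needs 'filename'):
# >>> print(LINK_TEMPLATES['url'] % {'url': 'https://example.com'}, end='')
# [InternetShortcut]
# URL=https://example.com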
6307
732044af 6308
6309def iri_to_uri(iri):
6310 """
6311 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6312
6313 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6314 """
6315
6316 iri_parts = compat_urllib_parse_urlparse(iri)
6317
6318 if '[' in iri_parts.netloc:
6319 raise ValueError('IPv6 URIs are not yet supported.')
6320 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6321
6322 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6323
6324 net_location = ''
6325 if iri_parts.username:
6326 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6327 if iri_parts.password is not None:
6328 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6329 net_location += '@'
6330
6331 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6332 # The 'idna' encoding produces ASCII text.
6333 if iri_parts.port is not None and iri_parts.port != 80:
6334 net_location += ':' + str(iri_parts.port)
6335
6336 return compat_urllib_parse_urlunparse(
6337 (iri_parts.scheme,
6338 net_location,
6339
6340 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6341
6342 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6343 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6344
6345 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6346 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6347
6348 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6349
6350 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
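# Illustrative (ASCII hostname, so only the path and query need escaping):
# >>> iri_to_uri('https://example.com/français?q=naïve')
# 'https://example.com/fran%C3%A7ais?q=na%C3%AFve'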
6351
6352
6353def to_high_limit_path(path):
6354 if sys.platform in ['win32', 'cygwin']:
6355 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6356 return r'\\?\ '.rstrip() + os.path.abspath(path)
6357
6358 return path
76d321f6 6359
c76eb41b 6360
b868936c 6361def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6362 if field is None:
6363 val = obj if obj is not None else default
6364 else:
6365 val = obj.get(field, default)
76d321f6 6366 if func and val not in ignore:
6367 val = func(val)
6368 return template % val if val not in ignore else default
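# Illustrative:
# >>> format_field({'width': 1920}, 'width', '%dpx')
# '1920px'
# >>> format_field({'width': None}, 'width', '%dpx', default='unknown')
# 'unknown'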
00dd0cd5 6369
6370
6371def clean_podcast_url(url):
6372 return re.sub(r'''(?x)
6373 (?:
6374 (?:
6375 chtbl\.com/track|
6376 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6377 play\.podtrac\.com
6378 )/[^/]+|
6379 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6380 flex\.acast\.com|
6381 pd(?:
6382 cn\.co| # https://podcorn.com/analytics-prefix/
6383 st\.fm # https://podsights.com/docs/
6384 )/e
6385 )/''', '', url)
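# Illustrative (the track ID is a placeholder): strips a known analytics prefix,
# leaving the direct media URL.
# >>> clean_podcast_url('https://chtbl.com/track/ABC123/traffic.megaphone.fm/XYZ.mp3')
# 'https://traffic.megaphone.fm/XYZ.mp3'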
ffcb8191
THD
6386
6387
6388_HEX_TABLE = '0123456789abcdef'
6389
6390
6391def random_uuidv4():
6392 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6393
6394
6395def make_dir(path, to_screen=None):
6396 try:
6397 dn = os.path.dirname(path)
6398 if dn and not os.path.exists(dn):
6399 os.makedirs(dn)
6400 return True
6401 except (OSError, IOError) as err:
6402 if callable(to_screen):
6403 to_screen('unable to create directory ' + error_to_compat_str(err))
6404 return False
f74980cb 6405
6406
6407def get_executable_path():
c552ae88 6408 from zipimport import zipimporter
6409 if hasattr(sys, 'frozen'): # Running from PyInstaller
6410 path = os.path.dirname(sys.executable)
6411 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6412 path = os.path.join(os.path.dirname(__file__), '../..')
6413 else:
6414 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6415 return os.path.abspath(path)
6416
6417
2f567473 6418def load_plugins(name, suffix, namespace):
3ae5e797 6419 classes = {}
f74980cb 6420 try:
019a94f7
ÁS
6421 plugins_spec = importlib.util.spec_from_file_location(
6422 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
6423 plugins = importlib.util.module_from_spec(plugins_spec)
6424 sys.modules[plugins_spec.name] = plugins
6425 plugins_spec.loader.exec_module(plugins)
f74980cb 6426 for name in dir(plugins):
2f567473 6427 if name in namespace:
6428 continue
6429 if not name.endswith(suffix):
f74980cb 6430 continue
6431 klass = getattr(plugins, name)
3ae5e797 6432 classes[name] = namespace[name] = klass
019a94f7 6433 except FileNotFoundError:
f74980cb 6434 pass
f74980cb 6435 return classes
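# Illustrative usage (a sketch): a call such as load_plugins('extractor', 'IE', globals())
# would import ytdlp_plugins/extractor/__init__.py from next to the executable and register
# every class whose name ends in the given suffix into the supplied namespace.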
06167fbb 6436
6437
325ebc17 6438def traverse_obj(
352d63fd 6439 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6440 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6441 ''' Traverse nested list/dict/tuple
8f334380 6442 @param path_list A list of paths which are checked one by one.
6443 Each path is a list of keys where each key is a string,
2614f646 6444 a function, a tuple of strings or "...".
6445 When a fuction is given, it takes the key as argument and
6446 returns whether the key matches or not. When a tuple is given,
8f334380 6447 all the keys given in the tuple are traversed, and
6448 "..." traverses all the keys in the object
325ebc17 6449 @param default Default value to return
352d63fd 6450 @param expected_type Only accept final value of this type (Can also be any callable)
6451 @param get_all Return all the values obtained from a path or only the first one
324ad820 6452 @param casesense Whether to consider dictionary keys as case sensitive
6453 @param is_user_input Whether the keys are generated from user input. If True,
6454 strings are converted to int/slice if necessary
6455 @param traverse_string Whether to traverse inside strings. If True, any
6456 non-compatible object will also be converted into a string
8f334380 6457 # TODO: Write tests
324ad820 6458 '''
325ebc17 6459 if not casesense:
dbf5416a 6460 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6461 path_list = (map(_lower, variadic(path)) for path in path_list)
6462
6463 def _traverse_obj(obj, path, _current_depth=0):
6464 nonlocal depth
6465 path = tuple(variadic(path))
6466 for i, key in enumerate(path):
582fad70 6467 if obj is None:
6468 return None
8f334380 6469 if isinstance(key, (list, tuple)):
6470 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6471 key = ...
6472 if key is ...:
6473 obj = (obj.values() if isinstance(obj, dict)
6474 else obj if isinstance(obj, (list, tuple, LazyList))
6475 else str(obj) if traverse_string else [])
6476 _current_depth += 1
6477 depth = max(depth, _current_depth)
6478 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
2614f646 6479 elif callable(key):
6480 if isinstance(obj, (list, tuple, LazyList)):
6481 obj = enumerate(obj)
6482 elif isinstance(obj, dict):
6483 obj = obj.items()
6484 else:
6485 if not traverse_string:
6486 return None
6487 obj = str(obj)
6488 _current_depth += 1
6489 depth = max(depth, _current_depth)
6490 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
575e17a1 6491 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6492 obj = (obj.get(key) if casesense or (key in obj)
6493 else next((v for k, v in obj.items() if _lower(k) == key), None))
6494 else:
6495 if is_user_input:
6496 key = (int_or_none(key) if ':' not in key
6497 else slice(*map(int_or_none, key.split(':'))))
8f334380 6498 if key == slice(None):
575e17a1 6499 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6500 if not isinstance(key, (int, slice)):
9fea350f 6501 return None
8f334380 6502 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6503 if not traverse_string:
6504 return None
6505 obj = str(obj)
6506 try:
6507 obj = obj[key]
6508 except IndexError:
324ad820 6509 return None
325ebc17 6510 return obj
6511
352d63fd 6512 if isinstance(expected_type, type):
6513 type_test = lambda val: val if isinstance(val, expected_type) else None
6514 elif expected_type is not None:
6515 type_test = expected_type
6516 else:
6517 type_test = lambda val: val
6518
8f334380 6519 for path in path_list:
6520 depth = 0
6521 val = _traverse_obj(obj, path)
325ebc17 6522 if val is not None:
8f334380 6523 if depth:
6524 for _ in range(depth - 1):
6586bca9 6525 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6526 val = [v for v in map(type_test, val) if v is not None]
8f334380 6527 if val:
352d63fd 6528 return val if get_all else val[0]
6529 else:
6530 val = type_test(val)
6531 if val is not None:
8f334380 6532 return val
325ebc17 6533 return default
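# Illustrative queries (a sketch of typical usage):
# >>> d = {'a': [{'b': 1}, {'b': 2}], 'C': {'d': 'x'}}
# >>> traverse_obj(d, ('a', 0, 'b'))
# 1
# >>> traverse_obj(d, ('a', ..., 'b'))  # '...' fans out over every element
# [1, 2]
# >>> traverse_obj(d, ('c', 'd'), casesense=False)
# 'x'
# >>> traverse_obj(d, ('missing', 'key'), ('a', 1, 'b'))  # paths are tried in order
# 2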
324ad820 6534
6535
6536def traverse_dict(dictn, keys, casesense=True):
6537 ''' For backward compatibility. Do not use '''
6538 return traverse_obj(dictn, keys, casesense=casesense,
6539 is_user_input=True, traverse_string=True)
6606817a 6540
6541
c634ad2a 6542def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6543 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
bd50a52b
THD
6544
6545
49fa4d9a
N
6546# create a JSON Web Signature (jws) with HS256 algorithm
6547# the resulting format is in JWS Compact Serialization
6548# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6549# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6550def jwt_encode_hs256(payload_data, key, headers={}):
6551 header_data = {
6552 'alg': 'HS256',
6553 'typ': 'JWT',
6554 }
6555 if headers:
6556 header_data.update(headers)
6557 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
6558 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
6559 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
6560 signature_b64 = base64.b64encode(h.digest())
6561 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6562 return token
819e0531 6563
6564
16b0d7e6 6565# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
6566def jwt_decode_hs256(jwt):
6567 header_b64, payload_b64, signature_b64 = jwt.split('.')
6568 payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
6569 return payload_data
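# Illustrative round trip (key and payload are made up; the signature is not verified on decode):
# >>> token = jwt_encode_hs256({'uid': 123}, 'secret')
# >>> jwt_decode_hs256(token.decode('utf-8'))
# {'uid': 123}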
6570
6571
819e0531 6572def supports_terminal_sequences(stream):
6573 if compat_os_name == 'nt':
d1d5c08f 6574 if get_windows_version() < (10, 0, 10586):
819e0531 6575 return False
6576 elif not os.getenv('TERM'):
6577 return False
6578 try:
6579 return stream.isatty()
6580 except BaseException:
6581 return False
6582
6583
ec11a9f4 6584_terminal_sequences_re = re.compile('\033\\[[^m]+m')
6585
6586
6587def remove_terminal_sequences(string):
6588 return _terminal_sequences_re.sub('', string)
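# Illustrative: strips ANSI SGR (colour/style) escape sequences.
# >>> remove_terminal_sequences('\033[0;31mError\033[0m')
# 'Error'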
6589
6590
6591def number_of_digits(number):
6592 return len('%d' % number)
34921b43 6593
6594
6595def join_nonempty(*values, delim='-', from_dict=None):
6596 if from_dict is not None:
c586f9e8 6597 values = map(from_dict.get, values)
34921b43 6598 return delim.join(map(str, filter(None, values)))
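# Illustrative: None and empty values are skipped before joining.
# >>> join_nonempty('1080p', None, 'webm', delim='.')
# '1080p.webm'
# >>> join_nonempty('acodec', 'vcodec', delim=', ', from_dict={'acodec': 'opus', 'vcodec': None})
# 'opus'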