yt_dlp/utils.py (blame at commit "[utils] Add `join_nonempty`")

cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
d77c3dfd 3
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
19import hashlib
20import hmac
019a94f7 21import importlib.util
03f9daab 22import io
79a2e94e 23import itertools
f4bfd65f 24import json
d77c3dfd 25import locale
02dbf93f 26import math
347de493 27import operator
d77c3dfd 28import os
c496ca96 29import platform
773f291d 30import random
d77c3dfd 31import re
c496ca96 32import socket
79a2e94e 33import ssl
1c088fa8 34import subprocess
d77c3dfd 35import sys
181c8655 36import tempfile
c380cc28 37import time
01951dda 38import traceback
bcf89ce6 39import xml.etree.ElementTree
d77c3dfd 40import zlib
d77c3dfd 41
8c25f81b 42from .compat import (
b4a3d461 43 compat_HTMLParseError,
8bb56eee 44 compat_HTMLParser,
201c1459 45 compat_HTTPError,
8f9312c3 46 compat_basestring,
8c25f81b 47 compat_chr,
1bab3437 48 compat_cookiejar,
d7cd9a9e 49 compat_ctypes_WINFUNCTYPE,
36e6f62c 50 compat_etree_fromstring,
51098426 51 compat_expanduser,
8c25f81b 52 compat_html_entities,
55b2f099 53 compat_html_entities_html5,
be4a824d 54 compat_http_client,
42db58ec 55 compat_integer_types,
e29663c6 56 compat_numeric_types,
c86b6142 57 compat_kwargs,
efa97bdc 58 compat_os_name,
8c25f81b 59 compat_parse_qs,
702ccf2d 60 compat_shlex_quote,
8c25f81b 61 compat_str,
edaa23f8 62 compat_struct_pack,
d3f8e038 63 compat_struct_unpack,
64 compat_urllib_error,
65 compat_urllib_parse,
15707c7e 66 compat_urllib_parse_urlencode,
8c25f81b 67 compat_urllib_parse_urlparse,
732044af 68 compat_urllib_parse_urlunparse,
69 compat_urllib_parse_quote,
70 compat_urllib_parse_quote_plus,
7581bfc9 71 compat_urllib_parse_unquote_plus,
72 compat_urllib_request,
73 compat_urlparse,
810c10ba 74 compat_xpath,
8c25f81b 75)
4644ac55 76
77from .socks import (
78 ProxyType,
79 sockssocket,
80)
81
4644ac55 82
83def register_socks_protocols():
84 # "Register" SOCKS protocols
85 # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
86 # URLs with protocols not in urlparse.uses_netloc are not handled correctly
87 for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
88 if scheme not in compat_urlparse.uses_netloc:
89 compat_urlparse.uses_netloc.append(scheme)
90
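# Illustrative sketch (the _example_* helper name is hypothetical, not upstream code):
# after register_socks_protocols() runs, the SOCKS schemes are known to urlparse,
# so proxy URLs keep their host:port as the netloc component.
def _example_register_socks_protocols():
    register_socks_protocols()
    assert 'socks5' in compat_urlparse.uses_netloc
    assert compat_urlparse.urlparse('socks5://127.0.0.1:1080').netloc == '127.0.0.1:1080'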
91
92# This is not clearly defined otherwise
93compiled_regex_type = type(re.compile(''))
94
95
96def random_user_agent():
97 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
98 _CHROME_VERSIONS = (
99 '74.0.3729.129',
100 '76.0.3780.3',
101 '76.0.3780.2',
102 '74.0.3729.128',
103 '76.0.3780.1',
104 '76.0.3780.0',
105 '75.0.3770.15',
106 '74.0.3729.127',
107 '74.0.3729.126',
108 '76.0.3779.1',
109 '76.0.3779.0',
110 '75.0.3770.14',
111 '74.0.3729.125',
112 '76.0.3778.1',
113 '76.0.3778.0',
114 '75.0.3770.13',
115 '74.0.3729.124',
116 '74.0.3729.123',
117 '73.0.3683.121',
118 '76.0.3777.1',
119 '76.0.3777.0',
120 '75.0.3770.12',
121 '74.0.3729.122',
122 '76.0.3776.4',
123 '75.0.3770.11',
124 '74.0.3729.121',
125 '76.0.3776.3',
126 '76.0.3776.2',
127 '73.0.3683.120',
128 '74.0.3729.120',
129 '74.0.3729.119',
130 '74.0.3729.118',
131 '76.0.3776.1',
132 '76.0.3776.0',
133 '76.0.3775.5',
134 '75.0.3770.10',
135 '74.0.3729.117',
136 '76.0.3775.4',
137 '76.0.3775.3',
138 '74.0.3729.116',
139 '75.0.3770.9',
140 '76.0.3775.2',
141 '76.0.3775.1',
142 '76.0.3775.0',
143 '75.0.3770.8',
144 '74.0.3729.115',
145 '74.0.3729.114',
146 '76.0.3774.1',
147 '76.0.3774.0',
148 '75.0.3770.7',
149 '74.0.3729.113',
150 '74.0.3729.112',
151 '74.0.3729.111',
152 '76.0.3773.1',
153 '76.0.3773.0',
154 '75.0.3770.6',
155 '74.0.3729.110',
156 '74.0.3729.109',
157 '76.0.3772.1',
158 '76.0.3772.0',
159 '75.0.3770.5',
160 '74.0.3729.108',
161 '74.0.3729.107',
162 '76.0.3771.1',
163 '76.0.3771.0',
164 '75.0.3770.4',
165 '74.0.3729.106',
166 '74.0.3729.105',
167 '75.0.3770.3',
168 '74.0.3729.104',
169 '74.0.3729.103',
170 '74.0.3729.102',
171 '75.0.3770.2',
172 '74.0.3729.101',
173 '75.0.3770.1',
174 '75.0.3770.0',
175 '74.0.3729.100',
176 '75.0.3769.5',
177 '75.0.3769.4',
178 '74.0.3729.99',
179 '75.0.3769.3',
180 '75.0.3769.2',
181 '75.0.3768.6',
182 '74.0.3729.98',
183 '75.0.3769.1',
184 '75.0.3769.0',
185 '74.0.3729.97',
186 '73.0.3683.119',
187 '73.0.3683.118',
188 '74.0.3729.96',
189 '75.0.3768.5',
190 '75.0.3768.4',
191 '75.0.3768.3',
192 '75.0.3768.2',
193 '74.0.3729.95',
194 '74.0.3729.94',
195 '75.0.3768.1',
196 '75.0.3768.0',
197 '74.0.3729.93',
198 '74.0.3729.92',
199 '73.0.3683.117',
200 '74.0.3729.91',
201 '75.0.3766.3',
202 '74.0.3729.90',
203 '75.0.3767.2',
204 '75.0.3767.1',
205 '75.0.3767.0',
206 '74.0.3729.89',
207 '73.0.3683.116',
208 '75.0.3766.2',
209 '74.0.3729.88',
210 '75.0.3766.1',
211 '75.0.3766.0',
212 '74.0.3729.87',
213 '73.0.3683.115',
214 '74.0.3729.86',
215 '75.0.3765.1',
216 '75.0.3765.0',
217 '74.0.3729.85',
218 '73.0.3683.114',
219 '74.0.3729.84',
220 '75.0.3764.1',
221 '75.0.3764.0',
222 '74.0.3729.83',
223 '73.0.3683.113',
224 '75.0.3763.2',
225 '75.0.3761.4',
226 '74.0.3729.82',
227 '75.0.3763.1',
228 '75.0.3763.0',
229 '74.0.3729.81',
230 '73.0.3683.112',
231 '75.0.3762.1',
232 '75.0.3762.0',
233 '74.0.3729.80',
234 '75.0.3761.3',
235 '74.0.3729.79',
236 '73.0.3683.111',
237 '75.0.3761.2',
238 '74.0.3729.78',
239 '74.0.3729.77',
240 '75.0.3761.1',
241 '75.0.3761.0',
242 '73.0.3683.110',
243 '74.0.3729.76',
244 '74.0.3729.75',
245 '75.0.3760.0',
246 '74.0.3729.74',
247 '75.0.3759.8',
248 '75.0.3759.7',
249 '75.0.3759.6',
250 '74.0.3729.73',
251 '75.0.3759.5',
252 '74.0.3729.72',
253 '73.0.3683.109',
254 '75.0.3759.4',
255 '75.0.3759.3',
256 '74.0.3729.71',
257 '75.0.3759.2',
258 '74.0.3729.70',
259 '73.0.3683.108',
260 '74.0.3729.69',
261 '75.0.3759.1',
262 '75.0.3759.0',
263 '74.0.3729.68',
264 '73.0.3683.107',
265 '74.0.3729.67',
266 '75.0.3758.1',
267 '75.0.3758.0',
268 '74.0.3729.66',
269 '73.0.3683.106',
270 '74.0.3729.65',
271 '75.0.3757.1',
272 '75.0.3757.0',
273 '74.0.3729.64',
274 '73.0.3683.105',
275 '74.0.3729.63',
276 '75.0.3756.1',
277 '75.0.3756.0',
278 '74.0.3729.62',
279 '73.0.3683.104',
280 '75.0.3755.3',
281 '75.0.3755.2',
282 '73.0.3683.103',
283 '75.0.3755.1',
284 '75.0.3755.0',
285 '74.0.3729.61',
286 '73.0.3683.102',
287 '74.0.3729.60',
288 '75.0.3754.2',
289 '74.0.3729.59',
290 '75.0.3753.4',
291 '74.0.3729.58',
292 '75.0.3754.1',
293 '75.0.3754.0',
294 '74.0.3729.57',
295 '73.0.3683.101',
296 '75.0.3753.3',
297 '75.0.3752.2',
298 '75.0.3753.2',
299 '74.0.3729.56',
300 '75.0.3753.1',
301 '75.0.3753.0',
302 '74.0.3729.55',
303 '73.0.3683.100',
304 '74.0.3729.54',
305 '75.0.3752.1',
306 '75.0.3752.0',
307 '74.0.3729.53',
308 '73.0.3683.99',
309 '74.0.3729.52',
310 '75.0.3751.1',
311 '75.0.3751.0',
312 '74.0.3729.51',
313 '73.0.3683.98',
314 '74.0.3729.50',
315 '75.0.3750.0',
316 '74.0.3729.49',
317 '74.0.3729.48',
318 '74.0.3729.47',
319 '75.0.3749.3',
320 '74.0.3729.46',
321 '73.0.3683.97',
322 '75.0.3749.2',
323 '74.0.3729.45',
324 '75.0.3749.1',
325 '75.0.3749.0',
326 '74.0.3729.44',
327 '73.0.3683.96',
328 '74.0.3729.43',
329 '74.0.3729.42',
330 '75.0.3748.1',
331 '75.0.3748.0',
332 '74.0.3729.41',
333 '75.0.3747.1',
334 '73.0.3683.95',
335 '75.0.3746.4',
336 '74.0.3729.40',
337 '74.0.3729.39',
338 '75.0.3747.0',
339 '75.0.3746.3',
340 '75.0.3746.2',
341 '74.0.3729.38',
342 '75.0.3746.1',
343 '75.0.3746.0',
344 '74.0.3729.37',
345 '73.0.3683.94',
346 '75.0.3745.5',
347 '75.0.3745.4',
348 '75.0.3745.3',
349 '75.0.3745.2',
350 '74.0.3729.36',
351 '75.0.3745.1',
352 '75.0.3745.0',
353 '75.0.3744.2',
354 '74.0.3729.35',
355 '73.0.3683.93',
356 '74.0.3729.34',
357 '75.0.3744.1',
358 '75.0.3744.0',
359 '74.0.3729.33',
360 '73.0.3683.92',
361 '74.0.3729.32',
362 '74.0.3729.31',
363 '73.0.3683.91',
364 '75.0.3741.2',
365 '75.0.3740.5',
366 '74.0.3729.30',
367 '75.0.3741.1',
368 '75.0.3741.0',
369 '74.0.3729.29',
370 '75.0.3740.4',
371 '73.0.3683.90',
372 '74.0.3729.28',
373 '75.0.3740.3',
374 '73.0.3683.89',
375 '75.0.3740.2',
376 '74.0.3729.27',
377 '75.0.3740.1',
378 '75.0.3740.0',
379 '74.0.3729.26',
380 '73.0.3683.88',
381 '73.0.3683.87',
382 '74.0.3729.25',
383 '75.0.3739.1',
384 '75.0.3739.0',
385 '73.0.3683.86',
386 '74.0.3729.24',
387 '73.0.3683.85',
388 '75.0.3738.4',
389 '75.0.3738.3',
390 '75.0.3738.2',
391 '75.0.3738.1',
392 '75.0.3738.0',
393 '74.0.3729.23',
394 '73.0.3683.84',
395 '74.0.3729.22',
396 '74.0.3729.21',
397 '75.0.3737.1',
398 '75.0.3737.0',
399 '74.0.3729.20',
400 '73.0.3683.83',
401 '74.0.3729.19',
402 '75.0.3736.1',
403 '75.0.3736.0',
404 '74.0.3729.18',
405 '73.0.3683.82',
406 '74.0.3729.17',
407 '75.0.3735.1',
408 '75.0.3735.0',
409 '74.0.3729.16',
410 '73.0.3683.81',
411 '75.0.3734.1',
412 '75.0.3734.0',
413 '74.0.3729.15',
414 '73.0.3683.80',
415 '74.0.3729.14',
416 '75.0.3733.1',
417 '75.0.3733.0',
418 '75.0.3732.1',
419 '74.0.3729.13',
420 '74.0.3729.12',
421 '73.0.3683.79',
422 '74.0.3729.11',
423 '75.0.3732.0',
424 '74.0.3729.10',
425 '73.0.3683.78',
426 '74.0.3729.9',
427 '74.0.3729.8',
428 '74.0.3729.7',
429 '75.0.3731.3',
430 '75.0.3731.2',
431 '75.0.3731.0',
432 '74.0.3729.6',
433 '73.0.3683.77',
434 '73.0.3683.76',
435 '75.0.3730.5',
436 '75.0.3730.4',
437 '73.0.3683.75',
438 '74.0.3729.5',
439 '73.0.3683.74',
440 '75.0.3730.3',
441 '75.0.3730.2',
442 '74.0.3729.4',
443 '73.0.3683.73',
444 '73.0.3683.72',
445 '75.0.3730.1',
446 '75.0.3730.0',
447 '74.0.3729.3',
448 '73.0.3683.71',
449 '74.0.3729.2',
450 '73.0.3683.70',
451 '74.0.3729.1',
452 '74.0.3729.0',
453 '74.0.3726.4',
454 '73.0.3683.69',
455 '74.0.3726.3',
456 '74.0.3728.0',
457 '74.0.3726.2',
458 '73.0.3683.68',
459 '74.0.3726.1',
460 '74.0.3726.0',
461 '74.0.3725.4',
462 '73.0.3683.67',
463 '73.0.3683.66',
464 '74.0.3725.3',
465 '74.0.3725.2',
466 '74.0.3725.1',
467 '74.0.3724.8',
468 '74.0.3725.0',
469 '73.0.3683.65',
470 '74.0.3724.7',
471 '74.0.3724.6',
472 '74.0.3724.5',
473 '74.0.3724.4',
474 '74.0.3724.3',
475 '74.0.3724.2',
476 '74.0.3724.1',
477 '74.0.3724.0',
478 '73.0.3683.64',
479 '74.0.3723.1',
480 '74.0.3723.0',
481 '73.0.3683.63',
482 '74.0.3722.1',
483 '74.0.3722.0',
484 '73.0.3683.62',
485 '74.0.3718.9',
486 '74.0.3702.3',
487 '74.0.3721.3',
488 '74.0.3721.2',
489 '74.0.3721.1',
490 '74.0.3721.0',
491 '74.0.3720.6',
492 '73.0.3683.61',
493 '72.0.3626.122',
494 '73.0.3683.60',
495 '74.0.3720.5',
496 '72.0.3626.121',
497 '74.0.3718.8',
498 '74.0.3720.4',
499 '74.0.3720.3',
500 '74.0.3718.7',
501 '74.0.3720.2',
502 '74.0.3720.1',
503 '74.0.3720.0',
504 '74.0.3718.6',
505 '74.0.3719.5',
506 '73.0.3683.59',
507 '74.0.3718.5',
508 '74.0.3718.4',
509 '74.0.3719.4',
510 '74.0.3719.3',
511 '74.0.3719.2',
512 '74.0.3719.1',
513 '73.0.3683.58',
514 '74.0.3719.0',
515 '73.0.3683.57',
516 '73.0.3683.56',
517 '74.0.3718.3',
518 '73.0.3683.55',
519 '74.0.3718.2',
520 '74.0.3718.1',
521 '74.0.3718.0',
522 '73.0.3683.54',
523 '74.0.3717.2',
524 '73.0.3683.53',
525 '74.0.3717.1',
526 '74.0.3717.0',
527 '73.0.3683.52',
528 '74.0.3716.1',
529 '74.0.3716.0',
530 '73.0.3683.51',
531 '74.0.3715.1',
532 '74.0.3715.0',
533 '73.0.3683.50',
534 '74.0.3711.2',
535 '74.0.3714.2',
536 '74.0.3713.3',
537 '74.0.3714.1',
538 '74.0.3714.0',
539 '73.0.3683.49',
540 '74.0.3713.1',
541 '74.0.3713.0',
542 '72.0.3626.120',
543 '73.0.3683.48',
544 '74.0.3712.2',
545 '74.0.3712.1',
546 '74.0.3712.0',
547 '73.0.3683.47',
548 '72.0.3626.119',
549 '73.0.3683.46',
550 '74.0.3710.2',
551 '72.0.3626.118',
552 '74.0.3711.1',
553 '74.0.3711.0',
554 '73.0.3683.45',
555 '72.0.3626.117',
556 '74.0.3710.1',
557 '74.0.3710.0',
558 '73.0.3683.44',
559 '72.0.3626.116',
560 '74.0.3709.1',
561 '74.0.3709.0',
562 '74.0.3704.9',
563 '73.0.3683.43',
564 '72.0.3626.115',
565 '74.0.3704.8',
566 '74.0.3704.7',
567 '74.0.3708.0',
568 '74.0.3706.7',
569 '74.0.3704.6',
570 '73.0.3683.42',
571 '72.0.3626.114',
572 '74.0.3706.6',
573 '72.0.3626.113',
574 '74.0.3704.5',
575 '74.0.3706.5',
576 '74.0.3706.4',
577 '74.0.3706.3',
578 '74.0.3706.2',
579 '74.0.3706.1',
580 '74.0.3706.0',
581 '73.0.3683.41',
582 '72.0.3626.112',
583 '74.0.3705.1',
584 '74.0.3705.0',
585 '73.0.3683.40',
586 '72.0.3626.111',
587 '73.0.3683.39',
588 '74.0.3704.4',
589 '73.0.3683.38',
590 '74.0.3704.3',
591 '74.0.3704.2',
592 '74.0.3704.1',
593 '74.0.3704.0',
594 '73.0.3683.37',
595 '72.0.3626.110',
596 '72.0.3626.109',
597 '74.0.3703.3',
598 '74.0.3703.2',
599 '73.0.3683.36',
600 '74.0.3703.1',
601 '74.0.3703.0',
602 '73.0.3683.35',
603 '72.0.3626.108',
604 '74.0.3702.2',
605 '74.0.3699.3',
606 '74.0.3702.1',
607 '74.0.3702.0',
608 '73.0.3683.34',
609 '72.0.3626.107',
610 '73.0.3683.33',
611 '74.0.3701.1',
612 '74.0.3701.0',
613 '73.0.3683.32',
614 '73.0.3683.31',
615 '72.0.3626.105',
616 '74.0.3700.1',
617 '74.0.3700.0',
618 '73.0.3683.29',
619 '72.0.3626.103',
620 '74.0.3699.2',
621 '74.0.3699.1',
622 '74.0.3699.0',
623 '73.0.3683.28',
624 '72.0.3626.102',
625 '73.0.3683.27',
626 '73.0.3683.26',
627 '74.0.3698.0',
628 '74.0.3696.2',
629 '72.0.3626.101',
630 '73.0.3683.25',
631 '74.0.3696.1',
632 '74.0.3696.0',
633 '74.0.3694.8',
634 '72.0.3626.100',
635 '74.0.3694.7',
636 '74.0.3694.6',
637 '74.0.3694.5',
638 '74.0.3694.4',
639 '72.0.3626.99',
640 '72.0.3626.98',
641 '74.0.3694.3',
642 '73.0.3683.24',
643 '72.0.3626.97',
644 '72.0.3626.96',
645 '72.0.3626.95',
646 '73.0.3683.23',
647 '72.0.3626.94',
648 '73.0.3683.22',
649 '73.0.3683.21',
650 '72.0.3626.93',
651 '74.0.3694.2',
652 '72.0.3626.92',
653 '74.0.3694.1',
654 '74.0.3694.0',
655 '74.0.3693.6',
656 '73.0.3683.20',
657 '72.0.3626.91',
658 '74.0.3693.5',
659 '74.0.3693.4',
660 '74.0.3693.3',
661 '74.0.3693.2',
662 '73.0.3683.19',
663 '74.0.3693.1',
664 '74.0.3693.0',
665 '73.0.3683.18',
666 '72.0.3626.90',
667 '74.0.3692.1',
668 '74.0.3692.0',
669 '73.0.3683.17',
670 '72.0.3626.89',
671 '74.0.3687.3',
672 '74.0.3691.1',
673 '74.0.3691.0',
674 '73.0.3683.16',
675 '72.0.3626.88',
676 '72.0.3626.87',
677 '73.0.3683.15',
678 '74.0.3690.1',
679 '74.0.3690.0',
680 '73.0.3683.14',
681 '72.0.3626.86',
682 '73.0.3683.13',
683 '73.0.3683.12',
684 '74.0.3689.1',
685 '74.0.3689.0',
686 '73.0.3683.11',
687 '72.0.3626.85',
688 '73.0.3683.10',
689 '72.0.3626.84',
690 '73.0.3683.9',
691 '74.0.3688.1',
692 '74.0.3688.0',
693 '73.0.3683.8',
694 '72.0.3626.83',
695 '74.0.3687.2',
696 '74.0.3687.1',
697 '74.0.3687.0',
698 '73.0.3683.7',
699 '72.0.3626.82',
700 '74.0.3686.4',
701 '72.0.3626.81',
702 '74.0.3686.3',
703 '74.0.3686.2',
704 '74.0.3686.1',
705 '74.0.3686.0',
706 '73.0.3683.6',
707 '72.0.3626.80',
708 '74.0.3685.1',
709 '74.0.3685.0',
710 '73.0.3683.5',
711 '72.0.3626.79',
712 '74.0.3684.1',
713 '74.0.3684.0',
714 '73.0.3683.4',
715 '72.0.3626.78',
716 '72.0.3626.77',
717 '73.0.3683.3',
718 '73.0.3683.2',
719 '72.0.3626.76',
720 '73.0.3683.1',
721 '73.0.3683.0',
722 '72.0.3626.75',
723 '71.0.3578.141',
724 '73.0.3682.1',
725 '73.0.3682.0',
726 '72.0.3626.74',
727 '71.0.3578.140',
728 '73.0.3681.4',
729 '73.0.3681.3',
730 '73.0.3681.2',
731 '73.0.3681.1',
732 '73.0.3681.0',
733 '72.0.3626.73',
734 '71.0.3578.139',
735 '72.0.3626.72',
736 '72.0.3626.71',
737 '73.0.3680.1',
738 '73.0.3680.0',
739 '72.0.3626.70',
740 '71.0.3578.138',
741 '73.0.3678.2',
742 '73.0.3679.1',
743 '73.0.3679.0',
744 '72.0.3626.69',
745 '71.0.3578.137',
746 '73.0.3678.1',
747 '73.0.3678.0',
748 '71.0.3578.136',
749 '73.0.3677.1',
750 '73.0.3677.0',
751 '72.0.3626.68',
752 '72.0.3626.67',
753 '71.0.3578.135',
754 '73.0.3676.1',
755 '73.0.3676.0',
756 '73.0.3674.2',
757 '72.0.3626.66',
758 '71.0.3578.134',
759 '73.0.3674.1',
760 '73.0.3674.0',
761 '72.0.3626.65',
762 '71.0.3578.133',
763 '73.0.3673.2',
764 '73.0.3673.1',
765 '73.0.3673.0',
766 '72.0.3626.64',
767 '71.0.3578.132',
768 '72.0.3626.63',
769 '72.0.3626.62',
770 '72.0.3626.61',
771 '72.0.3626.60',
772 '73.0.3672.1',
773 '73.0.3672.0',
774 '72.0.3626.59',
775 '71.0.3578.131',
776 '73.0.3671.3',
777 '73.0.3671.2',
778 '73.0.3671.1',
779 '73.0.3671.0',
780 '72.0.3626.58',
781 '71.0.3578.130',
782 '73.0.3670.1',
783 '73.0.3670.0',
784 '72.0.3626.57',
785 '71.0.3578.129',
786 '73.0.3669.1',
787 '73.0.3669.0',
788 '72.0.3626.56',
789 '71.0.3578.128',
790 '73.0.3668.2',
791 '73.0.3668.1',
792 '73.0.3668.0',
793 '72.0.3626.55',
794 '71.0.3578.127',
795 '73.0.3667.2',
796 '73.0.3667.1',
797 '73.0.3667.0',
798 '72.0.3626.54',
799 '71.0.3578.126',
800 '73.0.3666.1',
801 '73.0.3666.0',
802 '72.0.3626.53',
803 '71.0.3578.125',
804 '73.0.3665.4',
805 '73.0.3665.3',
806 '72.0.3626.52',
807 '73.0.3665.2',
808 '73.0.3664.4',
809 '73.0.3665.1',
810 '73.0.3665.0',
811 '72.0.3626.51',
812 '71.0.3578.124',
813 '72.0.3626.50',
814 '73.0.3664.3',
815 '73.0.3664.2',
816 '73.0.3664.1',
817 '73.0.3664.0',
818 '73.0.3663.2',
819 '72.0.3626.49',
820 '71.0.3578.123',
821 '73.0.3663.1',
822 '73.0.3663.0',
823 '72.0.3626.48',
824 '71.0.3578.122',
825 '73.0.3662.1',
826 '73.0.3662.0',
827 '72.0.3626.47',
828 '71.0.3578.121',
829 '73.0.3661.1',
830 '72.0.3626.46',
831 '73.0.3661.0',
832 '72.0.3626.45',
833 '71.0.3578.120',
834 '73.0.3660.2',
835 '73.0.3660.1',
836 '73.0.3660.0',
837 '72.0.3626.44',
838 '71.0.3578.119',
839 '73.0.3659.1',
840 '73.0.3659.0',
841 '72.0.3626.43',
842 '71.0.3578.118',
843 '73.0.3658.1',
844 '73.0.3658.0',
845 '72.0.3626.42',
846 '71.0.3578.117',
847 '73.0.3657.1',
848 '73.0.3657.0',
849 '72.0.3626.41',
850 '71.0.3578.116',
851 '73.0.3656.1',
852 '73.0.3656.0',
853 '72.0.3626.40',
854 '71.0.3578.115',
855 '73.0.3655.1',
856 '73.0.3655.0',
857 '72.0.3626.39',
858 '71.0.3578.114',
859 '73.0.3654.1',
860 '73.0.3654.0',
861 '72.0.3626.38',
862 '71.0.3578.113',
863 '73.0.3653.1',
864 '73.0.3653.0',
865 '72.0.3626.37',
866 '71.0.3578.112',
867 '73.0.3652.1',
868 '73.0.3652.0',
869 '72.0.3626.36',
870 '71.0.3578.111',
871 '73.0.3651.1',
872 '73.0.3651.0',
873 '72.0.3626.35',
874 '71.0.3578.110',
875 '73.0.3650.1',
876 '73.0.3650.0',
877 '72.0.3626.34',
878 '71.0.3578.109',
879 '73.0.3649.1',
880 '73.0.3649.0',
881 '72.0.3626.33',
882 '71.0.3578.108',
883 '73.0.3648.2',
884 '73.0.3648.1',
885 '73.0.3648.0',
886 '72.0.3626.32',
887 '71.0.3578.107',
888 '73.0.3647.2',
889 '73.0.3647.1',
890 '73.0.3647.0',
891 '72.0.3626.31',
892 '71.0.3578.106',
893 '73.0.3635.3',
894 '73.0.3646.2',
895 '73.0.3646.1',
896 '73.0.3646.0',
897 '72.0.3626.30',
898 '71.0.3578.105',
899 '72.0.3626.29',
900 '73.0.3645.2',
901 '73.0.3645.1',
902 '73.0.3645.0',
903 '72.0.3626.28',
904 '71.0.3578.104',
905 '72.0.3626.27',
906 '72.0.3626.26',
907 '72.0.3626.25',
908 '72.0.3626.24',
909 '73.0.3644.0',
910 '73.0.3643.2',
911 '72.0.3626.23',
912 '71.0.3578.103',
913 '73.0.3643.1',
914 '73.0.3643.0',
915 '72.0.3626.22',
916 '71.0.3578.102',
917 '73.0.3642.1',
918 '73.0.3642.0',
919 '72.0.3626.21',
920 '71.0.3578.101',
921 '73.0.3641.1',
922 '73.0.3641.0',
923 '72.0.3626.20',
924 '71.0.3578.100',
925 '72.0.3626.19',
926 '73.0.3640.1',
927 '73.0.3640.0',
928 '72.0.3626.18',
929 '73.0.3639.1',
930 '71.0.3578.99',
931 '73.0.3639.0',
932 '72.0.3626.17',
933 '73.0.3638.2',
934 '72.0.3626.16',
935 '73.0.3638.1',
936 '73.0.3638.0',
937 '72.0.3626.15',
938 '71.0.3578.98',
939 '73.0.3635.2',
940 '71.0.3578.97',
941 '73.0.3637.1',
942 '73.0.3637.0',
943 '72.0.3626.14',
944 '71.0.3578.96',
945 '71.0.3578.95',
946 '72.0.3626.13',
947 '71.0.3578.94',
948 '73.0.3636.2',
949 '71.0.3578.93',
950 '73.0.3636.1',
951 '73.0.3636.0',
952 '72.0.3626.12',
953 '71.0.3578.92',
954 '73.0.3635.1',
955 '73.0.3635.0',
956 '72.0.3626.11',
957 '71.0.3578.91',
958 '73.0.3634.2',
959 '73.0.3634.1',
960 '73.0.3634.0',
961 '72.0.3626.10',
962 '71.0.3578.90',
963 '71.0.3578.89',
964 '73.0.3633.2',
965 '73.0.3633.1',
966 '73.0.3633.0',
967 '72.0.3610.4',
968 '72.0.3626.9',
969 '71.0.3578.88',
970 '73.0.3632.5',
971 '73.0.3632.4',
972 '73.0.3632.3',
973 '73.0.3632.2',
974 '73.0.3632.1',
975 '73.0.3632.0',
976 '72.0.3626.8',
977 '71.0.3578.87',
978 '73.0.3631.2',
979 '73.0.3631.1',
980 '73.0.3631.0',
981 '72.0.3626.7',
982 '71.0.3578.86',
983 '72.0.3626.6',
984 '73.0.3630.1',
985 '73.0.3630.0',
986 '72.0.3626.5',
987 '71.0.3578.85',
988 '72.0.3626.4',
989 '73.0.3628.3',
990 '73.0.3628.2',
991 '73.0.3629.1',
992 '73.0.3629.0',
993 '72.0.3626.3',
994 '71.0.3578.84',
995 '73.0.3628.1',
996 '73.0.3628.0',
997 '71.0.3578.83',
998 '73.0.3627.1',
999 '73.0.3627.0',
1000 '72.0.3626.2',
1001 '71.0.3578.82',
1002 '71.0.3578.81',
1003 '71.0.3578.80',
1004 '72.0.3626.1',
1005 '72.0.3626.0',
1006 '71.0.3578.79',
1007 '70.0.3538.124',
1008 '71.0.3578.78',
1009 '72.0.3623.4',
1010 '72.0.3625.2',
1011 '72.0.3625.1',
1012 '72.0.3625.0',
1013 '71.0.3578.77',
1014 '70.0.3538.123',
1015 '72.0.3624.4',
1016 '72.0.3624.3',
1017 '72.0.3624.2',
1018 '71.0.3578.76',
1019 '72.0.3624.1',
1020 '72.0.3624.0',
1021 '72.0.3623.3',
1022 '71.0.3578.75',
1023 '70.0.3538.122',
1024 '71.0.3578.74',
1025 '72.0.3623.2',
1026 '72.0.3610.3',
1027 '72.0.3623.1',
1028 '72.0.3623.0',
1029 '72.0.3622.3',
1030 '72.0.3622.2',
1031 '71.0.3578.73',
1032 '70.0.3538.121',
1033 '72.0.3622.1',
1034 '72.0.3622.0',
1035 '71.0.3578.72',
1036 '70.0.3538.120',
1037 '72.0.3621.1',
1038 '72.0.3621.0',
1039 '71.0.3578.71',
1040 '70.0.3538.119',
1041 '72.0.3620.1',
1042 '72.0.3620.0',
1043 '71.0.3578.70',
1044 '70.0.3538.118',
1045 '71.0.3578.69',
1046 '72.0.3619.1',
1047 '72.0.3619.0',
1048 '71.0.3578.68',
1049 '70.0.3538.117',
1050 '71.0.3578.67',
1051 '72.0.3618.1',
1052 '72.0.3618.0',
1053 '71.0.3578.66',
1054 '70.0.3538.116',
1055 '72.0.3617.1',
1056 '72.0.3617.0',
1057 '71.0.3578.65',
1058 '70.0.3538.115',
1059 '72.0.3602.3',
1060 '71.0.3578.64',
1061 '72.0.3616.1',
1062 '72.0.3616.0',
1063 '71.0.3578.63',
1064 '70.0.3538.114',
1065 '71.0.3578.62',
1066 '72.0.3615.1',
1067 '72.0.3615.0',
1068 '71.0.3578.61',
1069 '70.0.3538.113',
1070 '72.0.3614.1',
1071 '72.0.3614.0',
1072 '71.0.3578.60',
1073 '70.0.3538.112',
1074 '72.0.3613.1',
1075 '72.0.3613.0',
1076 '71.0.3578.59',
1077 '70.0.3538.111',
1078 '72.0.3612.2',
1079 '72.0.3612.1',
1080 '72.0.3612.0',
1081 '70.0.3538.110',
1082 '71.0.3578.58',
1083 '70.0.3538.109',
1084 '72.0.3611.2',
1085 '72.0.3611.1',
1086 '72.0.3611.0',
1087 '71.0.3578.57',
1088 '70.0.3538.108',
1089 '72.0.3610.2',
1090 '71.0.3578.56',
1091 '71.0.3578.55',
1092 '72.0.3610.1',
1093 '72.0.3610.0',
1094 '71.0.3578.54',
1095 '70.0.3538.107',
1096 '71.0.3578.53',
1097 '72.0.3609.3',
1098 '71.0.3578.52',
1099 '72.0.3609.2',
1100 '71.0.3578.51',
1101 '72.0.3608.5',
1102 '72.0.3609.1',
1103 '72.0.3609.0',
1104 '71.0.3578.50',
1105 '70.0.3538.106',
1106 '72.0.3608.4',
1107 '72.0.3608.3',
1108 '72.0.3608.2',
1109 '71.0.3578.49',
1110 '72.0.3608.1',
1111 '72.0.3608.0',
1112 '70.0.3538.105',
1113 '71.0.3578.48',
1114 '72.0.3607.1',
1115 '72.0.3607.0',
1116 '71.0.3578.47',
1117 '70.0.3538.104',
1118 '72.0.3606.2',
1119 '72.0.3606.1',
1120 '72.0.3606.0',
1121 '71.0.3578.46',
1122 '70.0.3538.103',
1123 '70.0.3538.102',
1124 '72.0.3605.3',
1125 '72.0.3605.2',
1126 '72.0.3605.1',
1127 '72.0.3605.0',
1128 '71.0.3578.45',
1129 '70.0.3538.101',
1130 '71.0.3578.44',
1131 '71.0.3578.43',
1132 '70.0.3538.100',
1133 '70.0.3538.99',
1134 '71.0.3578.42',
1135 '72.0.3604.1',
1136 '72.0.3604.0',
1137 '71.0.3578.41',
1138 '70.0.3538.98',
1139 '71.0.3578.40',
1140 '72.0.3603.2',
1141 '72.0.3603.1',
1142 '72.0.3603.0',
1143 '71.0.3578.39',
1144 '70.0.3538.97',
1145 '72.0.3602.2',
1146 '71.0.3578.38',
1147 '71.0.3578.37',
1148 '72.0.3602.1',
1149 '72.0.3602.0',
1150 '71.0.3578.36',
1151 '70.0.3538.96',
1152 '72.0.3601.1',
1153 '72.0.3601.0',
1154 '71.0.3578.35',
1155 '70.0.3538.95',
1156 '72.0.3600.1',
1157 '72.0.3600.0',
1158 '71.0.3578.34',
1159 '70.0.3538.94',
1160 '72.0.3599.3',
1161 '72.0.3599.2',
1162 '72.0.3599.1',
1163 '72.0.3599.0',
1164 '71.0.3578.33',
1165 '70.0.3538.93',
1166 '72.0.3598.1',
1167 '72.0.3598.0',
1168 '71.0.3578.32',
1169 '70.0.3538.87',
1170 '72.0.3597.1',
1171 '72.0.3597.0',
1172 '72.0.3596.2',
1173 '71.0.3578.31',
1174 '70.0.3538.86',
1175 '71.0.3578.30',
1176 '71.0.3578.29',
1177 '72.0.3596.1',
1178 '72.0.3596.0',
1179 '71.0.3578.28',
1180 '70.0.3538.85',
1181 '72.0.3595.2',
1182 '72.0.3591.3',
1183 '72.0.3595.1',
1184 '72.0.3595.0',
1185 '71.0.3578.27',
1186 '70.0.3538.84',
1187 '72.0.3594.1',
1188 '72.0.3594.0',
1189 '71.0.3578.26',
1190 '70.0.3538.83',
1191 '72.0.3593.2',
1192 '72.0.3593.1',
1193 '72.0.3593.0',
1194 '71.0.3578.25',
1195 '70.0.3538.82',
1196 '72.0.3589.3',
1197 '72.0.3592.2',
1198 '72.0.3592.1',
1199 '72.0.3592.0',
1200 '71.0.3578.24',
1201 '72.0.3589.2',
1202 '70.0.3538.81',
1203 '70.0.3538.80',
1204 '72.0.3591.2',
1205 '72.0.3591.1',
1206 '72.0.3591.0',
1207 '71.0.3578.23',
1208 '70.0.3538.79',
1209 '71.0.3578.22',
1210 '72.0.3590.1',
1211 '72.0.3590.0',
1212 '71.0.3578.21',
1213 '70.0.3538.78',
1214 '70.0.3538.77',
1215 '72.0.3589.1',
1216 '72.0.3589.0',
1217 '71.0.3578.20',
1218 '70.0.3538.76',
1219 '71.0.3578.19',
1220 '70.0.3538.75',
1221 '72.0.3588.1',
1222 '72.0.3588.0',
1223 '71.0.3578.18',
1224 '70.0.3538.74',
1225 '72.0.3586.2',
1226 '72.0.3587.0',
1227 '71.0.3578.17',
1228 '70.0.3538.73',
1229 '72.0.3586.1',
1230 '72.0.3586.0',
1231 '71.0.3578.16',
1232 '70.0.3538.72',
1233 '72.0.3585.1',
1234 '72.0.3585.0',
1235 '71.0.3578.15',
1236 '70.0.3538.71',
1237 '71.0.3578.14',
1238 '72.0.3584.1',
1239 '72.0.3584.0',
1240 '71.0.3578.13',
1241 '70.0.3538.70',
1242 '72.0.3583.2',
1243 '71.0.3578.12',
1244 '72.0.3583.1',
1245 '72.0.3583.0',
1246 '71.0.3578.11',
1247 '70.0.3538.69',
1248 '71.0.3578.10',
1249 '72.0.3582.0',
1250 '72.0.3581.4',
1251 '71.0.3578.9',
1252 '70.0.3538.67',
1253 '72.0.3581.3',
1254 '72.0.3581.2',
1255 '72.0.3581.1',
1256 '72.0.3581.0',
1257 '71.0.3578.8',
1258 '70.0.3538.66',
1259 '72.0.3580.1',
1260 '72.0.3580.0',
1261 '71.0.3578.7',
1262 '70.0.3538.65',
1263 '71.0.3578.6',
1264 '72.0.3579.1',
1265 '72.0.3579.0',
1266 '71.0.3578.5',
1267 '70.0.3538.64',
1268 '71.0.3578.4',
1269 '71.0.3578.3',
1270 '71.0.3578.2',
1271 '71.0.3578.1',
1272 '71.0.3578.0',
1273 '70.0.3538.63',
1274 '69.0.3497.128',
1275 '70.0.3538.62',
1276 '70.0.3538.61',
1277 '70.0.3538.60',
1278 '70.0.3538.59',
1279 '71.0.3577.1',
1280 '71.0.3577.0',
1281 '70.0.3538.58',
1282 '69.0.3497.127',
1283 '71.0.3576.2',
1284 '71.0.3576.1',
1285 '71.0.3576.0',
1286 '70.0.3538.57',
1287 '70.0.3538.56',
1288 '71.0.3575.2',
1289 '70.0.3538.55',
1290 '69.0.3497.126',
1291 '70.0.3538.54',
1292 '71.0.3575.1',
1293 '71.0.3575.0',
1294 '71.0.3574.1',
1295 '71.0.3574.0',
1296 '70.0.3538.53',
1297 '69.0.3497.125',
1298 '70.0.3538.52',
1299 '71.0.3573.1',
1300 '71.0.3573.0',
1301 '70.0.3538.51',
1302 '69.0.3497.124',
1303 '71.0.3572.1',
1304 '71.0.3572.0',
1305 '70.0.3538.50',
1306 '69.0.3497.123',
1307 '71.0.3571.2',
1308 '70.0.3538.49',
1309 '69.0.3497.122',
1310 '71.0.3571.1',
1311 '71.0.3571.0',
1312 '70.0.3538.48',
1313 '69.0.3497.121',
1314 '71.0.3570.1',
1315 '71.0.3570.0',
1316 '70.0.3538.47',
1317 '69.0.3497.120',
1318 '71.0.3568.2',
1319 '71.0.3569.1',
1320 '71.0.3569.0',
1321 '70.0.3538.46',
1322 '69.0.3497.119',
1323 '70.0.3538.45',
1324 '71.0.3568.1',
1325 '71.0.3568.0',
1326 '70.0.3538.44',
1327 '69.0.3497.118',
1328 '70.0.3538.43',
1329 '70.0.3538.42',
1330 '71.0.3567.1',
1331 '71.0.3567.0',
1332 '70.0.3538.41',
1333 '69.0.3497.117',
1334 '71.0.3566.1',
1335 '71.0.3566.0',
1336 '70.0.3538.40',
1337 '69.0.3497.116',
1338 '71.0.3565.1',
1339 '71.0.3565.0',
1340 '70.0.3538.39',
1341 '69.0.3497.115',
1342 '71.0.3564.1',
1343 '71.0.3564.0',
1344 '70.0.3538.38',
1345 '69.0.3497.114',
1346 '71.0.3563.0',
1347 '71.0.3562.2',
1348 '70.0.3538.37',
1349 '69.0.3497.113',
1350 '70.0.3538.36',
1351 '70.0.3538.35',
1352 '71.0.3562.1',
1353 '71.0.3562.0',
1354 '70.0.3538.34',
1355 '69.0.3497.112',
1356 '70.0.3538.33',
1357 '71.0.3561.1',
1358 '71.0.3561.0',
1359 '70.0.3538.32',
1360 '69.0.3497.111',
1361 '71.0.3559.6',
1362 '71.0.3560.1',
1363 '71.0.3560.0',
1364 '71.0.3559.5',
1365 '71.0.3559.4',
1366 '70.0.3538.31',
1367 '69.0.3497.110',
1368 '71.0.3559.3',
1369 '70.0.3538.30',
1370 '69.0.3497.109',
1371 '71.0.3559.2',
1372 '71.0.3559.1',
1373 '71.0.3559.0',
1374 '70.0.3538.29',
1375 '69.0.3497.108',
1376 '71.0.3558.2',
1377 '71.0.3558.1',
1378 '71.0.3558.0',
1379 '70.0.3538.28',
1380 '69.0.3497.107',
1381 '71.0.3557.2',
1382 '71.0.3557.1',
1383 '71.0.3557.0',
1384 '70.0.3538.27',
1385 '69.0.3497.106',
1386 '71.0.3554.4',
1387 '70.0.3538.26',
1388 '71.0.3556.1',
1389 '71.0.3556.0',
1390 '70.0.3538.25',
1391 '71.0.3554.3',
1392 '69.0.3497.105',
1393 '71.0.3554.2',
1394 '70.0.3538.24',
1395 '69.0.3497.104',
1396 '71.0.3555.2',
1397 '70.0.3538.23',
1398 '71.0.3555.1',
1399 '71.0.3555.0',
1400 '70.0.3538.22',
1401 '69.0.3497.103',
1402 '71.0.3554.1',
1403 '71.0.3554.0',
1404 '70.0.3538.21',
1405 '69.0.3497.102',
1406 '71.0.3553.3',
1407 '70.0.3538.20',
1408 '69.0.3497.101',
1409 '71.0.3553.2',
1410 '69.0.3497.100',
1411 '71.0.3553.1',
1412 '71.0.3553.0',
1413 '70.0.3538.19',
1414 '69.0.3497.99',
1415 '69.0.3497.98',
1416 '69.0.3497.97',
1417 '71.0.3552.6',
1418 '71.0.3552.5',
1419 '71.0.3552.4',
1420 '71.0.3552.3',
1421 '71.0.3552.2',
1422 '71.0.3552.1',
1423 '71.0.3552.0',
1424 '70.0.3538.18',
1425 '69.0.3497.96',
1426 '71.0.3551.3',
1427 '71.0.3551.2',
1428 '71.0.3551.1',
1429 '71.0.3551.0',
1430 '70.0.3538.17',
1431 '69.0.3497.95',
1432 '71.0.3550.3',
1433 '71.0.3550.2',
1434 '71.0.3550.1',
1435 '71.0.3550.0',
1436 '70.0.3538.16',
1437 '69.0.3497.94',
1438 '71.0.3549.1',
1439 '71.0.3549.0',
1440 '70.0.3538.15',
1441 '69.0.3497.93',
1442 '69.0.3497.92',
1443 '71.0.3548.1',
1444 '71.0.3548.0',
1445 '70.0.3538.14',
1446 '69.0.3497.91',
1447 '71.0.3547.1',
1448 '71.0.3547.0',
1449 '70.0.3538.13',
1450 '69.0.3497.90',
1451 '71.0.3546.2',
1452 '69.0.3497.89',
1453 '71.0.3546.1',
1454 '71.0.3546.0',
1455 '70.0.3538.12',
1456 '69.0.3497.88',
1457 '71.0.3545.4',
1458 '71.0.3545.3',
1459 '71.0.3545.2',
1460 '71.0.3545.1',
1461 '71.0.3545.0',
1462 '70.0.3538.11',
1463 '69.0.3497.87',
1464 '71.0.3544.5',
1465 '71.0.3544.4',
1466 '71.0.3544.3',
1467 '71.0.3544.2',
1468 '71.0.3544.1',
1469 '71.0.3544.0',
1470 '69.0.3497.86',
1471 '70.0.3538.10',
1472 '69.0.3497.85',
1473 '70.0.3538.9',
1474 '69.0.3497.84',
1475 '71.0.3543.4',
1476 '70.0.3538.8',
1477 '71.0.3543.3',
1478 '71.0.3543.2',
1479 '71.0.3543.1',
1480 '71.0.3543.0',
1481 '70.0.3538.7',
1482 '69.0.3497.83',
1483 '71.0.3542.2',
1484 '71.0.3542.1',
1485 '71.0.3542.0',
1486 '70.0.3538.6',
1487 '69.0.3497.82',
1488 '69.0.3497.81',
1489 '71.0.3541.1',
1490 '71.0.3541.0',
1491 '70.0.3538.5',
1492 '69.0.3497.80',
1493 '71.0.3540.1',
1494 '71.0.3540.0',
1495 '70.0.3538.4',
1496 '69.0.3497.79',
1497 '70.0.3538.3',
1498 '71.0.3539.1',
1499 '71.0.3539.0',
1500 '69.0.3497.78',
1501 '68.0.3440.134',
1502 '69.0.3497.77',
1503 '70.0.3538.2',
1504 '70.0.3538.1',
1505 '70.0.3538.0',
1506 '69.0.3497.76',
1507 '68.0.3440.133',
1508 '69.0.3497.75',
1509 '70.0.3537.2',
1510 '70.0.3537.1',
1511 '70.0.3537.0',
1512 '69.0.3497.74',
1513 '68.0.3440.132',
1514 '70.0.3536.0',
1515 '70.0.3535.5',
1516 '70.0.3535.4',
1517 '70.0.3535.3',
1518 '69.0.3497.73',
1519 '68.0.3440.131',
1520 '70.0.3532.8',
1521 '70.0.3532.7',
1522 '69.0.3497.72',
1523 '69.0.3497.71',
1524 '70.0.3535.2',
1525 '70.0.3535.1',
1526 '70.0.3535.0',
1527 '69.0.3497.70',
1528 '68.0.3440.130',
1529 '69.0.3497.69',
1530 '68.0.3440.129',
1531 '70.0.3534.4',
1532 '70.0.3534.3',
1533 '70.0.3534.2',
1534 '70.0.3534.1',
1535 '70.0.3534.0',
1536 '69.0.3497.68',
1537 '68.0.3440.128',
1538 '70.0.3533.2',
1539 '70.0.3533.1',
1540 '70.0.3533.0',
1541 '69.0.3497.67',
1542 '68.0.3440.127',
1543 '70.0.3532.6',
1544 '70.0.3532.5',
1545 '70.0.3532.4',
1546 '69.0.3497.66',
1547 '68.0.3440.126',
1548 '70.0.3532.3',
1549 '70.0.3532.2',
1550 '70.0.3532.1',
1551 '69.0.3497.60',
1552 '69.0.3497.65',
1553 '69.0.3497.64',
1554 '70.0.3532.0',
1555 '70.0.3531.0',
1556 '70.0.3530.4',
1557 '70.0.3530.3',
1558 '70.0.3530.2',
1559 '69.0.3497.58',
1560 '68.0.3440.125',
1561 '69.0.3497.57',
1562 '69.0.3497.56',
1563 '69.0.3497.55',
1564 '69.0.3497.54',
1565 '70.0.3530.1',
1566 '70.0.3530.0',
1567 '69.0.3497.53',
1568 '68.0.3440.124',
1569 '69.0.3497.52',
1570 '70.0.3529.3',
1571 '70.0.3529.2',
1572 '70.0.3529.1',
1573 '70.0.3529.0',
1574 '69.0.3497.51',
1575 '70.0.3528.4',
1576 '68.0.3440.123',
1577 '70.0.3528.3',
1578 '70.0.3528.2',
1579 '70.0.3528.1',
1580 '70.0.3528.0',
1581 '69.0.3497.50',
1582 '68.0.3440.122',
1583 '70.0.3527.1',
1584 '70.0.3527.0',
1585 '69.0.3497.49',
1586 '68.0.3440.121',
1587 '70.0.3526.1',
1588 '70.0.3526.0',
1589 '68.0.3440.120',
1590 '69.0.3497.48',
1591 '69.0.3497.47',
1592 '68.0.3440.119',
1593 '68.0.3440.118',
1594 '70.0.3525.5',
1595 '70.0.3525.4',
1596 '70.0.3525.3',
1597 '68.0.3440.117',
1598 '69.0.3497.46',
1599 '70.0.3525.2',
1600 '70.0.3525.1',
1601 '70.0.3525.0',
1602 '69.0.3497.45',
1603 '68.0.3440.116',
1604 '70.0.3524.4',
1605 '70.0.3524.3',
1606 '69.0.3497.44',
1607 '70.0.3524.2',
1608 '70.0.3524.1',
1609 '70.0.3524.0',
1610 '70.0.3523.2',
1611 '69.0.3497.43',
1612 '68.0.3440.115',
1613 '70.0.3505.9',
1614 '69.0.3497.42',
1615 '70.0.3505.8',
1616 '70.0.3523.1',
1617 '70.0.3523.0',
1618 '69.0.3497.41',
1619 '68.0.3440.114',
1620 '70.0.3505.7',
1621 '69.0.3497.40',
1622 '70.0.3522.1',
1623 '70.0.3522.0',
1624 '70.0.3521.2',
1625 '69.0.3497.39',
1626 '68.0.3440.113',
1627 '70.0.3505.6',
1628 '70.0.3521.1',
1629 '70.0.3521.0',
1630 '69.0.3497.38',
1631 '68.0.3440.112',
1632 '70.0.3520.1',
1633 '70.0.3520.0',
1634 '69.0.3497.37',
1635 '68.0.3440.111',
1636 '70.0.3519.3',
1637 '70.0.3519.2',
1638 '70.0.3519.1',
1639 '70.0.3519.0',
1640 '69.0.3497.36',
1641 '68.0.3440.110',
1642 '70.0.3518.1',
1643 '70.0.3518.0',
1644 '69.0.3497.35',
1645 '69.0.3497.34',
1646 '68.0.3440.109',
1647 '70.0.3517.1',
1648 '70.0.3517.0',
1649 '69.0.3497.33',
1650 '68.0.3440.108',
1651 '69.0.3497.32',
1652 '70.0.3516.3',
1653 '70.0.3516.2',
1654 '70.0.3516.1',
1655 '70.0.3516.0',
1656 '69.0.3497.31',
1657 '68.0.3440.107',
1658 '70.0.3515.4',
1659 '68.0.3440.106',
1660 '70.0.3515.3',
1661 '70.0.3515.2',
1662 '70.0.3515.1',
1663 '70.0.3515.0',
1664 '69.0.3497.30',
1665 '68.0.3440.105',
1666 '68.0.3440.104',
1667 '70.0.3514.2',
1668 '70.0.3514.1',
1669 '70.0.3514.0',
1670 '69.0.3497.29',
1671 '68.0.3440.103',
1672 '70.0.3513.1',
1673 '70.0.3513.0',
1674 '69.0.3497.28',
1675 )
1676 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1677
1678
3e669f36 1679std_headers = {
f7a147e3 1680 'User-Agent': random_user_agent(),
1681 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
1682 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
1683 'Accept-Encoding': 'gzip, deflate',
1684 'Accept-Language': 'en-us,en;q=0.5',
3e669f36 1685}
f427df17 1686
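# Illustrative sketch (hypothetical _example_* helper, not part of the module):
# random_user_agent() fills the Chrome version slot of the UA template, and
# std_headers is the default header set sent with HTTP requests.
def _example_default_headers():
    ua = random_user_agent()
    assert ua.startswith('Mozilla/5.0 (Windows NT 10.0; Win64; x64)')
    assert 'Chrome/' in ua and ua.endswith('Safari/537.36')
    assert 'Chrome/' in std_headers['User-Agent']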
5f6a1245 1687
1688USER_AGENTS = {
1689 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
1690}
1691
1692
1693NO_DEFAULT = object()
1694
1695ENGLISH_MONTH_NAMES = [
1696 'January', 'February', 'March', 'April', 'May', 'June',
1697 'July', 'August', 'September', 'October', 'November', 'December']
1698
1699MONTH_NAMES = {
1700 'en': ENGLISH_MONTH_NAMES,
1701 'fr': [
1702 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
1703 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
f6717dec 1704}
a942d6cb 1705
1706KNOWN_EXTENSIONS = (
1707 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
1708 'flv', 'f4v', 'f4a', 'f4b',
1709 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
1710 'mkv', 'mka', 'mk3d',
1711 'avi', 'divx',
1712 'mov',
1713 'asf', 'wmv', 'wma',
1714 '3gp', '3g2',
1715 'mp3',
1716 'flac',
1717 'ape',
1718 'wav',
1719 'f4f', 'f4m', 'm3u8', 'smil')
1720
c587cbb7 1721# needed for sanitizing filenames in restricted mode
c8827027 1722ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
1723 itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
1724 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1725
1726DATE_FORMATS = (
1727 '%d %B %Y',
1728 '%d %b %Y',
1729 '%B %d %Y',
1730 '%B %dst %Y',
1731 '%B %dnd %Y',
9d30c213 1732 '%B %drd %Y',
cb655f34 1733 '%B %dth %Y',
46f59e89 1734 '%b %d %Y',
1735 '%b %dst %Y',
1736 '%b %dnd %Y',
9d30c213 1737 '%b %drd %Y',
cb655f34 1738 '%b %dth %Y',
1739 '%b %dst %Y %I:%M',
1740 '%b %dnd %Y %I:%M',
9d30c213 1741 '%b %drd %Y %I:%M',
1742 '%b %dth %Y %I:%M',
1743 '%Y %m %d',
1744 '%Y-%m-%d',
bccdbd22 1745 '%Y.%m.%d.',
46f59e89 1746 '%Y/%m/%d',
81c13222 1747 '%Y/%m/%d %H:%M',
46f59e89 1748 '%Y/%m/%d %H:%M:%S',
1749 '%Y%m%d%H%M',
1750 '%Y%m%d%H%M%S',
0c1c6f4b 1751 '%Y-%m-%d %H:%M',
1752 '%Y-%m-%d %H:%M:%S',
1753 '%Y-%m-%d %H:%M:%S.%f',
5014558a 1754 '%Y-%m-%d %H:%M:%S:%f',
1755 '%d.%m.%Y %H:%M',
1756 '%d.%m.%Y %H.%M',
1757 '%Y-%m-%dT%H:%M:%SZ',
1758 '%Y-%m-%dT%H:%M:%S.%fZ',
1759 '%Y-%m-%dT%H:%M:%S.%f0Z',
1760 '%Y-%m-%dT%H:%M:%S',
1761 '%Y-%m-%dT%H:%M:%S.%f',
1762 '%Y-%m-%dT%H:%M',
1763 '%b %d %Y at %H:%M',
1764 '%b %d %Y at %H:%M:%S',
1765 '%B %d %Y at %H:%M',
1766 '%B %d %Y at %H:%M:%S',
a63d9bd0 1767 '%H:%M %d-%b-%Y',
1768)
1769
1770DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
1771DATE_FORMATS_DAY_FIRST.extend([
1772 '%d-%m-%Y',
1773 '%d.%m.%Y',
1774 '%d.%m.%y',
1775 '%d/%m/%Y',
1776 '%d/%m/%y',
1777 '%d/%m/%Y %H:%M:%S',
1778])
1779
1780DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
1781DATE_FORMATS_MONTH_FIRST.extend([
1782 '%m-%d-%Y',
1783 '%m.%d.%Y',
1784 '%m/%d/%Y',
1785 '%m/%d/%y',
1786 '%m/%d/%Y %H:%M:%S',
1787])
1788
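# Illustrative sketch (hypothetical _example_* helper): the DATE_FORMATS* tuples are
# strptime() patterns; the day-first and month-first variants only differ in how
# ambiguous numeric dates are read by the date-parsing helpers further down.
def _example_date_formats():
    from datetime import datetime
    assert datetime.strptime('2021/05/03 10:30', '%Y/%m/%d %H:%M') == datetime(2021, 5, 3, 10, 30)
    assert datetime.strptime('03/05/2021', '%d/%m/%Y') == datetime(2021, 5, 3)
    assert datetime.strptime('03/05/2021', '%m/%d/%Y') == datetime(2021, 3, 5)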
06b3fe29 1789PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
22f5f5c6 1790JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1791
7105440c 1792
d77c3dfd 1793def preferredencoding():
59ae15a5 1794 """Get preferred encoding.
d77c3dfd 1795
1796 Returns the best encoding scheme for the system, based on
1797 locale.getpreferredencoding() and some further tweaks.
1798 """
1799 try:
1800 pref = locale.getpreferredencoding()
28e614de 1801 'TEST'.encode(pref)
70a1165b 1802 except Exception:
59ae15a5 1803 pref = 'UTF-8'
bae611f2 1804
59ae15a5 1805 return pref
d77c3dfd 1806
f4bfd65f 1807
181c8655 1808def write_json_file(obj, fn):
1394646a 1809 """ Encode obj as JSON and write it to fn, atomically if possible """
181c8655 1810
92120217 1811 fn = encodeFilename(fn)
61ee5aeb 1812 if sys.version_info < (3, 0) and sys.platform != 'win32':
1813 encoding = get_filesystem_encoding()
1814 # os.path.basename returns a bytes object, but NamedTemporaryFile
1815 # will fail if the filename contains non ascii characters unless we
1816 # use a unicode object
1817 path_basename = lambda f: os.path.basename(fn).decode(encoding)
1818 # the same for os.path.dirname
1819 path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
1820 else:
1821 path_basename = os.path.basename
1822 path_dirname = os.path.dirname
1823
1824 args = {
1825 'suffix': '.tmp',
1826 'prefix': path_basename(fn) + '.',
1827 'dir': path_dirname(fn),
1828 'delete': False,
1829 }
1830
1831 # In Python 2.x, json.dump expects a bytestream.
1832 # In Python 3.x, it writes to a character stream
1833 if sys.version_info < (3, 0):
73159f99 1834 args['mode'] = 'wb'
181c8655 1835 else:
1836 args.update({
1837 'mode': 'w',
1838 'encoding': 'utf-8',
1839 })
1840
c86b6142 1841 tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
1842
1843 try:
1844 with tf:
6e84b215 1845 json.dump(obj, tf)
1846 if sys.platform == 'win32':
1847 # Need to remove existing file on Windows, else os.rename raises
1848 # WindowsError or FileExistsError.
1849 try:
1850 os.unlink(fn)
1851 except OSError:
1852 pass
1853 try:
1854 mask = os.umask(0)
1855 os.umask(mask)
1856 os.chmod(tf.name, 0o666 & ~mask)
1857 except OSError:
1858 pass
181c8655 1859 os.rename(tf.name, fn)
70a1165b 1860 except Exception:
1861 try:
1862 os.remove(tf.name)
1863 except OSError:
1864 pass
1865 raise
1866
1867
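# Illustrative sketch (hypothetical _example_* helper): write_json_file() serialises
# to a temporary file beside the target and then renames it over fn, so a reader
# never sees a partially written file.
def _example_write_json_file():
    write_json_file({'id': 'abc', 'title': 'test'}, 'info.json')
    try:
        with open('info.json') as f:
            assert json.load(f) == {'id': 'abc', 'title': 'test'}
    finally:
        os.remove('info.json')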
1868if sys.version_info >= (2, 7):
ee114368 1869 def find_xpath_attr(node, xpath, key, val=None):
59ae56fa 1870 """ Find the xpath xpath[@key=val] """
5d2354f1 1871 assert re.match(r'^[a-zA-Z_-]+$', key)
ee114368 1872 expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
1873 return node.find(expr)
1874else:
ee114368 1875 def find_xpath_attr(node, xpath, key, val=None):
810c10ba 1876 for f in node.findall(compat_xpath(xpath)):
1877 if key not in f.attrib:
1878 continue
1879 if val is None or f.attrib.get(key) == val:
1880 return f
1881 return None
1882
1883# On python2.6 the xml.etree.ElementTree.Element methods don't support
1884# the namespace parameter
1885
1886
1887def xpath_with_ns(path, ns_map):
1888 components = [c.split(':') for c in path.split('/')]
1889 replaced = []
1890 for c in components:
1891 if len(c) == 1:
1892 replaced.append(c[0])
1893 else:
1894 ns, tag = c
1895 replaced.append('{%s}%s' % (ns_map[ns], tag))
1896 return '/'.join(replaced)
1897
d77c3dfd 1898
a41fb80c 1899def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
578c0745 1900 def _find_xpath(xpath):
810c10ba 1901 return node.find(compat_xpath(xpath))
1902
1903 if isinstance(xpath, (str, compat_str)):
1904 n = _find_xpath(xpath)
1905 else:
1906 for xp in xpath:
1907 n = _find_xpath(xp)
1908 if n is not None:
1909 break
d74bebd5 1910
8e636da4 1911 if n is None:
1912 if default is not NO_DEFAULT:
1913 return default
1914 elif fatal:
1915 name = xpath if name is None else name
1916 raise ExtractorError('Could not find XML element %s' % name)
1917 else:
1918 return None
1919 return n
1920
1921
1922def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
1923 n = xpath_element(node, xpath, name, fatal=fatal, default=default)
1924 if n is None or n == default:
1925 return n
1926 if n.text is None:
1927 if default is not NO_DEFAULT:
1928 return default
1929 elif fatal:
1930 name = xpath if name is None else name
1931 raise ExtractorError('Could not find XML element\'s text %s' % name)
1932 else:
1933 return None
1934 return n.text
1935
1936
1937def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
1938 n = find_xpath_attr(node, xpath, key)
1939 if n is None:
1940 if default is not NO_DEFAULT:
1941 return default
1942 elif fatal:
1943 name = '%s[@%s]' % (xpath, key) if name is None else name
1944 raise ExtractorError('Could not find XML attribute %s' % name)
1945 else:
1946 return None
1947 return n.attrib[key]
1948
1949
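# Illustrative sketch (hypothetical _example_* helper): the xpath_* helpers wrap
# ElementTree lookups and either return the value, a supplied default, or None
# (or raise ExtractorError when fatal=True).
def _example_xpath_helpers():
    doc = compat_etree_fromstring('<root><a href="/x">text</a></root>')
    assert xpath_text(doc, './a') == 'text'
    assert xpath_attr(doc, './a', 'href') == '/x'
    assert xpath_text(doc, './missing', default='fallback') == 'fallback'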
9e6dd238 1950def get_element_by_id(id, html):
43e8fafd 1951 """Return the content of the tag with the specified ID in the passed HTML document"""
611c1dd9 1952 return get_element_by_attribute('id', id, html)
43e8fafd 1953
12ea2f30 1954
84c237fb 1955def get_element_by_class(class_name, html):
1956 """Return the content of the first tag with the specified class in the passed HTML document"""
1957 retval = get_elements_by_class(class_name, html)
1958 return retval[0] if retval else None
1959
1960
1961def get_element_by_attribute(attribute, value, html, escape_value=True):
1962 retval = get_elements_by_attribute(attribute, value, html, escape_value)
1963 return retval[0] if retval else None
1964
1965
1966def get_elements_by_class(class_name, html):
1967 """Return the content of all tags with the specified class in the passed HTML document as a list"""
1968 return get_elements_by_attribute(
1969 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
1970 html, escape_value=False)
1971
1972
2af12ad9 1973def get_elements_by_attribute(attribute, value, html, escape_value=True):
43e8fafd 1974 """Return the content of the tag with the specified attribute in the passed HTML document"""
9e6dd238 1975
1976 value = re.escape(value) if escape_value else value
1977
1978 retlist = []
1979 for m in re.finditer(r'''(?xs)
38285056 1980 <([a-zA-Z0-9:._-]+)
609ff8ca 1981 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
38285056 1982 \s+%s=['"]?%s['"]?
609ff8ca 1983 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
1984 \s*>
1985 (?P<content>.*?)
1986 </\1>
1987 ''' % (re.escape(attribute), value), html):
1988 res = m.group('content')
38285056 1989
1990 if res.startswith('"') or res.startswith("'"):
1991 res = res[1:-1]
38285056 1992
2af12ad9 1993 retlist.append(unescapeHTML(res))
a921f407 1994
2af12ad9 1995 return retlist
a921f407 1996
c5229f39 1997
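# Illustrative sketch (hypothetical _example_* helper): the get_element(s)_by_*
# helpers pull tag contents out of raw HTML with regular expressions (no full
# parser) and unescape HTML entities in the result.
def _example_get_element_helpers():
    html = '<div class="title main">A &amp; B</div><div class="title">C</div>'
    assert get_element_by_class('title', html) == 'A & B'
    assert get_elements_by_class('title', html) == ['A & B', 'C']
    assert get_element_by_attribute('class', 'title main', html) == 'A & B'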
1998class HTMLAttributeParser(compat_HTMLParser):
1999 """Trivial HTML parser to gather the attributes for a single element"""
b6e0c7d2 2000
8bb56eee 2001 def __init__(self):
c5229f39 2002 self.attrs = {}
2003 compat_HTMLParser.__init__(self)
2004
2005 def handle_starttag(self, tag, attrs):
2006 self.attrs = dict(attrs)
2007
c5229f39 2008
2009class HTMLListAttrsParser(compat_HTMLParser):
2010 """HTML parser to gather the attributes for the elements of a list"""
2011
2012 def __init__(self):
2013 compat_HTMLParser.__init__(self)
2014 self.items = []
2015 self._level = 0
2016
2017 def handle_starttag(self, tag, attrs):
2018 if tag == 'li' and self._level == 0:
2019 self.items.append(dict(attrs))
2020 self._level += 1
2021
2022 def handle_endtag(self, tag):
2023 self._level -= 1
2024
2025
2026def extract_attributes(html_element):
2027 """Given a string for an HTML element such as
2028 <el
2029 a="foo" B="bar" c="&98;az" d=boz
2030 empty= noval entity="&amp;"
2031 sq='"' dq="'"
2032 >
2033 Decode and return a dictionary of attributes.
2034 {
 2035 'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
2036 'empty': '', 'noval': None, 'entity': '&',
2037 'sq': '"', 'dq': '\''
2038 }.
2039 NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
2040 but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
2041 """
2042 parser = HTMLAttributeParser()
2043 try:
2044 parser.feed(html_element)
2045 parser.close()
2046 # Older Python may throw HTMLParseError in case of malformed HTML
2047 except compat_HTMLParseError:
2048 pass
8bb56eee 2049 return parser.attrs
9e6dd238 2050
c5229f39 2051
2052def parse_list(webpage):
2053 """Given a string for an series of HTML <li> elements,
2054 return a dictionary of their attributes"""
2055 parser = HTMLListAttrsParser()
2056 parser.feed(webpage)
2057 parser.close()
2058 return parser.items
2059
2060
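# Illustrative sketch (hypothetical _example_* helper): parse_list() expects a
# fragment containing only the <li> elements themselves and collects the
# attributes of each top-level item.
def _example_parse_list():
    items = parse_list('<li data-id="1">a</li><li data-id="2">b</li>')
    assert items == [{'data-id': '1'}, {'data-id': '2'}]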
9e6dd238 2061def clean_html(html):
59ae15a5 2062 """Clean an HTML snippet into a readable string"""
2063
2064 if html is None: # Convenience for sanitizing descriptions etc.
2065 return html
2066
2067 # Newline vs <br />
2068 html = html.replace('\n', ' ')
2069 html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
2070 html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
2071 # Strip html tags
2072 html = re.sub('<.*?>', '', html)
2073 # Replace html entities
2074 html = unescapeHTML(html)
7decf895 2075 return html.strip()
2076
2077
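# Illustrative sketch (hypothetical _example_* helper): clean_html() converts
# <br>/<p> boundaries into newlines, strips the remaining tags and unescapes
# entities.
def _example_clean_html():
    assert clean_html('<p>Foo <b>bar</b><br/>baz &amp; qux</p>') == 'Foo bar\nbaz & qux'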
d77c3dfd 2078def sanitize_open(filename, open_mode):
2079 """Try to open the given filename, and slightly tweak it if this fails.
2080
2081 Attempts to open the given filename. If this fails, it tries to change
2082 the filename slightly, step by step, until it's either able to open it
2083 or it fails and raises a final exception, like the standard open()
2084 function.
2085
2086 It returns the tuple (stream, definitive_file_name).
2087 """
2088 try:
28e614de 2089 if filename == '-':
2090 if sys.platform == 'win32':
2091 import msvcrt
2092 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
898280a0 2093 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
2094 stream = open(encodeFilename(filename), open_mode)
2095 return (stream, filename)
2096 except (IOError, OSError) as err:
2097 if err.errno in (errno.EACCES,):
2098 raise
59ae15a5 2099
f45c185f 2100 # In case of error, try to remove win32 forbidden chars
d55de57b 2101 alt_filename = sanitize_path(filename)
2102 if alt_filename == filename:
2103 raise
2104 else:
2105 # An exception here should be caught in the caller
d55de57b 2106 stream = open(encodeFilename(alt_filename), open_mode)
f45c185f 2107 return (stream, alt_filename)
2108
2109
2110def timeconvert(timestr):
2111 """Convert RFC 2822 defined time string into system timestamp"""
2112 timestamp = None
2113 timetuple = email.utils.parsedate_tz(timestr)
2114 if timetuple is not None:
2115 timestamp = email.utils.mktime_tz(timetuple)
2116 return timestamp
1c469a94 2117
5f6a1245 2118
796173d0 2119def sanitize_filename(s, restricted=False, is_id=False):
2120 """Sanitizes a string so it could be used as part of a filename.
2121 If restricted is set, use a stricter subset of allowed characters.
2122 Set is_id if this is not an arbitrary string, but an ID that should be kept
2123 if possible.
2124 """
2125 def replace_insane(char):
2126 if restricted and char in ACCENT_CHARS:
2127 return ACCENT_CHARS[char]
91dd88b9 2128 elif not restricted and char == '\n':
2129 return ' '
2130 elif char == '?' or ord(char) < 32 or ord(char) == 127:
2131 return ''
2132 elif char == '"':
2133 return '' if restricted else '\''
2134 elif char == ':':
2135 return '_-' if restricted else ' -'
2136 elif char in '\\/|*<>':
2137 return '_'
627dcfff 2138 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
2139 return '_'
2140 if restricted and ord(char) > 127:
2141 return '_'
2142 return char
2143
639f1cea 2144 if s == '':
2145 return ''
2146 # Handle timestamps
2147 s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
28e614de 2148 result = ''.join(map(replace_insane, s))
2149 if not is_id:
2150 while '__' in result:
2151 result = result.replace('__', '_')
2152 result = result.strip('_')
2153 # Common case of "Foreign band name - English song title"
2154 if restricted and result.startswith('-_'):
2155 result = result[2:]
2156 if result.startswith('-'):
2157 result = '_' + result[len('-'):]
a7440261 2158 result = result.lstrip('.')
2159 if not result:
2160 result = '_'
59ae15a5 2161 return result
d77c3dfd 2162
5f6a1245 2163
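# Illustrative sketch (hypothetical _example_* helper): sanitize_filename()
# replaces characters that are unsafe in file names; restricted mode also
# transliterates accents and turns spaces and most punctuation into '_'.
def _example_sanitize_filename():
    assert sanitize_filename('A/B: C?') == 'A_B - C'
    assert sanitize_filename('über cool: 10:30', restricted=True) == 'uber_cool_-_10_30'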
c2934512 2164def sanitize_path(s, force=False):
a2aaf4db 2165 """Sanitizes and normalizes path on Windows"""
c2934512 2166 if sys.platform == 'win32':
c4218ac3 2167 force = False
c2934512 2168 drive_or_unc, _ = os.path.splitdrive(s)
2169 if sys.version_info < (2, 7) and not drive_or_unc:
2170 drive_or_unc, _ = os.path.splitunc(s)
2171 elif force:
2172 drive_or_unc = ''
2173 else:
a2aaf4db 2174 return s
c2934512 2175
2176 norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
2177 if drive_or_unc:
2178 norm_path.pop(0)
2179 sanitized_path = [
ec85ded8 2180 path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
a2aaf4db 2181 for path_part in norm_path]
2182 if drive_or_unc:
2183 sanitized_path.insert(0, drive_or_unc + os.path.sep)
c4218ac3 2184 elif force and s[0] == os.path.sep:
2185 sanitized_path.insert(0, os.path.sep)
2186 return os.path.join(*sanitized_path)
2187
2188
17bcc626 2189def sanitize_url(url):
2190 # Prepend protocol-less URLs with `http:` scheme in order to mitigate
2191 # the number of unwanted failures due to missing protocol
2192 if url.startswith('//'):
2193 return 'http:%s' % url
2194 # Fix some common typos seen so far
2195 COMMON_TYPOS = (
067aa17e 2196 # https://github.com/ytdl-org/youtube-dl/issues/15649
2197 (r'^httpss://', r'https://'),
2198 # https://bx1.be/lives/direct-tv/
2199 (r'^rmtp([es]?)://', r'rtmp\1://'),
2200 )
2201 for mistake, fixup in COMMON_TYPOS:
2202 if re.match(mistake, url):
2203 return re.sub(mistake, fixup, url)
bc6b9bcd 2204 return url
2205
2206
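# Illustrative sketch (hypothetical _example_* helper): sanitize_url() adds a
# scheme to protocol-relative URLs and repairs a couple of common scheme typos.
def _example_sanitize_url():
    assert sanitize_url('//example.com/a') == 'http://example.com/a'
    assert sanitize_url('httpss://example.com/a') == 'https://example.com/a'
    assert sanitize_url('rmtpe://host/live') == 'rtmpe://host/live'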
2207def extract_basic_auth(url):
2208 parts = compat_urlparse.urlsplit(url)
2209 if parts.username is None:
2210 return url, None
2211 url = compat_urlparse.urlunsplit(parts._replace(netloc=(
2212 parts.hostname if parts.port is None
2213 else '%s:%d' % (parts.hostname, parts.port))))
2214 auth_payload = base64.b64encode(
2215 ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
2216 return url, 'Basic ' + auth_payload.decode('utf-8')
2217
2218
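# Illustrative sketch (hypothetical _example_* helper): extract_basic_auth()
# strips embedded credentials from the URL and returns a ready-made
# Authorization header value.
def _example_extract_basic_auth():
    url, auth = extract_basic_auth('http://user:pass@example.com/feed')
    assert url == 'http://example.com/feed'
    assert auth == 'Basic ' + base64.b64encode(b'user:pass').decode('utf-8')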
67dda517 2219def sanitized_Request(url, *args, **kwargs):
bc6b9bcd 2220 url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
2221 if auth_header is not None:
2222 headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
2223 headers['Authorization'] = auth_header
2224 return compat_urllib_request.Request(url, *args, **kwargs)
2225
2226
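# Illustrative sketch (hypothetical _example_* helper, assuming escape_url() from
# elsewhere in this module): sanitized_Request() cleans the URL and moves any
# embedded credentials into a Basic Authorization header.
def _example_sanitized_Request():
    req = sanitized_Request('http://user:pw@example.com/path')
    assert req.get_full_url() == 'http://example.com/path'
    assert req.get_header('Authorization').startswith('Basic ')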
2227def expand_path(s):
2228 """Expand shell variables and ~"""
2229 return os.path.expandvars(compat_expanduser(s))
2230
2231
d77c3dfd 2232def orderedSet(iterable):
2233 """ Remove all duplicates from the input iterable """
2234 res = []
2235 for el in iterable:
2236 if el not in res:
2237 res.append(el)
2238 return res
d77c3dfd 2239
912b38b4 2240
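# Illustrative sketch (hypothetical _example_* helper): orderedSet() drops
# duplicates while keeping the first-seen order.
def _example_orderedSet():
    assert orderedSet([1, 2, 1, 3, 2]) == [1, 2, 3]
    assert orderedSet('abca') == ['a', 'b', 'c']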
55b2f099 2241def _htmlentity_transform(entity_with_semicolon):
4e408e47 2242 """Transforms an HTML entity to a character."""
2243 entity = entity_with_semicolon[:-1]
2244
2245 # Known non-numeric HTML entity
2246 if entity in compat_html_entities.name2codepoint:
2247 return compat_chr(compat_html_entities.name2codepoint[entity])
2248
2249 # TODO: HTML5 allows entities without a semicolon. For example,
2250 # '&Eacuteric' should be decoded as 'Éric'.
2251 if entity_with_semicolon in compat_html_entities_html5:
2252 return compat_html_entities_html5[entity_with_semicolon]
2253
91757b0f 2254 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
2255 if mobj is not None:
2256 numstr = mobj.group(1)
28e614de 2257 if numstr.startswith('x'):
4e408e47 2258 base = 16
28e614de 2259 numstr = '0%s' % numstr
2260 else:
2261 base = 10
067aa17e 2262 # See https://github.com/ytdl-org/youtube-dl/issues/7518
2263 try:
2264 return compat_chr(int(numstr, base))
2265 except ValueError:
2266 pass
2267
2268 # Unknown entity in name, return its literal representation
7a3f0c00 2269 return '&%s;' % entity
2270
2271
d77c3dfd 2272def unescapeHTML(s):
2273 if s is None:
2274 return None
2275 assert type(s) == compat_str
d77c3dfd 2276
4e408e47 2277 return re.sub(
95f3f7c2 2278 r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
d77c3dfd 2279
8bf48f23 2280
cdb19aa4 2281def escapeHTML(text):
2282 return (
2283 text
2284 .replace('&', '&amp;')
2285 .replace('<', '&lt;')
2286 .replace('>', '&gt;')
2287 .replace('"', '&quot;')
2288 .replace("'", '&#39;')
2289 )
2290
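# Illustrative sketch (hypothetical _example_* helper): unescapeHTML() resolves
# named, decimal and hex entities, while escapeHTML() does the reverse for the
# five XML-special characters.
def _example_html_escaping():
    assert unescapeHTML('&amp; &#39; &eacute;') == "& ' é"
    assert escapeHTML('<a href="x">') == '&lt;a href=&quot;x&quot;&gt;'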
2291
f5b1bca9 2292def process_communicate_or_kill(p, *args, **kwargs):
2293 try:
2294 return p.communicate(*args, **kwargs)
2295 except BaseException: # Including KeyboardInterrupt
2296 p.kill()
2297 p.wait()
2298 raise
2299
2300
d3c93ec2 2301class Popen(subprocess.Popen):
2302 if sys.platform == 'win32':
2303 _startupinfo = subprocess.STARTUPINFO()
2304 _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
2305 else:
2306 _startupinfo = None
2307
2308 def __init__(self, *args, **kwargs):
2309 super(Popen, self).__init__(*args, **kwargs, startupinfo=self._startupinfo)
2310
2311 def communicate_or_kill(self, *args, **kwargs):
2312 return process_communicate_or_kill(self, *args, **kwargs)
2313
2314
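# Illustrative sketch (hypothetical _example_* helper): Popen hides the console
# window on Windows and adds communicate_or_kill(), which reaps the child even
# when communicate() is interrupted.
def _example_popen():
    p = Popen([sys.executable, '-c', 'print("hi")'], stdout=subprocess.PIPE)
    stdout, stderr = p.communicate_or_kill()
    assert stdout.strip() == b'hi'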
2315def get_subprocess_encoding():
2316 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2317 # For subprocess calls, encode with locale encoding
2318 # Refer to http://stackoverflow.com/a/9951851/35070
2319 encoding = preferredencoding()
2320 else:
2321 encoding = sys.getfilesystemencoding()
2322 if encoding is None:
2323 encoding = 'utf-8'
2324 return encoding
2325
2326
8bf48f23 2327def encodeFilename(s, for_subprocess=False):
2328 """
2329 @param s The name of the file
2330 """
d77c3dfd 2331
8bf48f23 2332 assert type(s) == compat_str
d77c3dfd 2333
59ae15a5
PH
2334 # Python 3 has a Unicode API
2335 if sys.version_info >= (3, 0):
2336 return s
0f00efed 2337
aa49acd1
S
2338 # Pass '' directly to use Unicode APIs on Windows 2000 and up
2339 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
2340 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
2341 if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
2342 return s
2343
8ee239e9
YCH
2344 # Jython assumes filenames are Unicode strings even though it is reported as Python 2.x compatible

2345 if sys.platform.startswith('java'):
2346 return s
2347
aa49acd1
S
2348 return s.encode(get_subprocess_encoding(), 'ignore')
2349
2350
2351def decodeFilename(b, for_subprocess=False):
2352
2353 if sys.version_info >= (3, 0):
2354 return b
2355
2356 if not isinstance(b, bytes):
2357 return b
2358
2359 return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2360
f07b74fc
PH
2361
2362def encodeArgument(s):
2363 if not isinstance(s, compat_str):
2364 # Legacy code that uses byte strings
2365 # Uncomment the following line after fixing all post processors
7af808a5 2366 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
f07b74fc
PH
2367 s = s.decode('ascii')
2368 return encodeFilename(s, True)
2369
2370
aa49acd1
S
2371def decodeArgument(b):
2372 return decodeFilename(b, True)
2373
2374
8271226a
PH
2375def decodeOption(optval):
2376 if optval is None:
2377 return optval
2378 if isinstance(optval, bytes):
2379 optval = optval.decode(preferredencoding())
2380
2381 assert isinstance(optval, compat_str)
2382 return optval
1c256f70 2383
5f6a1245 2384
aa7785f8 2385_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
2386
2387
2388def timetuple_from_msec(msec):
2389 secs, msec = divmod(msec, 1000)
2390 mins, secs = divmod(secs, 60)
2391 hrs, mins = divmod(mins, 60)
2392 return _timetuple(hrs, mins, secs, msec)
2393
2394
cdb19aa4 2395def formatSeconds(secs, delim=':', msec=False):
aa7785f8 2396 time = timetuple_from_msec(secs * 1000)
2397 if time.hours:
2398 ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
2399 elif time.minutes:
2400 ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
4539dd30 2401 else:
aa7785f8 2402 ret = '%d' % time.seconds
2403 return '%s.%03d' % (ret, time.milliseconds) if msec else ret
4539dd30 2404
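# Rough behaviour of the two helpers above (illustrative values):
# >>> timetuple_from_msec(345244)
# Time(hours=0, minutes=5, seconds=45, milliseconds=244)
# >>> formatSeconds(3905)
# '1:05:05'
# >>> formatSeconds(90, msec=True)
# '1:30.000'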
a0ddb8a2 2405
77562778 2406def _ssl_load_windows_store_certs(ssl_context, storename):
2407 # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
2408 try:
2409 certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
2410 if encoding == 'x509_asn' and (
2411 trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
2412 except PermissionError:
2413 return
2414 for cert in certs:
a2366922 2415 try:
77562778 2416 ssl_context.load_verify_locations(cadata=cert)
2417 except ssl.SSLError:
a2366922
PH
2418 pass
2419
77562778 2420
2421def make_HTTPS_handler(params, **kwargs):
2422 opts_check_certificate = not params.get('nocheckcertificate')
2423 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2424 context.check_hostname = opts_check_certificate
2425 context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
2426 if opts_check_certificate:
4e3d1898 2427 try:
2428 context.load_default_certs()
2429 # Work around the issue in load_default_certs when there are bad certificates. See:
2430 # https://github.com/yt-dlp/yt-dlp/issues/1060,
2431 # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
2432 except ssl.SSLError:
2433 # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
2434 if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
2435 # Create a new context to discard any certificates that were already loaded
2436 context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
2437 context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
2438 for storename in ('CA', 'ROOT'):
2439 _ssl_load_windows_store_certs(context, storename)
2440 context.set_default_verify_paths()
77562778 2441 return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2442
732ea2f0 2443
5873d4cc 2444def bug_reports_message(before=';'):
08f2a92c 2445 if ytdl_is_updateable():
7a5c1cfe 2446 update_cmd = 'type yt-dlp -U to update'
08f2a92c 2447 else:
7a5c1cfe 2448 update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
5873d4cc 2449 msg = 'please report this issue on https://github.com/yt-dlp/yt-dlp .'
08f2a92c 2450 msg += ' Make sure you are using the latest version; %s.' % update_cmd
7a5c1cfe 2451 msg += ' Be sure to call yt-dlp with the --verbose flag and include its complete output.'
5873d4cc
F
2452
2453 before = before.rstrip()
2454 if not before or before.endswith(('.', '!', '?')):
2455 msg = msg[0].title() + msg[1:]
2456
2457 return (before + ' ' if before else '') + msg
08f2a92c
JMF
2458
2459
bf5b9d85
PM
2460class YoutubeDLError(Exception):
2461 """Base exception for YoutubeDL errors."""
2462 pass
2463
2464
3158150c 2465network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
2466if hasattr(ssl, 'CertificateError'):
2467 network_exceptions.append(ssl.CertificateError)
2468network_exceptions = tuple(network_exceptions)
2469
2470
bf5b9d85 2471class ExtractorError(YoutubeDLError):
1c256f70 2472 """Error during info extraction."""
5f6a1245 2473
1151c407 2474 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
9a82b238 2475 """ tb, if given, is the original traceback (so that it can be printed out).
7a5c1cfe 2476 If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
9a82b238 2477 """
3158150c 2478 if sys.exc_info()[0] in network_exceptions:
9a82b238 2479 expected = True
d5979c5d 2480
526d74ec 2481 self.msg = str(msg)
1c256f70 2482 self.traceback = tb
1151c407 2483 self.expected = expected
2eabb802 2484 self.cause = cause
d11271dd 2485 self.video_id = video_id
1151c407 2486 self.ie = ie
2487 self.exc_info = sys.exc_info() # preserve original exception
2488
2489 super(ExtractorError, self).__init__(''.join((
2490 format_field(ie, template='[%s] '),
2491 format_field(video_id, template='%s: '),
526d74ec 2492 self.msg,
1151c407 2493 format_field(cause, template=' (caused by %r)'),
2494 '' if expected else bug_reports_message())))
1c256f70 2495
01951dda
PH
2496 def format_traceback(self):
2497 if self.traceback is None:
2498 return None
28e614de 2499 return ''.join(traceback.format_tb(self.traceback))
01951dda 2500
1c256f70 2501
416c7fcb
PH
2502class UnsupportedError(ExtractorError):
2503 def __init__(self, url):
2504 super(UnsupportedError, self).__init__(
2505 'Unsupported URL: %s' % url, expected=True)
2506 self.url = url
2507
2508
55b3e45b
JMF
2509class RegexNotFoundError(ExtractorError):
2510 """Error when a regex didn't match"""
2511 pass
2512
2513
773f291d
S
2514class GeoRestrictedError(ExtractorError):
2515 """Geographic restriction Error exception.
2516
2517 This exception may be thrown when a video is not available from your
2518 geographic location due to geographic restrictions imposed by a website.
2519 """
b6e0c7d2 2520
0db3bae8 2521 def __init__(self, msg, countries=None, **kwargs):
2522 kwargs['expected'] = True
2523 super(GeoRestrictedError, self).__init__(msg, **kwargs)
773f291d
S
2524 self.countries = countries
2525
2526
bf5b9d85 2527class DownloadError(YoutubeDLError):
59ae15a5 2528 """Download Error exception.
d77c3dfd 2529
59ae15a5
PH
2530 This exception may be thrown by FileDownloader objects if they are not
2531 configured to continue on errors. They will contain the appropriate
2532 error message.
2533 """
5f6a1245 2534
8cc83b8d
FV
2535 def __init__(self, msg, exc_info=None):
2536 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
2537 super(DownloadError, self).__init__(msg)
2538 self.exc_info = exc_info
d77c3dfd
FV
2539
2540
498f5606 2541class EntryNotInPlaylist(YoutubeDLError):
2542 """Entry not in playlist exception.
2543
2544 This exception will be thrown by YoutubeDL when a requested entry
2545 is not found in the playlist info_dict
2546 """
2547 pass
2548
2549
bf5b9d85 2550class SameFileError(YoutubeDLError):
59ae15a5 2551 """Same File exception.
d77c3dfd 2552
59ae15a5
PH
2553 This exception will be thrown by FileDownloader objects if they detect
2554 multiple files would have to be downloaded to the same file on disk.
2555 """
2556 pass
d77c3dfd
FV
2557
2558
bf5b9d85 2559class PostProcessingError(YoutubeDLError):
59ae15a5 2560 """Post Processing exception.
d77c3dfd 2561
59ae15a5
PH
2562 This exception may be raised by PostProcessor's .run() method to
2563 indicate an error in the postprocessing task.
2564 """
5f6a1245 2565
7851b379 2566 def __init__(self, msg):
bf5b9d85 2567 super(PostProcessingError, self).__init__(msg)
7851b379 2568 self.msg = msg
d77c3dfd 2569
5f6a1245 2570
48f79687 2571class DownloadCancelled(YoutubeDLError):
2572 """ Exception raised when the download queue should be interrupted """
2573 msg = 'The download was cancelled'
8b0d7497 2574
48f79687 2575 def __init__(self, msg=None):
2576 if msg is not None:
2577 self.msg = msg
2578 YoutubeDLError.__init__(self, self.msg)
8b0d7497 2579
8b0d7497 2580
48f79687 2581class ExistingVideoReached(DownloadCancelled):
2582 """ --break-on-existing triggered """
2583 msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
8b0d7497 2584
48f79687 2585
2586class RejectedVideoReached(DownloadCancelled):
2587 """ --break-on-reject triggered """
2588 msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
51d9739f 2589
2590
48f79687 2591class MaxDownloadsReached(DownloadCancelled):
59ae15a5 2592 """ --max-downloads limit has been reached. """
48f79687 2593 msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
2594
2595
2596class ThrottledDownload(YoutubeDLError):
2597 """ Download speed below --throttled-rate. """
59ae15a5 2598 pass
d77c3dfd
FV
2599
2600
bf5b9d85 2601class UnavailableVideoError(YoutubeDLError):
59ae15a5 2602 """Unavailable Format exception.
d77c3dfd 2603
59ae15a5
PH
2604 This exception will be thrown when a video is requested
2605 in a format that is not available for that video.
2606 """
2607 pass
d77c3dfd
FV
2608
2609
bf5b9d85 2610class ContentTooShortError(YoutubeDLError):
59ae15a5 2611 """Content Too Short exception.
d77c3dfd 2612
59ae15a5
PH
2613 This exception may be raised by FileDownloader objects when a file they
2614 download is too small for what the server announced first, indicating
2615 the connection was probably interrupted.
2616 """
d77c3dfd 2617
59ae15a5 2618 def __init__(self, downloaded, expected):
bf5b9d85
PM
2619 super(ContentTooShortError, self).__init__(
2620 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
2621 )
2c7ed247 2622 # Both in bytes
59ae15a5
PH
2623 self.downloaded = downloaded
2624 self.expected = expected
d77c3dfd 2625
5f6a1245 2626
bf5b9d85 2627class XAttrMetadataError(YoutubeDLError):
efa97bdc
YCH
2628 def __init__(self, code=None, msg='Unknown error'):
2629 super(XAttrMetadataError, self).__init__(msg)
2630 self.code = code
bd264412 2631 self.msg = msg
efa97bdc
YCH
2632
2633 # Parsing code and msg
3089bc74 2634 if (self.code in (errno.ENOSPC, errno.EDQUOT)
a0566bbf 2635 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
efa97bdc
YCH
2636 self.reason = 'NO_SPACE'
2637 elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
2638 self.reason = 'VALUE_TOO_LONG'
2639 else:
2640 self.reason = 'NOT_SUPPORTED'
2641
2642
bf5b9d85 2643class XAttrUnavailableError(YoutubeDLError):
efa97bdc
YCH
2644 pass
2645
2646
c5a59d93 2647def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
e5e78797
S
2648 # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
2649 # expected HTTP responses to meet HTTP/1.0 or later (see also
067aa17e 2650 # https://github.com/ytdl-org/youtube-dl/issues/6727)
e5e78797 2651 if sys.version_info < (3, 0):
65220c3b
S
2652 kwargs['strict'] = True
2653 hc = http_class(*args, **compat_kwargs(kwargs))
be4a824d 2654 source_address = ydl_handler._params.get('source_address')
8959018a 2655
be4a824d 2656 if source_address is not None:
8959018a
AU
2657 # This is to work around _create_connection() from socket, which will try all
2658 # address data from getaddrinfo(), including IPv6. This filters the result from
2659 # getaddrinfo() based on the source_address value.
2660 # This is based on the cpython socket.create_connection() function.
2661 # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
2662 def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
2663 host, port = address
2664 err = None
2665 addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
9e21e6d9
S
2666 af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
2667 ip_addrs = [addr for addr in addrs if addr[0] == af]
2668 if addrs and not ip_addrs:
2669 ip_version = 'v4' if af == socket.AF_INET else 'v6'
2670 raise socket.error(
2671 "No remote IP%s addresses available for connect, can't use '%s' as source address"
2672 % (ip_version, source_address[0]))
8959018a
AU
2673 for res in ip_addrs:
2674 af, socktype, proto, canonname, sa = res
2675 sock = None
2676 try:
2677 sock = socket.socket(af, socktype, proto)
2678 if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
2679 sock.settimeout(timeout)
2680 sock.bind(source_address)
2681 sock.connect(sa)
2682 err = None # Explicitly break reference cycle
2683 return sock
2684 except socket.error as _:
2685 err = _
2686 if sock is not None:
2687 sock.close()
2688 if err is not None:
2689 raise err
2690 else:
9e21e6d9
S
2691 raise socket.error('getaddrinfo returns an empty list')
2692 if hasattr(hc, '_create_connection'):
2693 hc._create_connection = _create_connection
be4a824d
PH
2694 sa = (source_address, 0)
2695 if hasattr(hc, 'source_address'): # Python 2.7+
2696 hc.source_address = sa
2697 else: # Python 2.6
2698 def _hc_connect(self, *args, **kwargs):
9e21e6d9 2699 sock = _create_connection(
be4a824d
PH
2700 (self.host, self.port), self.timeout, sa)
2701 if is_https:
d7932313
PH
2702 self.sock = ssl.wrap_socket(
2703 sock, self.key_file, self.cert_file,
2704 ssl_version=ssl.PROTOCOL_TLSv1)
be4a824d
PH
2705 else:
2706 self.sock = sock
2707 hc.connect = functools.partial(_hc_connect, hc)
2708
2709 return hc
2710
2711
87f0e62d 2712def handle_youtubedl_headers(headers):
992fc9d6
YCH
2713 filtered_headers = headers
2714
2715 if 'Youtubedl-no-compression' in filtered_headers:
2716 filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
87f0e62d 2717 del filtered_headers['Youtubedl-no-compression']
87f0e62d 2718
992fc9d6 2719 return filtered_headers
87f0e62d
YCH
2720
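# Sketch of the transformation performed above (illustrative header values):
# >>> handle_youtubedl_headers({'User-Agent': 'UA', 'Youtubedl-no-compression': 'True', 'Accept-Encoding': 'gzip'})
# {'User-Agent': 'UA'}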
2721
acebc9cd 2722class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
59ae15a5
PH
2723 """Handler for HTTP requests and responses.
2724
2725 This class, when installed with an OpenerDirector, automatically adds
2726 the standard headers to every HTTP request and handles gzipped and
2727 deflated responses from web servers. If compression is to be avoided in
2728 a particular request, the original request in the program code only has
0424ec30 2729 to include the HTTP header "Youtubedl-no-compression", which will be
59ae15a5
PH
2730 removed before making the real request.
2731
2732 Part of this code was copied from:
2733
2734 http://techknack.net/python-urllib2-handlers/
2735
2736 Andrew Rowls, the author of that code, agreed to release it to the
2737 public domain.
2738 """
2739
be4a824d
PH
2740 def __init__(self, params, *args, **kwargs):
2741 compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
2742 self._params = params
2743
2744 def http_open(self, req):
71aff188
YCH
2745 conn_class = compat_http_client.HTTPConnection
2746
2747 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2748 if socks_proxy:
2749 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2750 del req.headers['Ytdl-socks-proxy']
2751
be4a824d 2752 return self.do_open(functools.partial(
71aff188 2753 _create_http_connection, self, conn_class, False),
be4a824d
PH
2754 req)
2755
59ae15a5
PH
2756 @staticmethod
2757 def deflate(data):
fc2119f2 2758 if not data:
2759 return data
59ae15a5
PH
2760 try:
2761 return zlib.decompress(data, -zlib.MAX_WBITS)
2762 except zlib.error:
2763 return zlib.decompress(data)
2764
acebc9cd 2765 def http_request(self, req):
51f267d9
S
2766 # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is not
2767 # always respected by websites, and some tend to give out URLs with non-percent-encoded
2768 # non-ASCII characters (see telemb.py, ard.py [#3412]).
2769 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991).
2770 # To work around the aforementioned issue we replace the request's original URL with a
2771 # percent-encoded one
2772 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2773 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2774 url = req.get_full_url()
2775 url_escaped = escape_url(url)
2776
2777 # Substitute URL if any change after escaping
2778 if url != url_escaped:
15d260eb 2779 req = update_Request(req, url=url_escaped)
51f267d9 2780
33ac271b 2781 for h, v in std_headers.items():
3d5f7a39
JK
2782 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2783 # The dict keys are capitalized because of this bug by urllib
2784 if h.capitalize() not in req.headers:
33ac271b 2785 req.add_header(h, v)
87f0e62d
YCH
2786
2787 req.headers = handle_youtubedl_headers(req.headers)
989b4b2b
PH
2788
2789 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2790 # Python 2.6 is brain-dead when it comes to fragments
2791 req._Request__original = req._Request__original.partition('#')[0]
2792 req._Request__r_type = req._Request__r_type.partition('#')[0]
2793
59ae15a5
PH
2794 return req
2795
acebc9cd 2796 def http_response(self, req, resp):
59ae15a5
PH
2797 old_resp = resp
2798 # gzip
2799 if resp.headers.get('Content-encoding', '') == 'gzip':
aa3e9507
PH
2800 content = resp.read()
2801 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2802 try:
2803 uncompressed = io.BytesIO(gz.read())
2804 except IOError as original_ioerror:
2805 # There may be junk at the end of the file
2806 # See http://stackoverflow.com/q/4928560/35070 for details
2807 for i in range(1, 1024):
2808 try:
2809 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2810 uncompressed = io.BytesIO(gz.read())
2811 except IOError:
2812 continue
2813 break
2814 else:
2815 raise original_ioerror
b407d853 2816 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2817 resp.msg = old_resp.msg
c047270c 2818 del resp.headers['Content-encoding']
59ae15a5
PH
2819 # deflate
2820 if resp.headers.get('Content-encoding', '') == 'deflate':
2821 gz = io.BytesIO(self.deflate(resp.read()))
b407d853 2822 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
59ae15a5 2823 resp.msg = old_resp.msg
c047270c 2824 del resp.headers['Content-encoding']
ad729172 2825 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
067aa17e 2826 # https://github.com/ytdl-org/youtube-dl/issues/6457).
5a4d9ddb
S
2827 if 300 <= resp.code < 400:
2828 location = resp.headers.get('Location')
2829 if location:
2830 # As per RFC 2616, the default charset is iso-8859-1, which is respected by Python 3
2831 if sys.version_info >= (3, 0):
2832 location = location.encode('iso-8859-1').decode('utf-8')
0ea59007
YCH
2833 else:
2834 location = location.decode('utf-8')
5a4d9ddb
S
2835 location_escaped = escape_url(location)
2836 if location != location_escaped:
2837 del resp.headers['Location']
9a4aec8b
YCH
2838 if sys.version_info < (3, 0):
2839 location_escaped = location_escaped.encode('utf-8')
5a4d9ddb 2840 resp.headers['Location'] = location_escaped
59ae15a5 2841 return resp
0f8d03f8 2842
acebc9cd
PH
2843 https_request = http_request
2844 https_response = http_response
bf50b038 2845
5de90176 2846
71aff188
YCH
2847def make_socks_conn_class(base_class, socks_proxy):
2848 assert issubclass(base_class, (
2849 compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
2850
2851 url_components = compat_urlparse.urlparse(socks_proxy)
2852 if url_components.scheme.lower() == 'socks5':
2853 socks_type = ProxyType.SOCKS5
2854 elif url_components.scheme.lower() in ('socks', 'socks4'):
2855 socks_type = ProxyType.SOCKS4
51fb4995
YCH
2856 elif url_components.scheme.lower() == 'socks4a':
2857 socks_type = ProxyType.SOCKS4A
71aff188 2858
cdd94c2e
YCH
2859 def unquote_if_non_empty(s):
2860 if not s:
2861 return s
2862 return compat_urllib_parse_unquote_plus(s)
2863
71aff188
YCH
2864 proxy_args = (
2865 socks_type,
2866 url_components.hostname, url_components.port or 1080,
2867 True, # Remote DNS
cdd94c2e
YCH
2868 unquote_if_non_empty(url_components.username),
2869 unquote_if_non_empty(url_components.password),
71aff188
YCH
2870 )
2871
2872 class SocksConnection(base_class):
2873 def connect(self):
2874 self.sock = sockssocket()
2875 self.sock.setproxy(*proxy_args)
2876 if type(self.timeout) in (int, float):
2877 self.sock.settimeout(self.timeout)
2878 self.sock.connect((self.host, self.port))
2879
2880 if isinstance(self, compat_http_client.HTTPSConnection):
2881 if hasattr(self, '_context'): # Python > 2.6
2882 self.sock = self._context.wrap_socket(
2883 self.sock, server_hostname=self.host)
2884 else:
2885 self.sock = ssl.wrap_socket(self.sock)
2886
2887 return SocksConnection
2888
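# The socks_proxy value is a URL of the form scheme://[user[:pass]@]host[:port] with
# scheme one of socks, socks4, socks4a or socks5, e.g. 'socks5://127.0.0.1:1080'
# (an example address; the port defaults to 1080). Credentials, if present, are
# URL-unquoted before being passed to sockssocket.setproxy().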
2889
be4a824d
PH
2890class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
2891 def __init__(self, params, https_conn_class=None, *args, **kwargs):
2892 compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
2893 self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
2894 self._params = params
2895
2896 def https_open(self, req):
4f264c02 2897 kwargs = {}
71aff188
YCH
2898 conn_class = self._https_conn_class
2899
4f264c02
JMF
2900 if hasattr(self, '_context'): # python > 2.6
2901 kwargs['context'] = self._context
2902 if hasattr(self, '_check_hostname'): # python 3.x
2903 kwargs['check_hostname'] = self._check_hostname
71aff188
YCH
2904
2905 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2906 if socks_proxy:
2907 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2908 del req.headers['Ytdl-socks-proxy']
2909
be4a824d 2910 return self.do_open(functools.partial(
71aff188 2911 _create_http_connection, self, conn_class, True),
4f264c02 2912 req, **kwargs)
be4a824d
PH
2913
2914
1bab3437 2915class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
f1a8511f
S
2916 """
2917 See [1] for cookie file format.
2918
2919 1. https://curl.haxx.se/docs/http-cookies.html
2920 """
e7e62441 2921 _HTTPONLY_PREFIX = '#HttpOnly_'
c380cc28
S
2922 _ENTRY_LEN = 7
2923 _HEADER = '''# Netscape HTTP Cookie File
7a5c1cfe 2924# This file is generated by yt-dlp. Do not edit.
c380cc28
S
2925
2926'''
2927 _CookieFileEntry = collections.namedtuple(
2928 'CookieFileEntry',
2929 ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
e7e62441 2930
1bab3437 2931 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
c380cc28
S
2932 """
2933 Save cookies to a file.
2934
2935 Most of the code is taken from CPython 3.8 and slightly adapted
2936 to support cookie files with UTF-8 in both python 2 and 3.
2937 """
2938 if filename is None:
2939 if self.filename is not None:
2940 filename = self.filename
2941 else:
2942 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2943
1bab3437
S
2944 # Store session cookies with `expires` set to 0 instead of an empty
2945 # string
2946 for cookie in self:
2947 if cookie.expires is None:
2948 cookie.expires = 0
c380cc28
S
2949
2950 with io.open(filename, 'w', encoding='utf-8') as f:
2951 f.write(self._HEADER)
2952 now = time.time()
2953 for cookie in self:
2954 if not ignore_discard and cookie.discard:
2955 continue
2956 if not ignore_expires and cookie.is_expired(now):
2957 continue
2958 if cookie.secure:
2959 secure = 'TRUE'
2960 else:
2961 secure = 'FALSE'
2962 if cookie.domain.startswith('.'):
2963 initial_dot = 'TRUE'
2964 else:
2965 initial_dot = 'FALSE'
2966 if cookie.expires is not None:
2967 expires = compat_str(cookie.expires)
2968 else:
2969 expires = ''
2970 if cookie.value is None:
2971 # cookies.txt regards 'Set-Cookie: foo' as a cookie
2972 # with no name, whereas http.cookiejar regards it as a
2973 # cookie with no value.
2974 name = ''
2975 value = cookie.name
2976 else:
2977 name = cookie.name
2978 value = cookie.value
2979 f.write(
2980 '\t'.join([cookie.domain, initial_dot, cookie.path,
2981 secure, expires, name, value]) + '\n')
1bab3437
S
2982
2983 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
e7e62441 2984 """Load cookies from a file."""
2985 if filename is None:
2986 if self.filename is not None:
2987 filename = self.filename
2988 else:
2989 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
2990
c380cc28
S
2991 def prepare_line(line):
2992 if line.startswith(self._HTTPONLY_PREFIX):
2993 line = line[len(self._HTTPONLY_PREFIX):]
2994 # comments and empty lines are fine
2995 if line.startswith('#') or not line.strip():
2996 return line
2997 cookie_list = line.split('\t')
2998 if len(cookie_list) != self._ENTRY_LEN:
2999 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
3000 cookie = self._CookieFileEntry(*cookie_list)
3001 if cookie.expires_at and not cookie.expires_at.isdigit():
3002 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
3003 return line
3004
e7e62441 3005 cf = io.StringIO()
c380cc28 3006 with io.open(filename, encoding='utf-8') as f:
e7e62441 3007 for line in f:
c380cc28
S
3008 try:
3009 cf.write(prepare_line(line))
3010 except compat_cookiejar.LoadError as e:
3011 write_string(
3012 'WARNING: skipping cookie file entry due to %s: %r\n'
3013 % (e, line), sys.stderr)
3014 continue
e7e62441 3015 cf.seek(0)
3016 self._really_load(cf, filename, ignore_discard, ignore_expires)
1bab3437
S
3017 # Session cookies are denoted by either `expires` field set to
3018 # an empty string or 0. MozillaCookieJar only recognizes the former
3019 # (see [1]). So we need to force the latter to be recognized as session
3020 # cookies on our own.
3021 # Session cookies may be important for cookies-based authentication,
3022 # e.g. usually, when the user does not check the 'Remember me' check box
3023 # while logging in on a site, some important cookies are stored as session
3024 # cookies, so not recognizing them would result in a failed login.
3025 # 1. https://bugs.python.org/issue17164
3026 for cookie in self:
3027 # Treat `expires=0` cookies as session cookies
3028 if cookie.expires == 0:
3029 cookie.expires = None
3030 cookie.discard = True
3031
3032
a6420bf5
S
3033class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
3034 def __init__(self, cookiejar=None):
3035 compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
3036
3037 def http_response(self, request, response):
3038 # Python 2 will choke on the next HTTP request in a row if there are non-ASCII
3039 # characters in the Set-Cookie HTTP header of the last response (see
067aa17e 3040 # https://github.com/ytdl-org/youtube-dl/issues/6769).
a6420bf5
S
3041 # In order to at least prevent crashing, we percent-encode the Set-Cookie
3042 # header before HTTPCookieProcessor starts processing it.
e28034c5
S
3043 # if sys.version_info < (3, 0) and response.headers:
3044 # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
3045 # set_cookie = response.headers.get(set_cookie_header)
3046 # if set_cookie:
3047 # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
3048 # if set_cookie != set_cookie_escaped:
3049 # del response.headers[set_cookie_header]
3050 # response.headers[set_cookie_header] = set_cookie_escaped
a6420bf5
S
3051 return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
3052
f5fa042c 3053 https_request = compat_urllib_request.HTTPCookieProcessor.http_request
a6420bf5
S
3054 https_response = http_response
3055
3056
fca6dba8 3057class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
201c1459 3058 """YoutubeDL redirect handler
3059
3060 The code is based on HTTPRedirectHandler implementation from CPython [1].
3061
3062 This redirect handler solves two issues:
3063 - ensures redirect URL is always unicode under python 2
3064 - introduces support for experimental HTTP response status code
3065 308 Permanent Redirect [2] used by some sites [3]
3066
3067 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
3068 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
3069 3. https://github.com/ytdl-org/youtube-dl/issues/28768
3070 """
3071
3072 http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
3073
3074 def redirect_request(self, req, fp, code, msg, headers, newurl):
3075 """Return a Request or None in response to a redirect.
3076
3077 This is called by the http_error_30x methods when a
3078 redirection response is received. If a redirection should
3079 take place, return a new Request to allow http_error_30x to
3080 perform the redirect. Otherwise, raise HTTPError if no-one
3081 else should try to handle this url. Return None if you can't
3082 but another Handler might.
3083 """
3084 m = req.get_method()
3085 if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
3086 or code in (301, 302, 303) and m == "POST")):
3087 raise compat_HTTPError(req.full_url, code, msg, headers, fp)
3088 # Strictly (according to RFC 2616), 301 or 302 in response to
3089 # a POST MUST NOT cause a redirection without confirmation
3090 # from the user (of urllib.request, in this case). In practice,
3091 # essentially all clients do redirect in this case, so we do
3092 # the same.
3093
3094 # On python 2 urlh.geturl() may sometimes return redirect URL
3095 # as byte string instead of unicode. This workaround allows
3096 # to force it always return unicode.
3097 if sys.version_info[0] < 3:
3098 newurl = compat_str(newurl)
3099
3100 # Be conciliant with URIs containing a space. This is mainly
3101 # redundant with the more complete encoding done in http_error_302(),
3102 # but it is kept for compatibility with other callers.
3103 newurl = newurl.replace(' ', '%20')
3104
3105 CONTENT_HEADERS = ("content-length", "content-type")
3106 # NB: don't use dict comprehension for python 2.6 compatibility
3107 newheaders = dict((k, v) for k, v in req.headers.items()
3108 if k.lower() not in CONTENT_HEADERS)
3109 return compat_urllib_request.Request(
3110 newurl, headers=newheaders, origin_req_host=req.origin_req_host,
3111 unverifiable=True)
fca6dba8
S
3112
3113
46f59e89
S
3114def extract_timezone(date_str):
3115 m = re.search(
f137e4c2 3116 r'''(?x)
3117 ^.{8,}? # >=8 char non-TZ prefix, if present
3118 (?P<tz>Z| # just the UTC Z, or
3119 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or
3120 (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
3121 [ ]? # optional space
3122 (?P<sign>\+|-) # +/-
3123 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
3124 $)
3125 ''', date_str)
46f59e89
S
3126 if not m:
3127 timezone = datetime.timedelta()
3128 else:
3129 date_str = date_str[:-len(m.group('tz'))]
3130 if not m.group('sign'):
3131 timezone = datetime.timedelta()
3132 else:
3133 sign = 1 if m.group('sign') == '+' else -1
3134 timezone = datetime.timedelta(
3135 hours=sign * int(m.group('hours')),
3136 minutes=sign * int(m.group('minutes')))
3137 return timezone, date_str
3138
3139
08b38d54 3140def parse_iso8601(date_str, delimiter='T', timezone=None):
912b38b4
PH
3141 """ Return a UNIX timestamp from the given date """
3142
3143 if date_str is None:
3144 return None
3145
52c3a6e4
S
3146 date_str = re.sub(r'\.[0-9]+', '', date_str)
3147
08b38d54 3148 if timezone is None:
46f59e89
S
3149 timezone, date_str = extract_timezone(date_str)
3150
52c3a6e4
S
3151 try:
3152 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
3153 dt = datetime.datetime.strptime(date_str, date_format) - timezone
3154 return calendar.timegm(dt.timetuple())
3155 except ValueError:
3156 pass
912b38b4
PH
3157
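# Illustrative conversions (doctest-style; values are UNIX timestamps):
# >>> parse_iso8601('2015-02-10T13:00:00Z')
# 1423573200
# >>> parse_iso8601('2015-02-10T13:00:00+0100')
# 1423569600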
3158
46f59e89
S
3159def date_formats(day_first=True):
3160 return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
3161
3162
42bdd9d0 3163def unified_strdate(date_str, day_first=True):
bf50b038 3164 """Return a string with the date in the format YYYYMMDD"""
64e7ad60
PH
3165
3166 if date_str is None:
3167 return None
bf50b038 3168 upload_date = None
5f6a1245 3169 # Replace commas
026fcc04 3170 date_str = date_str.replace(',', ' ')
42bdd9d0 3171 # Remove AM/PM + timezone
9bb8e0a3 3172 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
46f59e89 3173 _, date_str = extract_timezone(date_str)
42bdd9d0 3174
46f59e89 3175 for expression in date_formats(day_first):
bf50b038
JMF
3176 try:
3177 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
5de90176 3178 except ValueError:
bf50b038 3179 pass
42393ce2
PH
3180 if upload_date is None:
3181 timetuple = email.utils.parsedate_tz(date_str)
3182 if timetuple:
c6b9cf05
S
3183 try:
3184 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
3185 except ValueError:
3186 pass
6a750402
JMF
3187 if upload_date is not None:
3188 return compat_str(upload_date)
bf50b038 3189
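# Example outputs (assuming the module-level DATE_FORMATS tables used by
# date_formats() cover these spellings, as they do in the shipped module):
# >>> unified_strdate('December 21, 2010')
# '20101221'
# >>> unified_strdate('1968-12-10')
# '19681210'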
5f6a1245 3190
46f59e89
S
3191def unified_timestamp(date_str, day_first=True):
3192 if date_str is None:
3193 return None
3194
2ae2ffda 3195 date_str = re.sub(r'[,|]', '', date_str)
46f59e89 3196
7dc2a74e 3197 pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
46f59e89
S
3198 timezone, date_str = extract_timezone(date_str)
3199
3200 # Remove AM/PM + timezone
3201 date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
3202
deef3195
S
3203 # Remove unrecognized timezones from ISO 8601-like timestamps
3204 m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
3205 if m:
3206 date_str = date_str[:-len(m.group('tz'))]
3207
f226880c
PH
3208 # Python only supports microseconds, so remove nanoseconds
3209 m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
3210 if m:
3211 date_str = m.group(1)
3212
46f59e89
S
3213 for expression in date_formats(day_first):
3214 try:
7dc2a74e 3215 dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
46f59e89
S
3216 return calendar.timegm(dt.timetuple())
3217 except ValueError:
3218 pass
3219 timetuple = email.utils.parsedate_tz(date_str)
3220 if timetuple:
7dc2a74e 3221 return calendar.timegm(timetuple) + pm_delta * 3600
46f59e89
S
3222
3223
28e614de 3224def determine_ext(url, default_ext='unknown_video'):
85750f89 3225 if url is None or '.' not in url:
f4776371 3226 return default_ext
9cb9a5df 3227 guess = url.partition('?')[0].rpartition('.')[2]
73e79f2a
PH
3228 if re.match(r'^[A-Za-z0-9]+$', guess):
3229 return guess
a7aaa398
S
3230 # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
3231 elif guess.rstrip('/') in KNOWN_EXTENSIONS:
9cb9a5df 3232 return guess.rstrip('/')
73e79f2a 3233 else:
cbdbb766 3234 return default_ext
73e79f2a 3235
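# Quick sanity examples (the second relies on 'mp4' being in KNOWN_EXTENSIONS):
# >>> determine_ext('http://example.com/foo/bar.mp4?download')
# 'mp4'
# >>> determine_ext('http://example.com/foo/bar.mp4/?download')
# 'mp4'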
5f6a1245 3236
824fa511
S
3237def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
3238 return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
d4051a8e 3239
5f6a1245 3240
9e62f283 3241def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
37254abc
JMF
3242 """
3243 Return a datetime object from a string in the format YYYYMMDD or
9e62f283 3244 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3245
3246 format: string date format used to return datetime object from
3247 precision: round the time portion of a datetime object.
3248 auto|microsecond|second|minute|hour|day.
3249 auto: round to the unit provided in date_str (if applicable).
3250 """
3251 auto_precision = False
3252 if precision == 'auto':
3253 auto_precision = True
3254 precision = 'microsecond'
3255 today = datetime_round(datetime.datetime.now(), precision)
f8795e10 3256 if date_str in ('now', 'today'):
37254abc 3257 return today
f8795e10
PH
3258 if date_str == 'yesterday':
3259 return today - datetime.timedelta(days=1)
9e62f283 3260 match = re.match(
3261 r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
3262 date_str)
37254abc 3263 if match is not None:
9e62f283 3264 start_time = datetime_from_str(match.group('start'), precision, format)
3265 time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
37254abc 3266 unit = match.group('unit')
9e62f283 3267 if unit == 'month' or unit == 'year':
3268 new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
37254abc 3269 unit = 'day'
9e62f283 3270 else:
3271 if unit == 'week':
3272 unit = 'day'
3273 time *= 7
3274 delta = datetime.timedelta(**{unit + 's': time})
3275 new_date = start_time + delta
3276 if auto_precision:
3277 return datetime_round(new_date, unit)
3278 return new_date
3279
3280 return datetime_round(datetime.datetime.strptime(date_str, format), precision)
3281
3282
3283def date_from_str(date_str, format='%Y%m%d'):
3284 """
3285 Return a datetime object from a string in the format YYYYMMDD or
3286 (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
3287
3288 format: string date format used to return datetime object from
3289 """
3290 return datetime_from_str(date_str, precision='microsecond', format=format).date()
3291
3292
3293def datetime_add_months(dt, months):
3294 """Increment/Decrement a datetime object by months."""
3295 month = dt.month + months - 1
3296 year = dt.year + month // 12
3297 month = month % 12 + 1
3298 day = min(dt.day, calendar.monthrange(year, month)[1])
3299 return dt.replace(year, month, day)
3300
3301
3302def datetime_round(dt, precision='day'):
3303 """
3304 Round a datetime object's time to a specific precision
3305 """
3306 if precision == 'microsecond':
3307 return dt
3308
3309 unit_seconds = {
3310 'day': 86400,
3311 'hour': 3600,
3312 'minute': 60,
3313 'second': 1,
3314 }
3315 roundto = lambda x, n: ((x + n / 2) // n) * n
3316 timestamp = calendar.timegm(dt.timetuple())
3317 return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
5f6a1245
JW
3318
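# Small usage sketch for the date helpers above (results of the relative forms
# naturally depend on the current date):
# >>> datetime_add_months(datetime.datetime(2021, 1, 31), 1)
# datetime.datetime(2021, 2, 28, 0, 0)
# >>> date_from_str('now-1week')      # the date 7 days before today
# >>> date_from_str('20211231')
# datetime.date(2021, 12, 31)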
3319
e63fc1be 3320def hyphenate_date(date_str):
3321 """
3322 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
3323 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
3324 if match is not None:
3325 return '-'.join(match.groups())
3326 else:
3327 return date_str
3328
5f6a1245 3329
bd558525
JMF
3330class DateRange(object):
3331 """Represents a time interval between two dates"""
5f6a1245 3332
bd558525
JMF
3333 def __init__(self, start=None, end=None):
3334 """start and end must be strings in the format accepted by date"""
3335 if start is not None:
3336 self.start = date_from_str(start)
3337 else:
3338 self.start = datetime.datetime.min.date()
3339 if end is not None:
3340 self.end = date_from_str(end)
3341 else:
3342 self.end = datetime.datetime.max.date()
37254abc 3343 if self.start > self.end:
bd558525 3344 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
5f6a1245 3345
bd558525
JMF
3346 @classmethod
3347 def day(cls, day):
3348 """Returns a range that only contains the given day"""
5f6a1245
JW
3349 return cls(day, day)
3350
bd558525
JMF
3351 def __contains__(self, date):
3352 """Check if the date is in the range"""
37254abc
JMF
3353 if not isinstance(date, datetime.date):
3354 date = date_from_str(date)
3355 return self.start <= date <= self.end
5f6a1245 3356
bd558525 3357 def __str__(self):
5f6a1245 3358 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3359
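# Membership test example: strings in YYYYMMDD form are accepted directly.
# >>> '20210415' in DateRange('20210101', '20211231')
# True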
3360
3361def platform_name():
3362 """ Returns the platform name as a compat_str """
3363 res = platform.platform()
3364 if isinstance(res, bytes):
3365 res = res.decode(preferredencoding())
3366
3367 assert isinstance(res, compat_str)
3368 return res
c257baff
PH
3369
3370
49fa4d9a
N
3371def get_windows_version():
3372 ''' Get Windows version. None if it's not running on Windows '''
3373 if compat_os_name == 'nt':
3374 return version_tuple(platform.win32_ver()[1])
3375 else:
3376 return None
3377
3378
b58ddb32
PH
3379def _windows_write_string(s, out):
3380 """ Returns True if the string was written using special methods,
3381 False if it has yet to be written out."""
3382 # Adapted from http://stackoverflow.com/a/3259271/35070
3383
3384 import ctypes
3385 import ctypes.wintypes
3386
3387 WIN_OUTPUT_IDS = {
3388 1: -11,
3389 2: -12,
3390 }
3391
a383a98a
PH
3392 try:
3393 fileno = out.fileno()
3394 except AttributeError:
3395 # If the output stream doesn't have a fileno, it's virtual
3396 return False
aa42e873
PH
3397 except io.UnsupportedOperation:
3398 # Some strange Windows pseudo files?
3399 return False
b58ddb32
PH
3400 if fileno not in WIN_OUTPUT_IDS:
3401 return False
3402
d7cd9a9e 3403 GetStdHandle = compat_ctypes_WINFUNCTYPE(
b58ddb32 3404 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
d7cd9a9e 3405 ('GetStdHandle', ctypes.windll.kernel32))
b58ddb32
PH
3406 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
3407
d7cd9a9e 3408 WriteConsoleW = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3409 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
3410 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
d7cd9a9e 3411 ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
b58ddb32
PH
3412 written = ctypes.wintypes.DWORD(0)
3413
d7cd9a9e 3414 GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
b58ddb32
PH
3415 FILE_TYPE_CHAR = 0x0002
3416 FILE_TYPE_REMOTE = 0x8000
d7cd9a9e 3417 GetConsoleMode = compat_ctypes_WINFUNCTYPE(
b58ddb32
PH
3418 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
3419 ctypes.POINTER(ctypes.wintypes.DWORD))(
d7cd9a9e 3420 ('GetConsoleMode', ctypes.windll.kernel32))
b58ddb32
PH
3421 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
3422
3423 def not_a_console(handle):
3424 if handle == INVALID_HANDLE_VALUE or handle is None:
3425 return True
3089bc74
S
3426 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
3427 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
b58ddb32
PH
3428
3429 if not_a_console(h):
3430 return False
3431
d1b9c912
PH
3432 def next_nonbmp_pos(s):
3433 try:
3434 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
3435 except StopIteration:
3436 return len(s)
3437
3438 while s:
3439 count = min(next_nonbmp_pos(s), 1024)
3440
b58ddb32 3441 ret = WriteConsoleW(
d1b9c912 3442 h, s, count if count else 2, ctypes.byref(written), None)
b58ddb32
PH
3443 if ret == 0:
3444 raise OSError('Failed to write string')
d1b9c912
PH
3445 if not count: # We just wrote a non-BMP character
3446 assert written.value == 2
3447 s = s[1:]
3448 else:
3449 assert written.value > 0
3450 s = s[written.value:]
b58ddb32
PH
3451 return True
3452
3453
734f90bb 3454def write_string(s, out=None, encoding=None):
7459e3a2
PH
3455 if out is None:
3456 out = sys.stderr
8bf48f23 3457 assert type(s) == compat_str
7459e3a2 3458
b58ddb32
PH
3459 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
3460 if _windows_write_string(s, out):
3461 return
3462
3089bc74
S
3463 if ('b' in getattr(out, 'mode', '')
3464 or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
104aa738
PH
3465 byt = s.encode(encoding or preferredencoding(), 'ignore')
3466 out.write(byt)
3467 elif hasattr(out, 'buffer'):
3468 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
3469 byt = s.encode(enc, 'ignore')
3470 out.buffer.write(byt)
3471 else:
8bf48f23 3472 out.write(s)
7459e3a2
PH
3473 out.flush()
3474
3475
48ea9cea
PH
3476def bytes_to_intlist(bs):
3477 if not bs:
3478 return []
3479 if isinstance(bs[0], int): # Python 3
3480 return list(bs)
3481 else:
3482 return [ord(c) for c in bs]
3483
c257baff 3484
cba892fa 3485def intlist_to_bytes(xs):
3486 if not xs:
3487 return b''
edaa23f8 3488 return compat_struct_pack('%dB' % len(xs), *xs)
c38b1e77
PH
3489
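# These two helpers are inverses of each other:
# >>> bytes_to_intlist(b'abc')
# [97, 98, 99]
# >>> intlist_to_bytes([97, 98, 99])
# b'abc'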
3490
c1c9a79c
PH
3491# Cross-platform file locking
3492if sys.platform == 'win32':
3493 import ctypes.wintypes
3494 import msvcrt
3495
3496 class OVERLAPPED(ctypes.Structure):
3497 _fields_ = [
3498 ('Internal', ctypes.wintypes.LPVOID),
3499 ('InternalHigh', ctypes.wintypes.LPVOID),
3500 ('Offset', ctypes.wintypes.DWORD),
3501 ('OffsetHigh', ctypes.wintypes.DWORD),
3502 ('hEvent', ctypes.wintypes.HANDLE),
3503 ]
3504
3505 kernel32 = ctypes.windll.kernel32
3506 LockFileEx = kernel32.LockFileEx
3507 LockFileEx.argtypes = [
3508 ctypes.wintypes.HANDLE, # hFile
3509 ctypes.wintypes.DWORD, # dwFlags
3510 ctypes.wintypes.DWORD, # dwReserved
3511 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3512 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3513 ctypes.POINTER(OVERLAPPED) # Overlapped
3514 ]
3515 LockFileEx.restype = ctypes.wintypes.BOOL
3516 UnlockFileEx = kernel32.UnlockFileEx
3517 UnlockFileEx.argtypes = [
3518 ctypes.wintypes.HANDLE, # hFile
3519 ctypes.wintypes.DWORD, # dwReserved
3520 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
3521 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
3522 ctypes.POINTER(OVERLAPPED) # Overlapped
3523 ]
3524 UnlockFileEx.restype = ctypes.wintypes.BOOL
3525 whole_low = 0xffffffff
3526 whole_high = 0x7fffffff
3527
3528 def _lock_file(f, exclusive):
3529 overlapped = OVERLAPPED()
3530 overlapped.Offset = 0
3531 overlapped.OffsetHigh = 0
3532 overlapped.hEvent = 0
3533 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
3534 handle = msvcrt.get_osfhandle(f.fileno())
3535 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
3536 whole_low, whole_high, f._lock_file_overlapped_p):
3537 raise OSError('Locking file failed: %r' % ctypes.FormatError())
3538
3539 def _unlock_file(f):
3540 assert f._lock_file_overlapped_p
3541 handle = msvcrt.get_osfhandle(f.fileno())
3542 if not UnlockFileEx(handle, 0,
3543 whole_low, whole_high, f._lock_file_overlapped_p):
3544 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
3545
3546else:
399a76e6
YCH
3547 # Some platforms, such as Jython, are missing fcntl

3548 try:
3549 import fcntl
c1c9a79c 3550
399a76e6
YCH
3551 def _lock_file(f, exclusive):
3552 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
c1c9a79c 3553
399a76e6
YCH
3554 def _unlock_file(f):
3555 fcntl.flock(f, fcntl.LOCK_UN)
3556 except ImportError:
3557 UNSUPPORTED_MSG = 'file locking is not supported on this platform'
3558
3559 def _lock_file(f, exclusive):
3560 raise IOError(UNSUPPORTED_MSG)
3561
3562 def _unlock_file(f):
3563 raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3564
3565
3566class locked_file(object):
3567 def __init__(self, filename, mode, encoding=None):
3568 assert mode in ['r', 'a', 'w']
3569 self.f = io.open(filename, mode, encoding=encoding)
3570 self.mode = mode
3571
3572 def __enter__(self):
3573 exclusive = self.mode != 'r'
3574 try:
3575 _lock_file(self.f, exclusive)
3576 except IOError:
3577 self.f.close()
3578 raise
3579 return self
3580
3581 def __exit__(self, etype, value, traceback):
3582 try:
3583 _unlock_file(self.f)
3584 finally:
3585 self.f.close()
3586
3587 def __iter__(self):
3588 return iter(self.f)
3589
3590 def write(self, *args):
3591 return self.f.write(*args)
3592
3593 def read(self, *args):
3594 return self.f.read(*args)
4eb7f1d1
JMF
3595
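# Intended usage is as a context manager; 'r' takes a shared lock, 'a'/'w' an
# exclusive one (file name and contents below are only an example):
# with locked_file('archive.txt', 'a', encoding='utf-8') as f:
#     f.write('youtube abc123\n')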
3596
4644ac55
S
3597def get_filesystem_encoding():
3598 encoding = sys.getfilesystemencoding()
3599 return encoding if encoding is not None else 'utf-8'
3600
3601
4eb7f1d1 3602def shell_quote(args):
a6a173c2 3603 quoted_args = []
4644ac55 3604 encoding = get_filesystem_encoding()
a6a173c2
JMF
3605 for a in args:
3606 if isinstance(a, bytes):
3607 # We may get a filename encoded with 'encodeFilename'
3608 a = a.decode(encoding)
aefce8e6 3609 quoted_args.append(compat_shlex_quote(a))
28e614de 3610 return ' '.join(quoted_args)
9d4660ca
PH
3611
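# Example (POSIX-style quoting via compat_shlex_quote):
# >>> shell_quote(['ffmpeg', '-i', 'my file.mp4'])
# "ffmpeg -i 'my file.mp4'"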
3612
3613def smuggle_url(url, data):
3614 """ Pass additional data in a URL for internal use. """
3615
81953d1a
RA
3616 url, idata = unsmuggle_url(url, {})
3617 data.update(idata)
15707c7e 3618 sdata = compat_urllib_parse_urlencode(
28e614de
PH
3619 {'__youtubedl_smuggle': json.dumps(data)})
3620 return url + '#' + sdata
9d4660ca
PH
3621
3622
79f82953 3623def unsmuggle_url(smug_url, default=None):
83e865a3 3624 if '#__youtubedl_smuggle' not in smug_url:
79f82953 3625 return smug_url, default
28e614de
PH
3626 url, _, sdata = smug_url.rpartition('#')
3627 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
9d4660ca
PH
3628 data = json.loads(jsond)
3629 return url, data
02dbf93f
PH
3630
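# Round-trip sketch: the extra dict travels in the URL fragment as JSON.
# >>> url = smuggle_url('https://example.com/video', {'referer': 'https://example.com/'})
# >>> unsmuggle_url(url)
# ('https://example.com/video', {'referer': 'https://example.com/'})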
3631
02dbf93f
PH
3632def format_bytes(bytes):
3633 if bytes is None:
28e614de 3634 return 'N/A'
02dbf93f
PH
3635 if type(bytes) is str:
3636 bytes = float(bytes)
3637 if bytes == 0.0:
3638 exponent = 0
3639 else:
3640 exponent = int(math.log(bytes, 1024.0))
28e614de 3641 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
02dbf93f 3642 converted = float(bytes) / float(1024 ** exponent)
28e614de 3643 return '%.2f%s' % (converted, suffix)
f53c966a 3644
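# Examples (binary units, two decimals):
# >>> format_bytes(2048)
# '2.00KiB'
# >>> format_bytes(None)
# 'N/A'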
1c088fa8 3645
fb47597b
S
3646def lookup_unit_table(unit_table, s):
3647 units_re = '|'.join(re.escape(u) for u in unit_table)
3648 m = re.match(
782b1b5b 3649 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
fb47597b
S
3650 if not m:
3651 return None
3652 num_str = m.group('num').replace(',', '.')
3653 mult = unit_table[m.group('unit')]
3654 return int(float(num_str) * mult)
3655
3656
be64b5b0
PH
3657def parse_filesize(s):
3658 if s is None:
3659 return None
3660
dfb1b146 3661 # The lower-case forms are of course incorrect and unofficial,
be64b5b0
PH
3662 # but we support those too
3663 _UNIT_TABLE = {
3664 'B': 1,
3665 'b': 1,
70852b47 3666 'bytes': 1,
be64b5b0
PH
3667 'KiB': 1024,
3668 'KB': 1000,
3669 'kB': 1024,
3670 'Kb': 1000,
13585d76 3671 'kb': 1000,
70852b47
YCH
3672 'kilobytes': 1000,
3673 'kibibytes': 1024,
be64b5b0
PH
3674 'MiB': 1024 ** 2,
3675 'MB': 1000 ** 2,
3676 'mB': 1024 ** 2,
3677 'Mb': 1000 ** 2,
13585d76 3678 'mb': 1000 ** 2,
70852b47
YCH
3679 'megabytes': 1000 ** 2,
3680 'mebibytes': 1024 ** 2,
be64b5b0
PH
3681 'GiB': 1024 ** 3,
3682 'GB': 1000 ** 3,
3683 'gB': 1024 ** 3,
3684 'Gb': 1000 ** 3,
13585d76 3685 'gb': 1000 ** 3,
70852b47
YCH
3686 'gigabytes': 1000 ** 3,
3687 'gibibytes': 1024 ** 3,
be64b5b0
PH
3688 'TiB': 1024 ** 4,
3689 'TB': 1000 ** 4,
3690 'tB': 1024 ** 4,
3691 'Tb': 1000 ** 4,
13585d76 3692 'tb': 1000 ** 4,
70852b47
YCH
3693 'terabytes': 1000 ** 4,
3694 'tebibytes': 1024 ** 4,
be64b5b0
PH
3695 'PiB': 1024 ** 5,
3696 'PB': 1000 ** 5,
3697 'pB': 1024 ** 5,
3698 'Pb': 1000 ** 5,
13585d76 3699 'pb': 1000 ** 5,
70852b47
YCH
3700 'petabytes': 1000 ** 5,
3701 'pebibytes': 1024 ** 5,
be64b5b0
PH
3702 'EiB': 1024 ** 6,
3703 'EB': 1000 ** 6,
3704 'eB': 1024 ** 6,
3705 'Eb': 1000 ** 6,
13585d76 3706 'eb': 1000 ** 6,
70852b47
YCH
3707 'exabytes': 1000 ** 6,
3708 'exbibytes': 1024 ** 6,
be64b5b0
PH
3709 'ZiB': 1024 ** 7,
3710 'ZB': 1000 ** 7,
3711 'zB': 1024 ** 7,
3712 'Zb': 1000 ** 7,
13585d76 3713 'zb': 1000 ** 7,
70852b47
YCH
3714 'zettabytes': 1000 ** 7,
3715 'zebibytes': 1024 ** 7,
be64b5b0
PH
3716 'YiB': 1024 ** 8,
3717 'YB': 1000 ** 8,
3718 'yB': 1024 ** 8,
3719 'Yb': 1000 ** 8,
13585d76 3720 'yb': 1000 ** 8,
70852b47
YCH
3721 'yottabytes': 1000 ** 8,
3722 'yobibytes': 1024 ** 8,
be64b5b0
PH
3723 }
3724
fb47597b
S
3725 return lookup_unit_table(_UNIT_TABLE, s)
3726
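# Examples (note that, per the table above, the lower-case 'kB'/'mB'/... forms are
# deliberately treated as binary multiples):
# >>> parse_filesize('1.5GiB')
# 1610612736
# >>> parse_filesize('500 kB')
# 512000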
3727
3728def parse_count(s):
3729 if s is None:
be64b5b0
PH
3730 return None
3731
fb47597b
S
3732 s = s.strip()
3733
3734 if re.match(r'^[\d,.]+$', s):
3735 return str_to_int(s)
3736
3737 _UNIT_TABLE = {
3738 'k': 1000,
3739 'K': 1000,
3740 'm': 1000 ** 2,
3741 'M': 1000 ** 2,
3742 'kk': 1000 ** 2,
3743 'KK': 1000 ** 2,
3744 }
be64b5b0 3745
fb47597b 3746 return lookup_unit_table(_UNIT_TABLE, s)
be64b5b0 3747
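# Examples:
# >>> parse_count('1,234')
# 1234
# >>> parse_count('1.5k')
# 1500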
2f7ae819 3748
b871d7e9
S
3749def parse_resolution(s):
3750 if s is None:
3751 return {}
3752
17ec8bcf 3753 mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
b871d7e9
S
3754 if mobj:
3755 return {
3756 'width': int(mobj.group('w')),
3757 'height': int(mobj.group('h')),
3758 }
3759
17ec8bcf 3760 mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
b871d7e9
S
3761 if mobj:
3762 return {'height': int(mobj.group(1))}
3763
3764 mobj = re.search(r'\b([48])[kK]\b', s)
3765 if mobj:
3766 return {'height': int(mobj.group(1)) * 540}
3767
3768 return {}
3769
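# Examples:
# >>> parse_resolution('1920x1080')
# {'width': 1920, 'height': 1080}
# >>> parse_resolution('720p')
# {'height': 720}
# >>> parse_resolution('4k')
# {'height': 2160}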
3770
0dc41787
S
3771def parse_bitrate(s):
3772 if not isinstance(s, compat_str):
3773 return
3774 mobj = re.search(r'\b(\d+)\s*kbps', s)
3775 if mobj:
3776 return int(mobj.group(1))
3777
3778
a942d6cb 3779def month_by_name(name, lang='en'):
caefb1de
PH
3780 """ Return the number of a month by (locale-independently) English name """
3781
f6717dec 3782 month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
a942d6cb 3783
caefb1de 3784 try:
f6717dec 3785 return month_names.index(name) + 1
7105440c
YCH
3786 except ValueError:
3787 return None
3788
3789
3790def month_by_abbreviation(abbrev):
3791 """ Return the number of a month by (locale-independently) English
3792 abbreviations """
3793
3794 try:
3795 return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de
PH
3796 except ValueError:
3797 return None
18258362
JMF
3798
3799
5aafe895 3800def fix_xml_ampersands(xml_str):
18258362 3801 """Replace all the '&' by '&amp;' in XML"""
5aafe895
PH
3802 return re.sub(
3803 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
28e614de 3804 '&amp;',
5aafe895 3805 xml_str)
e3946f98
PH
3806
3807
3808def setproctitle(title):
8bf48f23 3809 assert isinstance(title, compat_str)
c1c05c67
YCH
3810
3811 # ctypes in Jython is not complete
3812 # http://bugs.jython.org/issue2148
3813 if sys.platform.startswith('java'):
3814 return
3815
e3946f98 3816 try:
611c1dd9 3817 libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98
PH
3818 except OSError:
3819 return
2f49bcd6
RC
3820 except TypeError:
3821 # LoadLibrary in Windows Python 2.7.13 only expects
3822 # a bytestring, but since unicode_literals turns
3823 # every string into a unicode string, it fails.
3824 return
6eefe533
PH
3825 title_bytes = title.encode('utf-8')
3826 buf = ctypes.create_string_buffer(len(title_bytes))
3827 buf.value = title_bytes
e3946f98 3828 try:
6eefe533 3829 libc.prctl(15, buf, 0, 0, 0)
e3946f98
PH
3830 except AttributeError:
3831 return # Strange libc, just skip this
d7dda168
PH
3832
3833
3834def remove_start(s, start):
46bc9b7d 3835 return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174
PH
3836
3837
2b9faf55 3838def remove_end(s, end):
46bc9b7d 3839 return s[:-len(end)] if s is not None and s.endswith(end) else s
2b9faf55
PH
3840
3841
31b2051e
S
3842def remove_quotes(s):
3843 if s is None or len(s) < 2:
3844 return s
3845 for quote in ('"', "'", ):
3846 if s[0] == quote and s[-1] == quote:
3847 return s[1:-1]
3848 return s
3849
3850
b6e0c7d2
U
3851def get_domain(url):
3852 domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
3853 return domain.group('domain') if domain else None
3854
3855
29eb5174 3856def url_basename(url):
9b8aaeed 3857 path = compat_urlparse.urlparse(url).path
28e614de 3858 return path.strip('/').split('/')[-1]
aa94a6d3
PH
3859
3860
02dc0a36
S
3861def base_url(url):
3862 return re.match(r'https?://[^?#&]+/', url).group()
3863
3864
e34c3361 3865def urljoin(base, path):
4b5de77b
S
3866 if isinstance(path, bytes):
3867 path = path.decode('utf-8')
e34c3361
S
3868 if not isinstance(path, compat_str) or not path:
3869 return None
fad4ceb5 3870 if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
e34c3361 3871 return path
4b5de77b
S
3872 if isinstance(base, bytes):
3873 base = base.decode('utf-8')
3874 if not isinstance(base, compat_str) or not re.match(
3875 r'^(?:https?:)?//', base):
e34c3361
S
3876 return None
3877 return compat_urlparse.urljoin(base, path)
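# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows how the URL helpers above behave; the helper name _url_helper_examples
# is hypothetical and only exists for illustration.
def _url_helper_examples():
    assert url_basename('https://example.com/a/b/video.mp4?x=1') == 'video.mp4'
    assert base_url('https://example.com/a/b/video.mp4?x=1') == 'https://example.com/a/b/'
    assert urljoin('https://example.com/a/', 'b/c.m3u8') == 'https://example.com/a/b/c.m3u8'
    assert urljoin(None, 'b/c.m3u8') is None    # invalid/missing bases are tolerated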
3878
3879
aa94a6d3
PH
3880class HEADRequest(compat_urllib_request.Request):
3881 def get_method(self):
611c1dd9 3882 return 'HEAD'
7217e148
PH
3883
3884
95cf60e8
S
3885class PUTRequest(compat_urllib_request.Request):
3886 def get_method(self):
3887 return 'PUT'
3888
3889
9732d77e 3890def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
28746fbd
PH
3891 if get_attr:
3892 if v is not None:
3893 v = getattr(v, get_attr, None)
9572013d
PH
3894 if v == '':
3895 v = None
1812afb7
S
3896 if v is None:
3897 return default
3898 try:
3899 return int(v) * invscale // scale
31c49255 3900 except (ValueError, TypeError, OverflowError):
af98f8ff 3901 return default
9732d77e 3902
9572013d 3903
40a90862
JMF
3904def str_or_none(v, default=None):
3905 return default if v is None else compat_str(v)
3906
9732d77e
PH
3907
3908def str_to_int(int_str):
48d4681e 3909 """ A more relaxed version of int_or_none """
42db58ec 3910 if isinstance(int_str, compat_integer_types):
348c6bf1 3911 return int_str
42db58ec
S
3912 elif isinstance(int_str, compat_str):
3913 int_str = re.sub(r'[,\.\+]', '', int_str)
3914 return int_or_none(int_str)
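# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the coercion helpers above; _int_coercion_examples is a hypothetical name.
def _int_coercion_examples():
    assert int_or_none('42') == 42
    assert int_or_none('', default=0) == 0            # empty string falls back to the default
    assert int_or_none(1500, scale=1000) == 1         # e.g. milliseconds -> seconds
    assert str_to_int('1,234,567') == 1234567         # relaxed: ',', '.' and '+' are stripped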
608d11f5
PH
3915
3916
9732d77e 3917def float_or_none(v, scale=1, invscale=1, default=None):
caf80631
S
3918 if v is None:
3919 return default
3920 try:
3921 return float(v) * invscale / scale
5e1271c5 3922 except (ValueError, TypeError):
caf80631 3923 return default
43f775e4
PH
3924
3925
c7e327c4
S
3926def bool_or_none(v, default=None):
3927 return v if isinstance(v, bool) else default
3928
3929
53cd37ba
S
3930def strip_or_none(v, default=None):
3931 return v.strip() if isinstance(v, compat_str) else default
b72b4431
S
3932
3933
af03000a
S
3934def url_or_none(url):
3935 if not url or not isinstance(url, compat_str):
3936 return None
3937 url = url.strip()
29f7c58a 3938 return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a
S
3939
3940
e29663c6 3941def strftime_or_none(timestamp, date_format, default=None):
3942 datetime_object = None
3943 try:
3944 if isinstance(timestamp, compat_numeric_types): # unix timestamp
3945 datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
3946 elif isinstance(timestamp, compat_str): # assume YYYYMMDD
3947 datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
3948 return datetime_object.strftime(date_format)
3949 except (ValueError, TypeError, AttributeError):
3950 return default
3951
3952
608d11f5 3953def parse_duration(s):
8f9312c3 3954 if not isinstance(s, compat_basestring):
608d11f5
PH
3955 return None
3956
ca7b3246
S
3957 s = s.strip()
3958
acaff495 3959 days, hours, mins, secs, ms = [None] * 5
15846398 3960 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
acaff495 3961 if m:
3962 days, hours, mins, secs, ms = m.groups()
3963 else:
3964 m = re.match(
056653bb
S
3965 r'''(?ix)(?:P?
3966 (?:
3967 [0-9]+\s*y(?:ears?)?\s*
3968 )?
3969 (?:
3970 [0-9]+\s*m(?:onths?)?\s*
3971 )?
3972 (?:
3973 [0-9]+\s*w(?:eeks?)?\s*
3974 )?
8f4b58d7 3975 (?:
acaff495 3976 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
8f4b58d7 3977 )?
056653bb 3978 T)?
acaff495 3979 (?:
3980 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
3981 )?
3982 (?:
3983 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
3984 )?
3985 (?:
3986 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
15846398 3987 )?Z?$''', s)
acaff495 3988 if m:
3989 days, hours, mins, secs, ms = m.groups()
3990 else:
15846398 3991 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
acaff495 3992 if m:
3993 hours, mins = m.groups()
3994 else:
3995 return None
3996
3997 duration = 0
3998 if secs:
3999 duration += float(secs)
4000 if mins:
4001 duration += float(mins) * 60
4002 if hours:
4003 duration += float(hours) * 60 * 60
4004 if days:
4005 duration += float(days) * 24 * 60 * 60
4006 if ms:
4007 duration += float(ms)
4008 return duration
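# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the duration formats parse_duration above accepts; the helper name
# _parse_duration_examples is hypothetical.
def _parse_duration_examples():
    assert parse_duration('1:23:45') == 5025        # [[HH:]MM:]SS
    assert parse_duration('PT1H30M') == 5400        # ISO 8601-style
    assert parse_duration('9.5 minutes') == 570     # free-form text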
91d7d0b3
JMF
4009
4010
e65e4c88 4011def prepend_extension(filename, ext, expected_real_ext=None):
5f6a1245 4012 name, real_ext = os.path.splitext(filename)
e65e4c88
S
4013 return (
4014 '{0}.{1}{2}'.format(name, ext, real_ext)
4015 if not expected_real_ext or real_ext[1:] == expected_real_ext
4016 else '{0}.{1}'.format(filename, ext))
d70ad093
PH
4017
4018
b3ed15b7
S
4019def replace_extension(filename, ext, expected_real_ext=None):
4020 name, real_ext = os.path.splitext(filename)
4021 return '{0}.{1}'.format(
4022 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
4023 ext)
4024
4025
d70ad093
PH
4026def check_executable(exe, args=[]):
4027 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
4028 args can be a list of arguments for a short output (like -version) """
4029 try:
d3c93ec2 4030 Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
d70ad093
PH
4031 except OSError:
4032 return False
4033 return exe
b7ab0590
PH
4034
4035
9af98e17 4036def _get_exe_version_output(exe, args):
95807118 4037 try:
b64d04c1 4038 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
7a5c1cfe 4039 # SIGTTOU if yt-dlp is run in the background.
067aa17e 4040 # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
d3c93ec2 4041 out, _ = Popen(
4042 [encodeArgument(exe)] + args, stdin=subprocess.PIPE,
4043 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
95807118
PH
4044 except OSError:
4045 return False
cae97f65
PH
4046 if isinstance(out, bytes): # Python 2.x
4047 out = out.decode('ascii', 'ignore')
9af98e17 4048 return out
cae97f65
PH
4049
4050
4051def detect_exe_version(output, version_re=None, unrecognized='present'):
4052 assert isinstance(output, compat_str)
4053 if version_re is None:
4054 version_re = r'version\s+([-0-9._a-zA-Z]+)'
4055 m = re.search(version_re, output)
95807118
PH
4056 if m:
4057 return m.group(1)
4058 else:
4059 return unrecognized
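# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows detect_exe_version on a typical ffmpeg banner; the banner text and the
# helper name _detect_exe_version_example are assumptions for illustration.
def _detect_exe_version_example():
    output = 'ffmpeg version 4.4.1 Copyright (c) 2000-2021 the FFmpeg developers'
    assert detect_exe_version(output) == '4.4.1'
    assert detect_exe_version('no version here') == 'present'   # fallback value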
4060
4061
9af98e17 4062def get_exe_version(exe, args=['--version'],
4063 version_re=None, unrecognized='present'):
4064 """ Returns the version of the specified executable,
4065 or False if the executable is not present """
4066 out = _get_exe_version_output(exe, args)
4067 return detect_exe_version(out, version_re, unrecognized) if out else False
4068
4069
cb89cfc1 4070class LazyList(collections.abc.Sequence):
483336e7 4071 ''' Lazy immutable list from an iterable
4072 Note that slices of a LazyList are lists and not LazyList'''
4073
8e5fecc8 4074 class IndexError(IndexError):
4075 pass
4076
483336e7 4077 def __init__(self, iterable):
4078 self.__iterable = iter(iterable)
4079 self.__cache = []
28419ca2 4080 self.__reversed = False
483336e7 4081
4082 def __iter__(self):
28419ca2 4083 if self.__reversed:
4084 # We need to consume the entire iterable to iterate in reverse
981052c9 4085 yield from self.exhaust()
28419ca2 4086 return
4087 yield from self.__cache
483336e7 4088 for item in self.__iterable:
4089 self.__cache.append(item)
4090 yield item
4091
981052c9 4092 def __exhaust(self):
483336e7 4093 self.__cache.extend(self.__iterable)
9f1a1c36 4094 # Discard the emptied iterable to make it pickle-able
4095 self.__iterable = []
28419ca2 4096 return self.__cache
4097
981052c9 4098 def exhaust(self):
4099 ''' Evaluate the entire iterable '''
4100 return self.__exhaust()[::-1 if self.__reversed else 1]
4101
28419ca2 4102 @staticmethod
981052c9 4103 def __reverse_index(x):
e0f2b4b4 4104 return None if x is None else -(x + 1)
483336e7 4105
4106 def __getitem__(self, idx):
4107 if isinstance(idx, slice):
28419ca2 4108 if self.__reversed:
e0f2b4b4 4109 idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
4110 start, stop, step = idx.start, idx.stop, idx.step or 1
483336e7 4111 elif isinstance(idx, int):
28419ca2 4112 if self.__reversed:
981052c9 4113 idx = self.__reverse_index(idx)
e0f2b4b4 4114 start, stop, step = idx, idx, 0
483336e7 4115 else:
4116 raise TypeError('indices must be integers or slices')
e0f2b4b4 4117 if ((start or 0) < 0 or (stop or 0) < 0
4118 or (start is None and step < 0)
4119 or (stop is None and step > 0)):
483336e7 4120 # We need to consume the entire iterable to be able to slice from the end
4121 # Obviously, never use this with infinite iterables
8e5fecc8 4122 self.__exhaust()
4123 try:
4124 return self.__cache[idx]
4125 except IndexError as e:
4126 raise self.IndexError(e) from e
e0f2b4b4 4127 n = max(start or 0, stop or 0) - len(self.__cache) + 1
28419ca2 4128 if n > 0:
4129 self.__cache.extend(itertools.islice(self.__iterable, n))
8e5fecc8 4130 try:
4131 return self.__cache[idx]
4132 except IndexError as e:
4133 raise self.IndexError(e) from e
483336e7 4134
4135 def __bool__(self):
4136 try:
28419ca2 4137 self[-1] if self.__reversed else self[0]
8e5fecc8 4138 except self.IndexError:
483336e7 4139 return False
4140 return True
4141
4142 def __len__(self):
8e5fecc8 4143 self.__exhaust()
483336e7 4144 return len(self.__cache)
4145
981052c9 4146 def reverse(self):
28419ca2 4147 self.__reversed = not self.__reversed
4148 return self
4149
4150 def __repr__(self):
4151 # repr and str should mimic a list. So we exhaust the iterable
4152 return repr(self.exhaust())
4153
4154 def __str__(self):
4155 return repr(self.exhaust())
4156
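# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows how LazyList consumes its iterable on demand; _lazy_list_examples is a
# hypothetical name.
def _lazy_list_examples():
    lazy = LazyList(itertools.count())                 # works with infinite iterables
    assert lazy[3] == 3                                # consumes and caches items 0..3
    assert lazy[:3] == [0, 1, 2]                       # slices are plain lists
    assert list(LazyList(range(5)).reverse()) == [4, 3, 2, 1, 0]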
483336e7 4157
7be9ccff 4158class PagedList:
dd26ced1
PH
4159 def __len__(self):
4160 # This is only useful for tests
4161 return len(self.getslice())
4162
7be9ccff 4163 def __init__(self, pagefunc, pagesize, use_cache=True):
4164 self._pagefunc = pagefunc
4165 self._pagesize = pagesize
4166 self._use_cache = use_cache
4167 self._cache = {}
4168
4169 def getpage(self, pagenum):
4170 page_results = self._cache.get(pagenum) or list(self._pagefunc(pagenum))
4171 if self._use_cache:
4172 self._cache[pagenum] = page_results
4173 return page_results
4174
4175 def getslice(self, start=0, end=None):
4176 return list(self._getslice(start, end))
4177
4178 def _getslice(self, start, end):
55575225 4179 raise NotImplementedError('This method must be implemented by subclasses')
4180
4181 def __getitem__(self, idx):
7be9ccff 4182 # NOTE: cache must be enabled if this is used
55575225 4183 if not isinstance(idx, int) or idx < 0:
4184 raise TypeError('indices must be non-negative integers')
4185 entries = self.getslice(idx, idx + 1)
4186 return entries[0] if entries else None
4187
9c44d242
PH
4188
4189class OnDemandPagedList(PagedList):
7be9ccff 4190 def _getslice(self, start, end):
b7ab0590
PH
4191 for pagenum in itertools.count(start // self._pagesize):
4192 firstid = pagenum * self._pagesize
4193 nextfirstid = pagenum * self._pagesize + self._pagesize
4194 if start >= nextfirstid:
4195 continue
4196
b7ab0590
PH
4197 startv = (
4198 start % self._pagesize
4199 if firstid <= start < nextfirstid
4200 else 0)
b7ab0590
PH
4201 endv = (
4202 ((end - 1) % self._pagesize) + 1
4203 if (end is not None and firstid <= end <= nextfirstid)
4204 else None)
4205
7be9ccff 4206 page_results = self.getpage(pagenum)
b7ab0590
PH
4207 if startv != 0 or endv is not None:
4208 page_results = page_results[startv:endv]
7be9ccff 4209 yield from page_results
b7ab0590
PH
4210
4211 # A little optimization: if the current page is not "full", i.e. it does
4212 # not contain page_size videos, then we can assume that this page
4213 # is the last one - there are no more ids on further pages -
4214 # so there is no need to query again.
4215 if len(page_results) + startv < self._pagesize:
4216 break
4217
4218 # If we got the whole page, but the next page is not interesting,
4219 # break out early as well
4220 if end == nextfirstid:
4221 break
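# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows OnDemandPagedList fetching only the pages a slice needs; pagefunc and
# _paged_list_example are hypothetical names.
def _paged_list_example():
    def pagefunc(pagenum):
        # page 0 -> [0, 1, 2], page 1 -> [3, 4, 5], ...
        return iter(range(pagenum * 3, pagenum * 3 + 3))
    pl = OnDemandPagedList(pagefunc, 3)
    assert pl.getslice(2, 7) == [2, 3, 4, 5, 6]
    assert pl[4] == 4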
81c2f20b
PH
4222
4223
9c44d242
PH
4224class InAdvancePagedList(PagedList):
4225 def __init__(self, pagefunc, pagecount, pagesize):
9c44d242 4226 self._pagecount = pagecount
7be9ccff 4227 PagedList.__init__(self, pagefunc, pagesize, True)
9c44d242 4228
7be9ccff 4229 def _getslice(self, start, end):
9c44d242
PH
4230 start_page = start // self._pagesize
4231 end_page = (
4232 self._pagecount if end is None else (end // self._pagesize + 1))
4233 skip_elems = start - start_page * self._pagesize
4234 only_more = None if end is None else end - start
4235 for pagenum in range(start_page, end_page):
7be9ccff 4236 page_results = self.getpage(pagenum)
9c44d242 4237 if skip_elems:
7be9ccff 4238 page_results = page_results[skip_elems:]
9c44d242
PH
4239 skip_elems = None
4240 if only_more is not None:
7be9ccff 4241 if len(page_results) < only_more:
4242 only_more -= len(page_results)
9c44d242 4243 else:
7be9ccff 4244 yield from page_results[:only_more]
9c44d242 4245 break
7be9ccff 4246 yield from page_results
9c44d242
PH
4247
4248
81c2f20b 4249def uppercase_escape(s):
676eb3f2 4250 unicode_escape = codecs.getdecoder('unicode_escape')
81c2f20b 4251 return re.sub(
a612753d 4252 r'\\U[0-9a-fA-F]{8}',
676eb3f2
PH
4253 lambda m: unicode_escape(m.group(0))[0],
4254 s)
0fe2ff78
YCH
4255
4256
4257def lowercase_escape(s):
4258 unicode_escape = codecs.getdecoder('unicode_escape')
4259 return re.sub(
4260 r'\\u[0-9a-fA-F]{4}',
4261 lambda m: unicode_escape(m.group(0))[0],
4262 s)
b53466e1 4263
d05cfe06
S
4264
4265def escape_rfc3986(s):
4266 """Escape non-ASCII characters as suggested by RFC 3986"""
8f9312c3 4267 if sys.version_info < (3, 0) and isinstance(s, compat_str):
d05cfe06 4268 s = s.encode('utf-8')
ecc0c5ee 4269 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
4270
4271
4272def escape_url(url):
4273 """Escape URL as suggested by RFC 3986"""
4274 url_parsed = compat_urllib_parse_urlparse(url)
4275 return url_parsed._replace(
efbed08d 4276 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
d05cfe06
S
4277 path=escape_rfc3986(url_parsed.path),
4278 params=escape_rfc3986(url_parsed.params),
4279 query=escape_rfc3986(url_parsed.query),
4280 fragment=escape_rfc3986(url_parsed.fragment)
4281 ).geturl()
4282
62e609ab 4283
4dfbf869 4284def parse_qs(url):
4285 return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
4286
4287
62e609ab
PH
4288def read_batch_urls(batch_fd):
4289 def fixup(url):
4290 if not isinstance(url, compat_str):
4291 url = url.decode('utf-8', 'replace')
8c04f0be 4292 BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
4293 for bom in BOM_UTF8:
4294 if url.startswith(bom):
4295 url = url[len(bom):]
4296 url = url.lstrip()
4297 if not url or url.startswith(('#', ';', ']')):
62e609ab 4298 return False
8c04f0be 4299 # "#" cannot be stripped out since it is part of the URI
4300 # However, it can be safely stripped out if it follows a whitespace
4301 return re.split(r'\s#', url, 1)[0].rstrip()
62e609ab
PH
4302
4303 with contextlib.closing(batch_fd) as fd:
4304 return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
4305
4306
4307def urlencode_postdata(*args, **kargs):
15707c7e 4308 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
bcf89ce6
PH
4309
4310
38f9ef31 4311def update_url_query(url, query):
cacd9966
YCH
4312 if not query:
4313 return url
38f9ef31 4314 parsed_url = compat_urlparse.urlparse(url)
4315 qs = compat_parse_qs(parsed_url.query)
4316 qs.update(query)
4317 return compat_urlparse.urlunparse(parsed_url._replace(
15707c7e 4318 query=compat_urllib_parse_urlencode(qs, True)))
16392824 4319
8e60dc75 4320
ed0291d1
S
4321def update_Request(req, url=None, data=None, headers={}, query={}):
4322 req_headers = req.headers.copy()
4323 req_headers.update(headers)
4324 req_data = data or req.data
4325 req_url = update_url_query(url or req.get_full_url(), query)
95cf60e8
S
4326 req_get_method = req.get_method()
4327 if req_get_method == 'HEAD':
4328 req_type = HEADRequest
4329 elif req_get_method == 'PUT':
4330 req_type = PUTRequest
4331 else:
4332 req_type = compat_urllib_request.Request
ed0291d1
S
4333 new_req = req_type(
4334 req_url, data=req_data, headers=req_headers,
4335 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
4336 if hasattr(req, 'timeout'):
4337 new_req.timeout = req.timeout
4338 return new_req
4339
4340
10c87c15 4341def _multipart_encode_impl(data, boundary):
0c265486
YCH
4342 content_type = 'multipart/form-data; boundary=%s' % boundary
4343
4344 out = b''
4345 for k, v in data.items():
4346 out += b'--' + boundary.encode('ascii') + b'\r\n'
4347 if isinstance(k, compat_str):
4348 k = k.encode('utf-8')
4349 if isinstance(v, compat_str):
4350 v = v.encode('utf-8')
4351 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
4352 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
b2ad479d 4353 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
0c265486
YCH
4354 if boundary.encode('ascii') in content:
4355 raise ValueError('Boundary overlaps with data')
4356 out += content
4357
4358 out += b'--' + boundary.encode('ascii') + b'--\r\n'
4359
4360 return out, content_type
4361
4362
4363def multipart_encode(data, boundary=None):
4364 '''
4365 Encode a dict to RFC 7578-compliant form-data
4366
4367 data:
4368 A dict where keys and values can be either Unicode or bytes-like
4369 objects.
4370 boundary:
4371 If specified, it must be a Unicode object and is used as the boundary. Otherwise
4372 a random boundary is generated.
4373
4374 Reference: https://tools.ietf.org/html/rfc7578
4375 '''
4376 has_specified_boundary = boundary is not None
4377
4378 while True:
4379 if boundary is None:
4380 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
4381
4382 try:
10c87c15 4383 out, content_type = _multipart_encode_impl(data, boundary)
0c265486
YCH
4384 break
4385 except ValueError:
4386 if has_specified_boundary:
4387 raise
4388 boundary = None
4389
4390 return out, content_type
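# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the body and Content-Type produced for a one-field dict with a fixed
# boundary; _multipart_encode_example is a hypothetical name.
def _multipart_encode_example():
    body, content_type = multipart_encode({'field': 'value'}, boundary='BOUNDARY')
    assert content_type == 'multipart/form-data; boundary=BOUNDARY'
    assert body == (b'--BOUNDARY\r\n'
                    b'Content-Disposition: form-data; name="field"\r\n\r\n'
                    b'value\r\n'
                    b'--BOUNDARY--\r\n')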
4391
4392
86296ad2 4393def dict_get(d, key_or_keys, default=None, skip_false_values=True):
cbecc9b9
S
4394 if isinstance(key_or_keys, (list, tuple)):
4395 for key in key_or_keys:
86296ad2
S
4396 if key not in d or d[key] is None or skip_false_values and not d[key]:
4397 continue
4398 return d[key]
cbecc9b9
S
4399 return default
4400 return d.get(key_or_keys, default)
4401
4402
329ca3be 4403def try_get(src, getter, expected_type=None):
6606817a 4404 for get in variadic(getter):
a32a9a7e
S
4405 try:
4406 v = get(src)
4407 except (AttributeError, KeyError, TypeError, IndexError):
4408 pass
4409 else:
4410 if expected_type is None or isinstance(v, expected_type):
4411 return v
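# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the lookup helpers above on a small metadata dict; meta and
# _dict_lookup_examples are hypothetical names.
def _dict_lookup_examples():
    meta = {'title': '', 'duration': 120, 'tags': ['news']}
    assert dict_get(meta, ('title', 'fulltitle'), default='untitled') == 'untitled'
    assert dict_get(meta, ('title',), skip_false_values=False) == ''
    assert try_get(meta, lambda x: x['tags'][0], compat_str) == 'news'
    assert try_get(meta, lambda x: x['missing'][0]) is None    # lookup errors are swallowed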
329ca3be
S
4412
4413
6cc62232
S
4414def merge_dicts(*dicts):
4415 merged = {}
4416 for a_dict in dicts:
4417 for k, v in a_dict.items():
4418 if v is None:
4419 continue
3089bc74
S
4420 if (k not in merged
4421 or (isinstance(v, compat_str) and v
4422 and isinstance(merged[k], compat_str)
4423 and not merged[k])):
6cc62232
S
4424 merged[k] = v
4425 return merged
4426
4427
8e60dc75
S
4428def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
4429 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4430
16392824 4431
a1a530b0
PH
4432US_RATINGS = {
4433 'G': 0,
4434 'PG': 10,
4435 'PG-13': 13,
4436 'R': 16,
4437 'NC': 18,
4438}
fac55558
PH
4439
4440
a8795327 4441TV_PARENTAL_GUIDELINES = {
5a16c9d9
RA
4442 'TV-Y': 0,
4443 'TV-Y7': 7,
4444 'TV-G': 0,
4445 'TV-PG': 0,
4446 'TV-14': 14,
4447 'TV-MA': 17,
a8795327
S
4448}
4449
4450
146c80e2 4451def parse_age_limit(s):
a8795327
S
4452 if type(s) == int:
4453 return s if 0 <= s <= 21 else None
4454 if not isinstance(s, compat_basestring):
d838b1bd 4455 return None
146c80e2 4456 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
a8795327
S
4457 if m:
4458 return int(m.group('age'))
5c5fae6d 4459 s = s.upper()
a8795327
S
4460 if s in US_RATINGS:
4461 return US_RATINGS[s]
5a16c9d9 4462 m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
b8361187 4463 if m:
5a16c9d9 4464 return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
b8361187 4465 return None
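# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the rating formats parse_age_limit above understands;
# _parse_age_limit_examples is a hypothetical name.
def _parse_age_limit_examples():
    assert parse_age_limit('PG-13') == 13       # US movie rating
    assert parse_age_limit('TV-MA') == 17       # TV parental guideline
    assert parse_age_limit('18+') == 18         # bare "NN+" form
    assert parse_age_limit('unknown') is None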
146c80e2
S
4466
4467
fac55558 4468def strip_jsonp(code):
609a61e3 4469 return re.sub(
5552c9eb 4470 r'''(?sx)^
e9c671d5 4471 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
5552c9eb
YCH
4472 (?:\s*&&\s*(?P=func_name))?
4473 \s*\(\s*(?P<callback_data>.*)\);?
4474 \s*?(?://[^\n]*)*$''',
4475 r'\g<callback_data>', code)
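# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows strip_jsonp unwrapping a JSONP callback; _strip_jsonp_example is a
# hypothetical name.
def _strip_jsonp_example():
    assert strip_jsonp('callback({"status": "ok"});') == '{"status": "ok"}'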
478c2c61
PH
4476
4477
5c610515 4478def js_to_json(code, vars={}):
4479 # vars is a dict of var, val pairs to substitute
c843e685 4480 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
4195096e
S
4481 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
4482 INTEGER_TABLE = (
4483 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
4484 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
4485 )
4486
e05f6939 4487 def fix_kv(m):
e7b6d122
PH
4488 v = m.group(0)
4489 if v in ('true', 'false', 'null'):
4490 return v
421ddcb8
C
4491 elif v in ('undefined', 'void 0'):
4492 return 'null'
8bdd16b4 4493 elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
bd1e4844 4494 return ""
4495
4496 if v[0] in ("'", '"'):
4497 v = re.sub(r'(?s)\\.|"', lambda m: {
e7b6d122 4498 '"': '\\"',
bd1e4844 4499 "\\'": "'",
4500 '\\\n': '',
4501 '\\x': '\\u00',
4502 }.get(m.group(0), m.group(0)), v[1:-1])
8bdd16b4 4503 else:
4504 for regex, base in INTEGER_TABLE:
4505 im = re.match(regex, v)
4506 if im:
4507 i = int(im.group(1), base)
4508 return '"%d":' % i if v.endswith(':') else '%d' % i
89ac4a19 4509
5c610515 4510 if v in vars:
4511 return vars[v]
4512
e7b6d122 4513 return '"%s"' % v
e05f6939 4514
bd1e4844 4515 return re.sub(r'''(?sx)
4516 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
4517 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
4195096e 4518 {comment}|,(?={skip}[\]}}])|
421ddcb8 4519 void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
4195096e 4520 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
8bdd16b4 4521 [0-9]+(?={skip}:)|
4522 !+
4195096e 4523 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
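# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows js_to_json normalizing JavaScript object literals into strict JSON;
# _js_to_json_examples is a hypothetical name.
def _js_to_json_examples():
    assert js_to_json("{abc: true}") == '{"abc": true}'          # bare keys get quoted
    assert js_to_json("{'key': 'value'}") == '{"key": "value"}'  # single -> double quotes
    assert js_to_json('{"price": 0x40}') == '{"price": 64}'      # hex literals become decimal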
e05f6939
PH
4524
4525
478c2c61
PH
4526def qualities(quality_ids):
4527 """ Get a numeric quality value out of a list of possible values """
4528 def q(qid):
4529 try:
4530 return quality_ids.index(qid)
4531 except ValueError:
4532 return -1
4533 return q
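# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the closure returned by qualities; _qualities_example is a hypothetical name.
def _qualities_example():
    q = qualities(['144p', '360p', '720p', '1080p'])
    assert q('720p') == 2       # higher index means better quality
    assert q('4320p') == -1     # unknown values sort below everything else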
4534
acd69589 4535
de6000d9 4536DEFAULT_OUTTMPL = {
4537 'default': '%(title)s [%(id)s].%(ext)s',
72755351 4538 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
de6000d9 4539}
4540OUTTMPL_TYPES = {
72755351 4541 'chapter': None,
de6000d9 4542 'subtitle': None,
4543 'thumbnail': None,
4544 'description': 'description',
4545 'annotation': 'annotations.xml',
4546 'infojson': 'info.json',
08438d2c 4547 'link': None,
5112f26a 4548 'pl_thumbnail': None,
de6000d9 4549 'pl_description': 'description',
4550 'pl_infojson': 'info.json',
4551}
0a871f68 4552
143db31d 4553# As of [1], the format syntax is:
4554# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
4555# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
901130bb 4556STR_FORMAT_RE_TMPL = r'''(?x)
4557 (?<!%)(?P<prefix>(?:%%)*)
143db31d 4558 %
524e2e4f 4559 (?P<has_key>\((?P<key>{0})\))?
752cda38 4560 (?P<format>
524e2e4f 4561 (?P<conversion>[#0\-+ ]+)?
4562 (?P<min_width>\d+)?
4563 (?P<precision>\.\d+)?
4564 (?P<len_mod>[hlL])? # unused in python
901130bb 4565 {1} # conversion type
752cda38 4566 )
143db31d 4567'''
4568
7d1eb38a 4569
901130bb 4570STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4571
7d1eb38a 4572
a020a0dc
PH
4573def limit_length(s, length):
4574 """ Add ellipses to overly long strings """
4575 if s is None:
4576 return None
4577 ELLIPSES = '...'
4578 if len(s) > length:
4579 return s[:length - len(ELLIPSES)] + ELLIPSES
4580 return s
48844745
PH
4581
4582
4583def version_tuple(v):
5f9b8394 4584 return tuple(int(e) for e in re.split(r'[-.]', v))
48844745
PH
4585
4586
4587def is_outdated_version(version, limit, assume_new=True):
4588 if not version:
4589 return not assume_new
4590 try:
4591 return version_tuple(version) < version_tuple(limit)
4592 except ValueError:
4593 return not assume_new
732ea2f0
PH
4594
4595
4596def ytdl_is_updateable():
7a5c1cfe 4597 """ Returns if yt-dlp can be updated with -U """
735d865e 4598
5d535b4a 4599 from .update import is_non_updateable
732ea2f0 4600
5d535b4a 4601 return not is_non_updateable()
7d4111ed
PH
4602
4603
4604def args_to_str(args):
4605 # Get a short string representation for a subprocess command
702ccf2d 4606 return ' '.join(compat_shlex_quote(a) for a in args)
2ccd1b10
PH
4607
4608
9b9c5355 4609def error_to_compat_str(err):
fdae2358
S
4610 err_str = str(err)
4611 # On Python 2, the error byte string must be decoded with the proper
4612 # encoding rather than ascii
4613 if sys.version_info[0] < 3:
4614 err_str = err_str.decode(preferredencoding())
4615 return err_str
4616
4617
c460bdd5 4618def mimetype2ext(mt):
eb9ee194
S
4619 if mt is None:
4620 return None
4621
9359f3d4
F
4622 mt, _, params = mt.partition(';')
4623 mt = mt.strip()
4624
4625 FULL_MAP = {
765ac263 4626 'audio/mp4': 'm4a',
6c33d24b
YCH
4627 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
4628 # it's the most popular one
4629 'audio/mpeg': 'mp3',
ba39289d 4630 'audio/x-wav': 'wav',
9359f3d4
F
4631 'audio/wav': 'wav',
4632 'audio/wave': 'wav',
4633 }
4634
4635 ext = FULL_MAP.get(mt)
765ac263
JMF
4636 if ext is not None:
4637 return ext
4638
9359f3d4 4639 SUBTYPE_MAP = {
f6861ec9 4640 '3gpp': '3gp',
cafcf657 4641 'smptett+xml': 'tt',
cafcf657 4642 'ttaf+xml': 'dfxp',
a0d8d704 4643 'ttml+xml': 'ttml',
f6861ec9 4644 'x-flv': 'flv',
a0d8d704 4645 'x-mp4-fragmented': 'mp4',
d4f05d47 4646 'x-ms-sami': 'sami',
a0d8d704 4647 'x-ms-wmv': 'wmv',
b4173f15
RA
4648 'mpegurl': 'm3u8',
4649 'x-mpegurl': 'm3u8',
4650 'vnd.apple.mpegurl': 'm3u8',
4651 'dash+xml': 'mpd',
b4173f15 4652 'f4m+xml': 'f4m',
f164b971 4653 'hds+xml': 'f4m',
e910fe2f 4654 'vnd.ms-sstr+xml': 'ism',
c2b2c7e1 4655 'quicktime': 'mov',
98ce1a3f 4656 'mp2t': 'ts',
39e7107d 4657 'x-wav': 'wav',
9359f3d4
F
4658 'filmstrip+json': 'fs',
4659 'svg+xml': 'svg',
4660 }
4661
4662 _, _, subtype = mt.rpartition('/')
4663 ext = SUBTYPE_MAP.get(subtype.lower())
4664 if ext is not None:
4665 return ext
4666
4667 SUFFIX_MAP = {
4668 'json': 'json',
4669 'xml': 'xml',
4670 'zip': 'zip',
4671 'gzip': 'gz',
4672 }
4673
4674 _, _, suffix = subtype.partition('+')
4675 ext = SUFFIX_MAP.get(suffix)
4676 if ext is not None:
4677 return ext
4678
4679 return subtype.replace('+', '.')
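# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the lookup order in mimetype2ext (full type, subtype, '+suffix',
# then a generic fallback); _mimetype2ext_examples is a hypothetical name.
def _mimetype2ext_examples():
    assert mimetype2ext('audio/mp4') == 'm4a'               # full MIME type match
    assert mimetype2ext('application/x-mpegurl') == 'm3u8'  # subtype match
    assert mimetype2ext('application/ld+json') == 'json'    # '+suffix' match
    assert mimetype2ext('video/mp4; codecs="avc1.42E01E"') == 'mp4'  # parameters are ignored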
c460bdd5
PH
4680
4681
4f3c5e06 4682def parse_codecs(codecs_str):
4683 # http://tools.ietf.org/html/rfc6381
4684 if not codecs_str:
4685 return {}
a0566bbf 4686 split_codecs = list(filter(None, map(
dbf5416a 4687 str.strip, codecs_str.strip().strip(',').split(','))))
176f1866 4688 vcodec, acodec, hdr = None, None, None
a0566bbf 4689 for full_codec in split_codecs:
9bd979ca 4690 parts = full_codec.split('.')
4691 codec = parts[0].replace('0', '')
4692 if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
4693 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
4f3c5e06 4694 if not vcodec:
9bd979ca 4695 vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1') else full_codec
176f1866 4696 if codec in ('dvh1', 'dvhe'):
4697 hdr = 'DV'
9bd979ca 4698 elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
4699 hdr = 'HDR10'
4700 elif full_codec.replace('0', '').startswith('vp9.2'):
176f1866 4701 hdr = 'HDR10'
60f5c9fb 4702 elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
4f3c5e06 4703 if not acodec:
4704 acodec = full_codec
4705 else:
60f5c9fb 4706 write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
4f3c5e06 4707 if not vcodec and not acodec:
a0566bbf 4708 if len(split_codecs) == 2:
4f3c5e06 4709 return {
a0566bbf 4710 'vcodec': split_codecs[0],
4711 'acodec': split_codecs[1],
4f3c5e06 4712 }
4713 else:
4714 return {
4715 'vcodec': vcodec or 'none',
4716 'acodec': acodec or 'none',
176f1866 4717 'dynamic_range': hdr,
4f3c5e06 4718 }
4719 return {}
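# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows parse_codecs splitting an RFC 6381 codecs string into vcodec/acodec;
# _parse_codecs_examples is a hypothetical name.
def _parse_codecs_examples():
    assert parse_codecs('avc1.77.30, mp4a.40.2') == {
        'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', 'dynamic_range': None}
    assert parse_codecs('mp4a.40.2') == {
        'vcodec': 'none', 'acodec': 'mp4a.40.2', 'dynamic_range': None}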
4720
4721
2ccd1b10 4722def urlhandle_detect_ext(url_handle):
79298173 4723 getheader = url_handle.headers.get
2ccd1b10 4724
b55ee18f
PH
4725 cd = getheader('Content-Disposition')
4726 if cd:
4727 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
4728 if m:
4729 e = determine_ext(m.group('filename'), default_ext=None)
4730 if e:
4731 return e
4732
c460bdd5 4733 return mimetype2ext(getheader('Content-Type'))
05900629
PH
4734
4735
1e399778
YCH
4736def encode_data_uri(data, mime_type):
4737 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
4738
4739
05900629 4740def age_restricted(content_limit, age_limit):
6ec6cb4e 4741 """ Returns True iff the content should be blocked """
05900629
PH
4742
4743 if age_limit is None: # No limit set
4744 return False
4745 if content_limit is None:
4746 return False # Content available for everyone
4747 return age_limit < content_limit
61ca9a80
PH
4748
4749
4750def is_html(first_bytes):
4751 """ Detect whether a file contains HTML by examining its first bytes. """
4752
4753 BOMS = [
4754 (b'\xef\xbb\xbf', 'utf-8'),
4755 (b'\x00\x00\xfe\xff', 'utf-32-be'),
4756 (b'\xff\xfe\x00\x00', 'utf-32-le'),
4757 (b'\xff\xfe', 'utf-16-le'),
4758 (b'\xfe\xff', 'utf-16-be'),
4759 ]
4760 for bom, enc in BOMS:
4761 if first_bytes.startswith(bom):
4762 s = first_bytes[len(bom):].decode(enc, 'replace')
4763 break
4764 else:
4765 s = first_bytes.decode('utf-8', 'replace')
4766
4767 return re.match(r'^\s*<', s)
a055469f
PH
4768
4769
4770def determine_protocol(info_dict):
4771 protocol = info_dict.get('protocol')
4772 if protocol is not None:
4773 return protocol
4774
7de837a5 4775 url = sanitize_url(info_dict['url'])
a055469f
PH
4776 if url.startswith('rtmp'):
4777 return 'rtmp'
4778 elif url.startswith('mms'):
4779 return 'mms'
4780 elif url.startswith('rtsp'):
4781 return 'rtsp'
4782
4783 ext = determine_ext(url)
4784 if ext == 'm3u8':
4785 return 'm3u8'
4786 elif ext == 'f4m':
4787 return 'f4m'
4788
4789 return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4790
4791
76d321f6 4792def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
cfb56d1a 4793 """ Render a list of rows, each as a list of values """
ec11a9f4 4794 def width(string):
4795 return len(remove_terminal_sequences(string))
76d321f6 4796
4797 def get_max_lens(table):
ec11a9f4 4798 return [max(width(str(v)) for v in col) for col in zip(*table)]
76d321f6 4799
4800 def filter_using_list(row, filterArray):
4801 return [col for (take, col) in zip(filterArray, row) if take]
4802
4803 if hideEmpty:
4804 max_lens = get_max_lens(data)
4805 header_row = filter_using_list(header_row, max_lens)
4806 data = [filter_using_list(row, max_lens) for row in data]
4807
cfb56d1a 4808 table = [header_row] + data
76d321f6 4809 max_lens = get_max_lens(table)
ec11a9f4 4810 extraGap += 1
76d321f6 4811 if delim:
ec11a9f4 4812 table = [header_row] + [[delim * (ml + extraGap) for ml in max_lens]] + data
4813 max_lens[-1] = 0
4814 for row in table:
4815 for pos, text in enumerate(map(str, row)):
4816 row[pos] = text + (' ' * (max_lens[pos] - width(text) + extraGap))
4817 ret = '\n'.join(''.join(row) for row in table)
4818 return ret
347de493
PH
4819
4820
8f18aca8 4821def _match_one(filter_part, dct, incomplete):
77b87f05 4822 # TODO: Generalize code with YoutubeDL._build_format_filter
a047eeb6 4823 STRING_OPERATORS = {
4824 '*=': operator.contains,
4825 '^=': lambda attr, value: attr.startswith(value),
4826 '$=': lambda attr, value: attr.endswith(value),
4827 '~=': lambda attr, value: re.search(value, attr),
4828 }
347de493 4829 COMPARISON_OPERATORS = {
a047eeb6 4830 **STRING_OPERATORS,
4831 '<=': operator.le, # "<=" must be defined above "<"
347de493 4832 '<': operator.lt,
347de493 4833 '>=': operator.ge,
a047eeb6 4834 '>': operator.gt,
347de493 4835 '=': operator.eq,
347de493 4836 }
a047eeb6 4837
347de493
PH
4838 operator_rex = re.compile(r'''(?x)\s*
4839 (?P<key>[a-z_]+)
77b87f05 4840 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
347de493 4841 (?:
a047eeb6 4842 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
4843 (?P<strval>.+?)
347de493
PH
4844 )
4845 \s*$
4846 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
4847 m = operator_rex.search(filter_part)
4848 if m:
18f96d12 4849 m = m.groupdict()
4850 unnegated_op = COMPARISON_OPERATORS[m['op']]
4851 if m['negation']:
77b87f05
MT
4852 op = lambda attr, value: not unnegated_op(attr, value)
4853 else:
4854 op = unnegated_op
18f96d12 4855 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
4856 if m['quote']:
4857 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
4858 actual_value = dct.get(m['key'])
4859 numeric_comparison = None
4860 if isinstance(actual_value, compat_numeric_types):
e5a088dc
S
4861 # If the original field is a string and the matching comparison value is
4862 # a number, we should respect the origin of the original field
4863 # and process the comparison value as a string (see
18f96d12 4864 # https://github.com/ytdl-org/youtube-dl/issues/11082)
347de493 4865 try:
18f96d12 4866 numeric_comparison = int(comparison_value)
347de493 4867 except ValueError:
18f96d12 4868 numeric_comparison = parse_filesize(comparison_value)
4869 if numeric_comparison is None:
4870 numeric_comparison = parse_filesize(f'{comparison_value}B')
4871 if numeric_comparison is None:
4872 numeric_comparison = parse_duration(comparison_value)
4873 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
4874 raise ValueError('Operator %s only supports string values!' % m['op'])
347de493 4875 if actual_value is None:
18f96d12 4876 return incomplete or m['none_inclusive']
4877 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
347de493
PH
4878
4879 UNARY_OPERATORS = {
1cc47c66
S
4880 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
4881 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
347de493
PH
4882 }
4883 operator_rex = re.compile(r'''(?x)\s*
4884 (?P<op>%s)\s*(?P<key>[a-z_]+)
4885 \s*$
4886 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
4887 m = operator_rex.search(filter_part)
4888 if m:
4889 op = UNARY_OPERATORS[m.group('op')]
4890 actual_value = dct.get(m.group('key'))
8f18aca8 4891 if incomplete and actual_value is None:
4892 return True
347de493
PH
4893 return op(actual_value)
4894
4895 raise ValueError('Invalid filter part %r' % filter_part)
4896
4897
8f18aca8 4898def match_str(filter_str, dct, incomplete=False):
4899 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
4900 When incomplete, all conditions passes on missing fields
4901 """
347de493 4902 return all(
8f18aca8 4903 _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
a047eeb6 4904 for filter_part in re.split(r'(?<!\\)&', filter_str))
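# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows the filter syntax accepted by match_str/_match_one above; the video
# dict and _match_str_examples are hypothetical names.
def _match_str_examples():
    video = {'like_count': 190, 'description': 'live stream'}
    assert match_str('like_count > 100 & description*=live', video)
    assert not match_str('dislike_count < 50', video)   # missing field fails the condition
    assert match_str('dislike_count <? 50', video)      # '?' lets missing fields pass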
347de493
PH
4905
4906
4907def match_filter_func(filter_str):
8f18aca8 4908 def _match_func(info_dict, *args, **kwargs):
4909 if match_str(filter_str, info_dict, *args, **kwargs):
347de493
PH
4910 return None
4911 else:
4912 video_title = info_dict.get('title', info_dict.get('id', 'video'))
4913 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
4914 return _match_func
91410c9b
PH
4915
4916
bf6427d2
YCH
4917def parse_dfxp_time_expr(time_expr):
4918 if not time_expr:
d631d5f9 4919 return
bf6427d2
YCH
4920
4921 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
4922 if mobj:
4923 return float(mobj.group('time_offset'))
4924
db2fe38b 4925 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
bf6427d2 4926 if mobj:
db2fe38b 4927 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2
YCH
4928
4929
c1c924ab 4930def srt_subtitles_timecode(seconds):
aa7785f8 4931 return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
4932
4933
4934def ass_subtitles_timecode(seconds):
4935 time = timetuple_from_msec(seconds * 1000)
4936 return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
bf6427d2
YCH
4937
4938
4939def dfxp2srt(dfxp_data):
3869028f
YCH
4940 '''
4941 @param dfxp_data A bytes-like object containing DFXP data
4942 @returns A unicode object containing converted SRT data
4943 '''
5b995f71 4944 LEGACY_NAMESPACES = (
3869028f
YCH
4945 (b'http://www.w3.org/ns/ttml', [
4946 b'http://www.w3.org/2004/11/ttaf1',
4947 b'http://www.w3.org/2006/04/ttaf1',
4948 b'http://www.w3.org/2006/10/ttaf1',
5b995f71 4949 ]),
3869028f
YCH
4950 (b'http://www.w3.org/ns/ttml#styling', [
4951 b'http://www.w3.org/ns/ttml#style',
5b995f71
RA
4952 ]),
4953 )
4954
4955 SUPPORTED_STYLING = [
4956 'color',
4957 'fontFamily',
4958 'fontSize',
4959 'fontStyle',
4960 'fontWeight',
4961 'textDecoration'
4962 ]
4963
4e335771 4964 _x = functools.partial(xpath_with_ns, ns_map={
261f4730 4965 'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771 4966 'ttml': 'http://www.w3.org/ns/ttml',
5b995f71 4967 'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771 4968 })
bf6427d2 4969
5b995f71
RA
4970 styles = {}
4971 default_style = {}
4972
87de7069 4973 class TTMLPElementParser(object):
5b995f71
RA
4974 _out = ''
4975 _unclosed_elements = []
4976 _applied_styles = []
bf6427d2 4977
2b14cb56 4978 def start(self, tag, attrib):
5b995f71
RA
4979 if tag in (_x('ttml:br'), 'br'):
4980 self._out += '\n'
4981 else:
4982 unclosed_elements = []
4983 style = {}
4984 element_style_id = attrib.get('style')
4985 if default_style:
4986 style.update(default_style)
4987 if element_style_id:
4988 style.update(styles.get(element_style_id, {}))
4989 for prop in SUPPORTED_STYLING:
4990 prop_val = attrib.get(_x('tts:' + prop))
4991 if prop_val:
4992 style[prop] = prop_val
4993 if style:
4994 font = ''
4995 for k, v in sorted(style.items()):
4996 if self._applied_styles and self._applied_styles[-1].get(k) == v:
4997 continue
4998 if k == 'color':
4999 font += ' color="%s"' % v
5000 elif k == 'fontSize':
5001 font += ' size="%s"' % v
5002 elif k == 'fontFamily':
5003 font += ' face="%s"' % v
5004 elif k == 'fontWeight' and v == 'bold':
5005 self._out += '<b>'
5006 unclosed_elements.append('b')
5007 elif k == 'fontStyle' and v == 'italic':
5008 self._out += '<i>'
5009 unclosed_elements.append('i')
5010 elif k == 'textDecoration' and v == 'underline':
5011 self._out += '<u>'
5012 unclosed_elements.append('u')
5013 if font:
5014 self._out += '<font' + font + '>'
5015 unclosed_elements.append('font')
5016 applied_style = {}
5017 if self._applied_styles:
5018 applied_style.update(self._applied_styles[-1])
5019 applied_style.update(style)
5020 self._applied_styles.append(applied_style)
5021 self._unclosed_elements.append(unclosed_elements)
bf6427d2 5022
2b14cb56 5023 def end(self, tag):
5b995f71
RA
5024 if tag not in (_x('ttml:br'), 'br'):
5025 unclosed_elements = self._unclosed_elements.pop()
5026 for element in reversed(unclosed_elements):
5027 self._out += '</%s>' % element
5028 if unclosed_elements and self._applied_styles:
5029 self._applied_styles.pop()
bf6427d2 5030
2b14cb56 5031 def data(self, data):
5b995f71 5032 self._out += data
2b14cb56 5033
5034 def close(self):
5b995f71 5035 return self._out.strip()
2b14cb56 5036
5037 def parse_node(node):
5038 target = TTMLPElementParser()
5039 parser = xml.etree.ElementTree.XMLParser(target=target)
5040 parser.feed(xml.etree.ElementTree.tostring(node))
5041 return parser.close()
bf6427d2 5042
5b995f71
RA
5043 for k, v in LEGACY_NAMESPACES:
5044 for ns in v:
5045 dfxp_data = dfxp_data.replace(ns, k)
5046
3869028f 5047 dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2 5048 out = []
5b995f71 5049 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6
YCH
5050
5051 if not paras:
5052 raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2 5053
5b995f71
RA
5054 repeat = False
5055 while True:
5056 for style in dfxp.findall(_x('.//ttml:style')):
261f4730
RA
5057 style_id = style.get('id') or style.get(_x('xml:id'))
5058 if not style_id:
5059 continue
5b995f71
RA
5060 parent_style_id = style.get('style')
5061 if parent_style_id:
5062 if parent_style_id not in styles:
5063 repeat = True
5064 continue
5065 styles[style_id] = styles[parent_style_id].copy()
5066 for prop in SUPPORTED_STYLING:
5067 prop_val = style.get(_x('tts:' + prop))
5068 if prop_val:
5069 styles.setdefault(style_id, {})[prop] = prop_val
5070 if repeat:
5071 repeat = False
5072 else:
5073 break
5074
5075 for p in ('body', 'div'):
5076 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
5077 if ele is None:
5078 continue
5079 style = styles.get(ele.get('style'))
5080 if not style:
5081 continue
5082 default_style.update(style)
5083
bf6427d2 5084 for para, index in zip(paras, itertools.count(1)):
d631d5f9 5085 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363 5086 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9
YCH
5087 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
5088 if begin_time is None:
5089 continue
7dff0363 5090 if not end_time:
d631d5f9
YCH
5091 if not dur:
5092 continue
5093 end_time = begin_time + dur
bf6427d2
YCH
5094 out.append('%d\n%s --> %s\n%s\n\n' % (
5095 index,
c1c924ab
YCH
5096 srt_subtitles_timecode(begin_time),
5097 srt_subtitles_timecode(end_time),
bf6427d2
YCH
5098 parse_node(para)))
5099
5100 return ''.join(out)
5101
5102
66e289ba
S
5103def cli_option(params, command_option, param):
5104 param = params.get(param)
98e698f1
RA
5105 if param:
5106 param = compat_str(param)
66e289ba
S
5107 return [command_option, param] if param is not None else []
5108
5109
5110def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
5111 param = params.get(param)
5b232f46
S
5112 if param is None:
5113 return []
66e289ba
S
5114 assert isinstance(param, bool)
5115 if separator:
5116 return [command_option + separator + (true_value if param else false_value)]
5117 return [command_option, true_value if param else false_value]
5118
5119
5120def cli_valueless_option(params, command_option, param, expected_value=True):
5121 param = params.get(param)
5122 return [command_option] if param == expected_value else []
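# --- Editor's note: illustrative usage sketch, not part of the original file. ---
# Shows how the cli_* helpers above turn yt-dlp params into external downloader
# command-line arguments; the params dict and _cli_option_examples are hypothetical.
def _cli_option_examples():
    params = {'nocheckcertificate': True, 'proxy': 'socks5://127.0.0.1:1080'}
    assert cli_option(params, '--proxy', 'proxy') == ['--proxy', 'socks5://127.0.0.1:1080']
    assert cli_bool_option(params, '--check-certificate', 'nocheckcertificate',
                           'false', 'true') == ['--check-certificate', 'false']
    assert cli_valueless_option(params, '--no-check-certificate', 'nocheckcertificate') == ['--no-check-certificate']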
5123
5124
e92caff5 5125def cli_configuration_args(argdict, keys, default=[], use_compat=True):
eab9b2bc 5126 if isinstance(argdict, (list, tuple)): # for backward compatibility
e92caff5 5127 if use_compat:
5b1ecbb3 5128 return argdict
5129 else:
5130 argdict = None
eab9b2bc 5131 if argdict is None:
5b1ecbb3 5132 return default
eab9b2bc 5133 assert isinstance(argdict, dict)
5134
e92caff5 5135 assert isinstance(keys, (list, tuple))
5136 for key_list in keys:
e92caff5 5137 arg_list = list(filter(
5138 lambda x: x is not None,
6606817a 5139 [argdict.get(key.lower()) for key in variadic(key_list)]))
e92caff5 5140 if arg_list:
5141 return [arg for args in arg_list for arg in args]
5142 return default
66e289ba 5143
6251555f 5144
330690a2 5145def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
5146 main_key, exe = main_key.lower(), exe.lower()
5147 root_key = exe if main_key == exe else f'{main_key}+{exe}'
5148 keys = [f'{root_key}{k}' for k in (keys or [''])]
5149 if root_key in keys:
5150 if main_key != exe:
5151 keys.append((main_key, exe))
5152 keys.append('default')
5153 else:
5154 use_compat = False
5155 return cli_configuration_args(argdict, keys, default, use_compat)
5156
66e289ba 5157
39672624
YCH
5158class ISO639Utils(object):
5159 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
5160 _lang_map = {
5161 'aa': 'aar',
5162 'ab': 'abk',
5163 'ae': 'ave',
5164 'af': 'afr',
5165 'ak': 'aka',
5166 'am': 'amh',
5167 'an': 'arg',
5168 'ar': 'ara',
5169 'as': 'asm',
5170 'av': 'ava',
5171 'ay': 'aym',
5172 'az': 'aze',
5173 'ba': 'bak',
5174 'be': 'bel',
5175 'bg': 'bul',
5176 'bh': 'bih',
5177 'bi': 'bis',
5178 'bm': 'bam',
5179 'bn': 'ben',
5180 'bo': 'bod',
5181 'br': 'bre',
5182 'bs': 'bos',
5183 'ca': 'cat',
5184 'ce': 'che',
5185 'ch': 'cha',
5186 'co': 'cos',
5187 'cr': 'cre',
5188 'cs': 'ces',
5189 'cu': 'chu',
5190 'cv': 'chv',
5191 'cy': 'cym',
5192 'da': 'dan',
5193 'de': 'deu',
5194 'dv': 'div',
5195 'dz': 'dzo',
5196 'ee': 'ewe',
5197 'el': 'ell',
5198 'en': 'eng',
5199 'eo': 'epo',
5200 'es': 'spa',
5201 'et': 'est',
5202 'eu': 'eus',
5203 'fa': 'fas',
5204 'ff': 'ful',
5205 'fi': 'fin',
5206 'fj': 'fij',
5207 'fo': 'fao',
5208 'fr': 'fra',
5209 'fy': 'fry',
5210 'ga': 'gle',
5211 'gd': 'gla',
5212 'gl': 'glg',
5213 'gn': 'grn',
5214 'gu': 'guj',
5215 'gv': 'glv',
5216 'ha': 'hau',
5217 'he': 'heb',
b7acc835 5218 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
5219 'hi': 'hin',
5220 'ho': 'hmo',
5221 'hr': 'hrv',
5222 'ht': 'hat',
5223 'hu': 'hun',
5224 'hy': 'hye',
5225 'hz': 'her',
5226 'ia': 'ina',
5227 'id': 'ind',
b7acc835 5228 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
5229 'ie': 'ile',
5230 'ig': 'ibo',
5231 'ii': 'iii',
5232 'ik': 'ipk',
5233 'io': 'ido',
5234 'is': 'isl',
5235 'it': 'ita',
5236 'iu': 'iku',
5237 'ja': 'jpn',
5238 'jv': 'jav',
5239 'ka': 'kat',
5240 'kg': 'kon',
5241 'ki': 'kik',
5242 'kj': 'kua',
5243 'kk': 'kaz',
5244 'kl': 'kal',
5245 'km': 'khm',
5246 'kn': 'kan',
5247 'ko': 'kor',
5248 'kr': 'kau',
5249 'ks': 'kas',
5250 'ku': 'kur',
5251 'kv': 'kom',
5252 'kw': 'cor',
5253 'ky': 'kir',
5254 'la': 'lat',
5255 'lb': 'ltz',
5256 'lg': 'lug',
5257 'li': 'lim',
5258 'ln': 'lin',
5259 'lo': 'lao',
5260 'lt': 'lit',
5261 'lu': 'lub',
5262 'lv': 'lav',
5263 'mg': 'mlg',
5264 'mh': 'mah',
5265 'mi': 'mri',
5266 'mk': 'mkd',
5267 'ml': 'mal',
5268 'mn': 'mon',
5269 'mr': 'mar',
5270 'ms': 'msa',
5271 'mt': 'mlt',
5272 'my': 'mya',
5273 'na': 'nau',
5274 'nb': 'nob',
5275 'nd': 'nde',
5276 'ne': 'nep',
5277 'ng': 'ndo',
5278 'nl': 'nld',
5279 'nn': 'nno',
5280 'no': 'nor',
5281 'nr': 'nbl',
5282 'nv': 'nav',
5283 'ny': 'nya',
5284 'oc': 'oci',
5285 'oj': 'oji',
5286 'om': 'orm',
5287 'or': 'ori',
5288 'os': 'oss',
5289 'pa': 'pan',
5290 'pi': 'pli',
5291 'pl': 'pol',
5292 'ps': 'pus',
5293 'pt': 'por',
5294 'qu': 'que',
5295 'rm': 'roh',
5296 'rn': 'run',
5297 'ro': 'ron',
5298 'ru': 'rus',
5299 'rw': 'kin',
5300 'sa': 'san',
5301 'sc': 'srd',
5302 'sd': 'snd',
5303 'se': 'sme',
5304 'sg': 'sag',
5305 'si': 'sin',
5306 'sk': 'slk',
5307 'sl': 'slv',
5308 'sm': 'smo',
5309 'sn': 'sna',
5310 'so': 'som',
5311 'sq': 'sqi',
5312 'sr': 'srp',
5313 'ss': 'ssw',
5314 'st': 'sot',
5315 'su': 'sun',
5316 'sv': 'swe',
5317 'sw': 'swa',
5318 'ta': 'tam',
5319 'te': 'tel',
5320 'tg': 'tgk',
5321 'th': 'tha',
5322 'ti': 'tir',
5323 'tk': 'tuk',
5324 'tl': 'tgl',
5325 'tn': 'tsn',
5326 'to': 'ton',
5327 'tr': 'tur',
5328 'ts': 'tso',
5329 'tt': 'tat',
5330 'tw': 'twi',
5331 'ty': 'tah',
5332 'ug': 'uig',
5333 'uk': 'ukr',
5334 'ur': 'urd',
5335 'uz': 'uzb',
5336 've': 'ven',
5337 'vi': 'vie',
5338 'vo': 'vol',
5339 'wa': 'wln',
5340 'wo': 'wol',
5341 'xh': 'xho',
5342 'yi': 'yid',
e9a50fba 5343 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
5344 'yo': 'yor',
5345 'za': 'zha',
5346 'zh': 'zho',
5347 'zu': 'zul',
5348 }
5349
5350 @classmethod
5351 def short2long(cls, code):
5352 """Convert language code from ISO 639-1 to ISO 639-2/T"""
5353 return cls._lang_map.get(code[:2])
5354
5355 @classmethod
5356 def long2short(cls, code):
5357 """Convert language code from ISO 639-2/T to ISO 639-1"""
5358 for short_name, long_name in cls._lang_map.items():
5359 if long_name == code:
5360 return short_name
5361
5362
4eb10f66
YCH
5363class ISO3166Utils(object):
5364 # From http://data.okfn.org/data/core/country-list
5365 _country_map = {
5366 'AF': 'Afghanistan',
5367 'AX': 'Åland Islands',
5368 'AL': 'Albania',
5369 'DZ': 'Algeria',
5370 'AS': 'American Samoa',
5371 'AD': 'Andorra',
5372 'AO': 'Angola',
5373 'AI': 'Anguilla',
5374 'AQ': 'Antarctica',
5375 'AG': 'Antigua and Barbuda',
5376 'AR': 'Argentina',
5377 'AM': 'Armenia',
5378 'AW': 'Aruba',
5379 'AU': 'Australia',
5380 'AT': 'Austria',
5381 'AZ': 'Azerbaijan',
5382 'BS': 'Bahamas',
5383 'BH': 'Bahrain',
5384 'BD': 'Bangladesh',
5385 'BB': 'Barbados',
5386 'BY': 'Belarus',
5387 'BE': 'Belgium',
5388 'BZ': 'Belize',
5389 'BJ': 'Benin',
5390 'BM': 'Bermuda',
5391 'BT': 'Bhutan',
5392 'BO': 'Bolivia, Plurinational State of',
5393 'BQ': 'Bonaire, Sint Eustatius and Saba',
5394 'BA': 'Bosnia and Herzegovina',
5395 'BW': 'Botswana',
5396 'BV': 'Bouvet Island',
5397 'BR': 'Brazil',
5398 'IO': 'British Indian Ocean Territory',
5399 'BN': 'Brunei Darussalam',
5400 'BG': 'Bulgaria',
5401 'BF': 'Burkina Faso',
5402 'BI': 'Burundi',
5403 'KH': 'Cambodia',
5404 'CM': 'Cameroon',
5405 'CA': 'Canada',
5406 'CV': 'Cape Verde',
5407 'KY': 'Cayman Islands',
5408 'CF': 'Central African Republic',
5409 'TD': 'Chad',
5410 'CL': 'Chile',
5411 'CN': 'China',
5412 'CX': 'Christmas Island',
5413 'CC': 'Cocos (Keeling) Islands',
5414 'CO': 'Colombia',
5415 'KM': 'Comoros',
5416 'CG': 'Congo',
5417 'CD': 'Congo, the Democratic Republic of the',
5418 'CK': 'Cook Islands',
5419 'CR': 'Costa Rica',
5420 'CI': 'Côte d\'Ivoire',
5421 'HR': 'Croatia',
5422 'CU': 'Cuba',
5423 'CW': 'Curaçao',
5424 'CY': 'Cyprus',
5425 'CZ': 'Czech Republic',
5426 'DK': 'Denmark',
5427 'DJ': 'Djibouti',
5428 'DM': 'Dominica',
5429 'DO': 'Dominican Republic',
5430 'EC': 'Ecuador',
5431 'EG': 'Egypt',
5432 'SV': 'El Salvador',
5433 'GQ': 'Equatorial Guinea',
5434 'ER': 'Eritrea',
5435 'EE': 'Estonia',
5436 'ET': 'Ethiopia',
5437 'FK': 'Falkland Islands (Malvinas)',
5438 'FO': 'Faroe Islands',
5439 'FJ': 'Fiji',
5440 'FI': 'Finland',
5441 'FR': 'France',
5442 'GF': 'French Guiana',
5443 'PF': 'French Polynesia',
5444 'TF': 'French Southern Territories',
5445 'GA': 'Gabon',
5446 'GM': 'Gambia',
5447 'GE': 'Georgia',
5448 'DE': 'Germany',
5449 'GH': 'Ghana',
5450 'GI': 'Gibraltar',
5451 'GR': 'Greece',
5452 'GL': 'Greenland',
5453 'GD': 'Grenada',
5454 'GP': 'Guadeloupe',
5455 'GU': 'Guam',
5456 'GT': 'Guatemala',
5457 'GG': 'Guernsey',
5458 'GN': 'Guinea',
5459 'GW': 'Guinea-Bissau',
5460 'GY': 'Guyana',
5461 'HT': 'Haiti',
5462 'HM': 'Heard Island and McDonald Islands',
5463 'VA': 'Holy See (Vatican City State)',
5464 'HN': 'Honduras',
5465 'HK': 'Hong Kong',
5466 'HU': 'Hungary',
5467 'IS': 'Iceland',
5468 'IN': 'India',
5469 'ID': 'Indonesia',
5470 'IR': 'Iran, Islamic Republic of',
5471 'IQ': 'Iraq',
5472 'IE': 'Ireland',
5473 'IM': 'Isle of Man',
5474 'IL': 'Israel',
5475 'IT': 'Italy',
5476 'JM': 'Jamaica',
5477 'JP': 'Japan',
5478 'JE': 'Jersey',
5479 'JO': 'Jordan',
5480 'KZ': 'Kazakhstan',
5481 'KE': 'Kenya',
5482 'KI': 'Kiribati',
5483 'KP': 'Korea, Democratic People\'s Republic of',
5484 'KR': 'Korea, Republic of',
5485 'KW': 'Kuwait',
5486 'KG': 'Kyrgyzstan',
5487 'LA': 'Lao People\'s Democratic Republic',
5488 'LV': 'Latvia',
5489 'LB': 'Lebanon',
5490 'LS': 'Lesotho',
5491 'LR': 'Liberia',
5492 'LY': 'Libya',
5493 'LI': 'Liechtenstein',
5494 'LT': 'Lithuania',
5495 'LU': 'Luxembourg',
5496 'MO': 'Macao',
5497 'MK': 'Macedonia, the Former Yugoslav Republic of',
5498 'MG': 'Madagascar',
5499 'MW': 'Malawi',
5500 'MY': 'Malaysia',
5501 'MV': 'Maldives',
5502 'ML': 'Mali',
5503 'MT': 'Malta',
5504 'MH': 'Marshall Islands',
5505 'MQ': 'Martinique',
5506 'MR': 'Mauritania',
5507 'MU': 'Mauritius',
5508 'YT': 'Mayotte',
5509 'MX': 'Mexico',
5510 'FM': 'Micronesia, Federated States of',
5511 'MD': 'Moldova, Republic of',
5512 'MC': 'Monaco',
5513 'MN': 'Mongolia',
5514 'ME': 'Montenegro',
5515 'MS': 'Montserrat',
5516 'MA': 'Morocco',
5517 'MZ': 'Mozambique',
5518 'MM': 'Myanmar',
5519 'NA': 'Namibia',
5520 'NR': 'Nauru',
5521 'NP': 'Nepal',
5522 'NL': 'Netherlands',
5523 'NC': 'New Caledonia',
5524 'NZ': 'New Zealand',
5525 'NI': 'Nicaragua',
5526 'NE': 'Niger',
5527 'NG': 'Nigeria',
5528 'NU': 'Niue',
5529 'NF': 'Norfolk Island',
5530 'MP': 'Northern Mariana Islands',
5531 'NO': 'Norway',
5532 'OM': 'Oman',
5533 'PK': 'Pakistan',
5534 'PW': 'Palau',
5535 'PS': 'Palestine, State of',
5536 'PA': 'Panama',
5537 'PG': 'Papua New Guinea',
5538 'PY': 'Paraguay',
5539 'PE': 'Peru',
5540 'PH': 'Philippines',
5541 'PN': 'Pitcairn',
5542 'PL': 'Poland',
5543 'PT': 'Portugal',
5544 'PR': 'Puerto Rico',
5545 'QA': 'Qatar',
5546 'RE': 'Réunion',
5547 'RO': 'Romania',
5548 'RU': 'Russian Federation',
5549 'RW': 'Rwanda',
5550 'BL': 'Saint Barthélemy',
5551 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5552 'KN': 'Saint Kitts and Nevis',
5553 'LC': 'Saint Lucia',
5554 'MF': 'Saint Martin (French part)',
5555 'PM': 'Saint Pierre and Miquelon',
5556 'VC': 'Saint Vincent and the Grenadines',
5557 'WS': 'Samoa',
5558 'SM': 'San Marino',
5559 'ST': 'Sao Tome and Principe',
5560 'SA': 'Saudi Arabia',
5561 'SN': 'Senegal',
5562 'RS': 'Serbia',
5563 'SC': 'Seychelles',
5564 'SL': 'Sierra Leone',
5565 'SG': 'Singapore',
5566 'SX': 'Sint Maarten (Dutch part)',
5567 'SK': 'Slovakia',
5568 'SI': 'Slovenia',
5569 'SB': 'Solomon Islands',
5570 'SO': 'Somalia',
5571 'ZA': 'South Africa',
5572 'GS': 'South Georgia and the South Sandwich Islands',
5573 'SS': 'South Sudan',
5574 'ES': 'Spain',
5575 'LK': 'Sri Lanka',
5576 'SD': 'Sudan',
5577 'SR': 'Suriname',
5578 'SJ': 'Svalbard and Jan Mayen',
5579 'SZ': 'Swaziland',
5580 'SE': 'Sweden',
5581 'CH': 'Switzerland',
5582 'SY': 'Syrian Arab Republic',
5583 'TW': 'Taiwan, Province of China',
5584 'TJ': 'Tajikistan',
5585 'TZ': 'Tanzania, United Republic of',
5586 'TH': 'Thailand',
5587 'TL': 'Timor-Leste',
5588 'TG': 'Togo',
5589 'TK': 'Tokelau',
5590 'TO': 'Tonga',
5591 'TT': 'Trinidad and Tobago',
5592 'TN': 'Tunisia',
5593 'TR': 'Turkey',
5594 'TM': 'Turkmenistan',
5595 'TC': 'Turks and Caicos Islands',
5596 'TV': 'Tuvalu',
5597 'UG': 'Uganda',
5598 'UA': 'Ukraine',
5599 'AE': 'United Arab Emirates',
5600 'GB': 'United Kingdom',
5601 'US': 'United States',
5602 'UM': 'United States Minor Outlying Islands',
5603 'UY': 'Uruguay',
5604 'UZ': 'Uzbekistan',
5605 'VU': 'Vanuatu',
5606 'VE': 'Venezuela, Bolivarian Republic of',
5607 'VN': 'Viet Nam',
5608 'VG': 'Virgin Islands, British',
5609 'VI': 'Virgin Islands, U.S.',
5610 'WF': 'Wallis and Futuna',
5611 'EH': 'Western Sahara',
5612 'YE': 'Yemen',
5613 'ZM': 'Zambia',
5614 'ZW': 'Zimbabwe',
5615 }
5616
5617 @classmethod
5618 def short2full(cls, code):
5619 """Convert an ISO 3166-2 country code to the corresponding full name"""
5620 return cls._country_map.get(code.upper())
5621
5622
773f291d
S
5623class GeoUtils(object):
5624 # Major IPv4 address blocks per country
5625 _country_ip_map = {
53896ca5 5626 'AD': '46.172.224.0/19',
773f291d
S
5627 'AE': '94.200.0.0/13',
5628 'AF': '149.54.0.0/17',
5629 'AG': '209.59.64.0/18',
5630 'AI': '204.14.248.0/21',
5631 'AL': '46.99.0.0/16',
5632 'AM': '46.70.0.0/15',
5633 'AO': '105.168.0.0/13',
53896ca5
S
5634 'AP': '182.50.184.0/21',
5635 'AQ': '23.154.160.0/24',
773f291d
S
5636 'AR': '181.0.0.0/12',
5637 'AS': '202.70.112.0/20',
53896ca5 5638 'AT': '77.116.0.0/14',
773f291d
S
5639 'AU': '1.128.0.0/11',
5640 'AW': '181.41.0.0/18',
53896ca5
S
5641 'AX': '185.217.4.0/22',
5642 'AZ': '5.197.0.0/16',
773f291d
S
5643 'BA': '31.176.128.0/17',
5644 'BB': '65.48.128.0/17',
5645 'BD': '114.130.0.0/16',
5646 'BE': '57.0.0.0/8',
53896ca5 5647 'BF': '102.178.0.0/15',
773f291d
S
5648 'BG': '95.42.0.0/15',
5649 'BH': '37.131.0.0/17',
5650 'BI': '154.117.192.0/18',
5651 'BJ': '137.255.0.0/16',
53896ca5 5652 'BL': '185.212.72.0/23',
773f291d
S
5653 'BM': '196.12.64.0/18',
5654 'BN': '156.31.0.0/16',
5655 'BO': '161.56.0.0/16',
5656 'BQ': '161.0.80.0/20',
53896ca5 5657 'BR': '191.128.0.0/12',
773f291d
S
5658 'BS': '24.51.64.0/18',
5659 'BT': '119.2.96.0/19',
5660 'BW': '168.167.0.0/16',
5661 'BY': '178.120.0.0/13',
5662 'BZ': '179.42.192.0/18',
5663 'CA': '99.224.0.0/11',
5664 'CD': '41.243.0.0/16',
53896ca5
S
5665 'CF': '197.242.176.0/21',
5666 'CG': '160.113.0.0/16',
773f291d 5667 'CH': '85.0.0.0/13',
53896ca5 5668 'CI': '102.136.0.0/14',
773f291d
S
5669 'CK': '202.65.32.0/19',
5670 'CL': '152.172.0.0/14',
53896ca5 5671 'CM': '102.244.0.0/14',
773f291d
S
5672 'CN': '36.128.0.0/10',
5673 'CO': '181.240.0.0/12',
5674 'CR': '201.192.0.0/12',
5675 'CU': '152.206.0.0/15',
5676 'CV': '165.90.96.0/19',
5677 'CW': '190.88.128.0/17',
53896ca5 5678 'CY': '31.153.0.0/16',
773f291d
S
5679 'CZ': '88.100.0.0/14',
5680 'DE': '53.0.0.0/8',
5681 'DJ': '197.241.0.0/17',
5682 'DK': '87.48.0.0/12',
5683 'DM': '192.243.48.0/20',
5684 'DO': '152.166.0.0/15',
5685 'DZ': '41.96.0.0/12',
5686 'EC': '186.68.0.0/15',
5687 'EE': '90.190.0.0/15',
5688 'EG': '156.160.0.0/11',
5689 'ER': '196.200.96.0/20',
5690 'ES': '88.0.0.0/11',
5691 'ET': '196.188.0.0/14',
5692 'EU': '2.16.0.0/13',
5693 'FI': '91.152.0.0/13',
5694 'FJ': '144.120.0.0/16',
53896ca5 5695 'FK': '80.73.208.0/21',
773f291d
S
5696 'FM': '119.252.112.0/20',
5697 'FO': '88.85.32.0/19',
5698 'FR': '90.0.0.0/9',
5699 'GA': '41.158.0.0/15',
5700 'GB': '25.0.0.0/8',
5701 'GD': '74.122.88.0/21',
5702 'GE': '31.146.0.0/16',
5703 'GF': '161.22.64.0/18',
5704 'GG': '62.68.160.0/19',
53896ca5
S
5705 'GH': '154.160.0.0/12',
5706 'GI': '95.164.0.0/16',
773f291d
S
5707 'GL': '88.83.0.0/19',
5708 'GM': '160.182.0.0/15',
5709 'GN': '197.149.192.0/18',
5710 'GP': '104.250.0.0/19',
5711 'GQ': '105.235.224.0/20',
5712 'GR': '94.64.0.0/13',
5713 'GT': '168.234.0.0/16',
5714 'GU': '168.123.0.0/16',
5715 'GW': '197.214.80.0/20',
5716 'GY': '181.41.64.0/18',
5717 'HK': '113.252.0.0/14',
5718 'HN': '181.210.0.0/16',
5719 'HR': '93.136.0.0/13',
5720 'HT': '148.102.128.0/17',
5721 'HU': '84.0.0.0/14',
5722 'ID': '39.192.0.0/10',
5723 'IE': '87.32.0.0/12',
5724 'IL': '79.176.0.0/13',
5725 'IM': '5.62.80.0/20',
5726 'IN': '117.192.0.0/10',
5727 'IO': '203.83.48.0/21',
5728 'IQ': '37.236.0.0/14',
5729 'IR': '2.176.0.0/12',
5730 'IS': '82.221.0.0/16',
5731 'IT': '79.0.0.0/10',
5732 'JE': '87.244.64.0/18',
5733 'JM': '72.27.0.0/17',
5734 'JO': '176.29.0.0/16',
53896ca5 5735 'JP': '133.0.0.0/8',
773f291d
S
5736 'KE': '105.48.0.0/12',
5737 'KG': '158.181.128.0/17',
5738 'KH': '36.37.128.0/17',
5739 'KI': '103.25.140.0/22',
5740 'KM': '197.255.224.0/20',
53896ca5 5741 'KN': '198.167.192.0/19',
773f291d
S
5742 'KP': '175.45.176.0/22',
5743 'KR': '175.192.0.0/10',
5744 'KW': '37.36.0.0/14',
5745 'KY': '64.96.0.0/15',
5746 'KZ': '2.72.0.0/13',
5747 'LA': '115.84.64.0/18',
5748 'LB': '178.135.0.0/16',
53896ca5 5749 'LC': '24.92.144.0/20',
773f291d
S
5750 'LI': '82.117.0.0/19',
5751 'LK': '112.134.0.0/15',
53896ca5 5752 'LR': '102.183.0.0/16',
773f291d
S
5753 'LS': '129.232.0.0/17',
5754 'LT': '78.56.0.0/13',
5755 'LU': '188.42.0.0/16',
5756 'LV': '46.109.0.0/16',
5757 'LY': '41.252.0.0/14',
5758 'MA': '105.128.0.0/11',
5759 'MC': '88.209.64.0/18',
5760 'MD': '37.246.0.0/16',
5761 'ME': '178.175.0.0/17',
5762 'MF': '74.112.232.0/21',
5763 'MG': '154.126.0.0/17',
5764 'MH': '117.103.88.0/21',
5765 'MK': '77.28.0.0/15',
5766 'ML': '154.118.128.0/18',
5767 'MM': '37.111.0.0/17',
5768 'MN': '49.0.128.0/17',
5769 'MO': '60.246.0.0/16',
5770 'MP': '202.88.64.0/20',
5771 'MQ': '109.203.224.0/19',
5772 'MR': '41.188.64.0/18',
5773 'MS': '208.90.112.0/22',
5774 'MT': '46.11.0.0/16',
5775 'MU': '105.16.0.0/12',
5776 'MV': '27.114.128.0/18',
53896ca5 5777 'MW': '102.70.0.0/15',
773f291d
S
5778 'MX': '187.192.0.0/11',
5779 'MY': '175.136.0.0/13',
5780 'MZ': '197.218.0.0/15',
5781 'NA': '41.182.0.0/16',
5782 'NC': '101.101.0.0/18',
5783 'NE': '197.214.0.0/18',
5784 'NF': '203.17.240.0/22',
5785 'NG': '105.112.0.0/12',
5786 'NI': '186.76.0.0/15',
5787 'NL': '145.96.0.0/11',
5788 'NO': '84.208.0.0/13',
5789 'NP': '36.252.0.0/15',
5790 'NR': '203.98.224.0/19',
5791 'NU': '49.156.48.0/22',
5792 'NZ': '49.224.0.0/14',
5793 'OM': '5.36.0.0/15',
5794 'PA': '186.72.0.0/15',
5795 'PE': '186.160.0.0/14',
5796 'PF': '123.50.64.0/18',
5797 'PG': '124.240.192.0/19',
5798 'PH': '49.144.0.0/13',
5799 'PK': '39.32.0.0/11',
5800 'PL': '83.0.0.0/11',
5801 'PM': '70.36.0.0/20',
5802 'PR': '66.50.0.0/16',
5803 'PS': '188.161.0.0/16',
5804 'PT': '85.240.0.0/13',
5805 'PW': '202.124.224.0/20',
5806 'PY': '181.120.0.0/14',
5807 'QA': '37.210.0.0/15',
53896ca5 5808 'RE': '102.35.0.0/16',
773f291d 5809 'RO': '79.112.0.0/13',
53896ca5 5810 'RS': '93.86.0.0/15',
773f291d 5811 'RU': '5.136.0.0/13',
53896ca5 5812 'RW': '41.186.0.0/16',
773f291d
S
5813 'SA': '188.48.0.0/13',
5814 'SB': '202.1.160.0/19',
5815 'SC': '154.192.0.0/11',
53896ca5 5816 'SD': '102.120.0.0/13',
773f291d 5817 'SE': '78.64.0.0/12',
53896ca5 5818 'SG': '8.128.0.0/10',
773f291d
S
5819 'SI': '188.196.0.0/14',
5820 'SK': '78.98.0.0/15',
53896ca5 5821 'SL': '102.143.0.0/17',
773f291d
S
5822 'SM': '89.186.32.0/19',
5823 'SN': '41.82.0.0/15',
53896ca5 5824 'SO': '154.115.192.0/18',
773f291d
S
5825 'SR': '186.179.128.0/17',
5826 'SS': '105.235.208.0/21',
5827 'ST': '197.159.160.0/19',
5828 'SV': '168.243.0.0/16',
5829 'SX': '190.102.0.0/20',
5830 'SY': '5.0.0.0/16',
5831 'SZ': '41.84.224.0/19',
5832 'TC': '65.255.48.0/20',
5833 'TD': '154.68.128.0/19',
5834 'TG': '196.168.0.0/14',
5835 'TH': '171.96.0.0/13',
5836 'TJ': '85.9.128.0/18',
5837 'TK': '27.96.24.0/21',
5838 'TL': '180.189.160.0/20',
5839 'TM': '95.85.96.0/19',
5840 'TN': '197.0.0.0/11',
5841 'TO': '175.176.144.0/21',
5842 'TR': '78.160.0.0/11',
5843 'TT': '186.44.0.0/15',
5844 'TV': '202.2.96.0/19',
5845 'TW': '120.96.0.0/11',
5846 'TZ': '156.156.0.0/14',
53896ca5
S
5847 'UA': '37.52.0.0/14',
5848 'UG': '102.80.0.0/13',
5849 'US': '6.0.0.0/8',
773f291d 5850 'UY': '167.56.0.0/13',
53896ca5 5851 'UZ': '84.54.64.0/18',
773f291d 5852 'VA': '212.77.0.0/19',
53896ca5 5853 'VC': '207.191.240.0/21',
773f291d 5854 'VE': '186.88.0.0/13',
53896ca5 5855 'VG': '66.81.192.0/20',
773f291d
S
5856 'VI': '146.226.0.0/16',
5857 'VN': '14.160.0.0/11',
5858 'VU': '202.80.32.0/20',
5859 'WF': '117.20.32.0/21',
5860 'WS': '202.4.32.0/19',
5861 'YE': '134.35.0.0/16',
5862 'YT': '41.242.116.0/22',
5863 'ZA': '41.0.0.0/11',
53896ca5
S
5864 'ZM': '102.144.0.0/13',
5865 'ZW': '102.177.192.0/18',
773f291d
S
5866 }
5867
5868 @classmethod
5f95927a
S
5869 def random_ipv4(cls, code_or_block):
5870 if len(code_or_block) == 2:
5871 block = cls._country_ip_map.get(code_or_block.upper())
5872 if not block:
5873 return None
5874 else:
5875 block = code_or_block
773f291d
S
5876 addr, preflen = block.split('/')
5877 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5878 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5879 return compat_str(socket.inet_ntoa(
4248dad9 5880 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
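
# Editor's illustrative sketch (not part of the original source): GeoUtils.random_ipv4()
# accepts either a two-letter country code (looked up in _country_ip_map) or an explicit
# CIDR block. The helper name and the addresses below are made up for demonstration.
def _demo_geo_random_ipv4():
    assert GeoUtils.random_ipv4('XX') is None            # unknown country code
    us_addr = GeoUtils.random_ipv4('US')                 # random address inside 6.0.0.0/8
    doc_addr = GeoUtils.random_ipv4('192.0.2.0/24')      # explicit block (TEST-NET-1)
    return us_addr, doc_addr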
773f291d
S
5881
5882
91410c9b 5883class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
2461f79d
PH
5884 def __init__(self, proxies=None):
5885 # Set default handlers
5886 for type in ('http', 'https'):
5887 setattr(self, '%s_open' % type,
5888 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
5889 meth(r, proxy, type))
38e87f6c 5890 compat_urllib_request.ProxyHandler.__init__(self, proxies)
2461f79d 5891
91410c9b 5892 def proxy_open(self, req, proxy, type):
2461f79d 5893 req_proxy = req.headers.get('Ytdl-request-proxy')
91410c9b
PH
5894 if req_proxy is not None:
5895 proxy = req_proxy
2461f79d
PH
5896 del req.headers['Ytdl-request-proxy']
5897
5898 if proxy == '__noproxy__':
5899 return None # No Proxy
51fb4995 5900 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
71aff188 5901 req.add_header('Ytdl-socks-proxy', proxy)
7a5c1cfe 5902 # yt-dlp's http/https handlers do wrapping the socket with socks
71aff188 5903 return None
91410c9b
PH
5904 return compat_urllib_request.ProxyHandler.proxy_open(
5905 self, req, proxy, type)
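
# Editor's illustrative sketch (not part of the original source): the handler picks the
# proxy per request from the Ytdl-request-proxy header rather than a global setting.
# The proxy URL below is a made-up placeholder and nothing is actually fetched here.
def _demo_per_request_proxy():
    opener = compat_urllib_request.build_opener(
        PerRequestProxyHandler({'http': 'http://proxy.example:3128'}))
    req = compat_urllib_request.Request('http://example.com')
    req.add_header('Ytdl-request-proxy', '__noproxy__')  # bypass the proxy for this request only
    return opener, req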
5bc880b9
YCH
5906
5907
0a5445dd
YCH
5908# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5909# released into the public domain
5910# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5911
5912def long_to_bytes(n, blocksize=0):
5913 """long_to_bytes(n:long, blocksize:int) : string
5914 Convert a long integer to a byte string.
5915
5916 If optional blocksize is given and greater than zero, pad the front of the
5917 byte string with binary zeros so that the length is a multiple of
5918 blocksize.
5919 """
5920 # after much testing, this algorithm was deemed to be the fastest
5921 s = b''
5922 n = int(n)
5923 while n > 0:
5924 s = compat_struct_pack('>I', n & 0xffffffff) + s
5925 n = n >> 32
5926 # strip off leading zeros
5927 for i in range(len(s)):
5928 if s[i] != b'\000'[0]:
5929 break
5930 else:
5931 # only happens when n == 0
5932 s = b'\000'
5933 i = 0
5934 s = s[i:]
5935 # add back some pad bytes. this could be done more efficiently w.r.t. the
5936 # de-padding being done above, but sigh...
5937 if blocksize > 0 and len(s) % blocksize:
5938 s = (blocksize - len(s) % blocksize) * b'\000' + s
5939 return s
5940
5941
5942def bytes_to_long(s):
5943 """bytes_to_long(string) : long
5944 Convert a byte string to a long integer.
5945
5946 This is (essentially) the inverse of long_to_bytes().
5947 """
5948 acc = 0
5949 length = len(s)
5950 if length % 4:
5951 extra = (4 - length % 4)
5952 s = b'\000' * extra + s
5953 length = length + extra
5954 for i in range(0, length, 4):
5955 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
5956 return acc
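
# Editor's illustrative sketch (not part of the original source): the two helpers above
# are inverses of each other, with `blocksize` controlling front zero-padding.
def _demo_long_bytes_roundtrip():
    n = 0xdeadbeefcafe
    s = long_to_bytes(n)                                # b'\xde\xad\xbe\xef\xca\xfe'
    assert bytes_to_long(s) == n
    assert len(long_to_bytes(n, blocksize=16)) == 16    # front-padded with b'\x00'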
5957
5958
5bc880b9
YCH
5959def ohdave_rsa_encrypt(data, exponent, modulus):
5960 '''
5961 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
5962
5963 Input:
5964 data: data to encrypt, bytes-like object
5965        exponent, modulus: parameters e and N of the RSA algorithm, both integers
5966 Output: hex string of encrypted data
5967
5968 Limitation: supports one block encryption only
5969 '''
5970
5971 payload = int(binascii.hexlify(data[::-1]), 16)
5972 encrypted = pow(payload, exponent, modulus)
5973 return '%x' % encrypted
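
# Editor's illustrative sketch (not part of the original source): textbook RSA with
# made-up toy parameters - real call sites pass site-specific (e, N) pairs that are
# far larger than this.
def _demo_ohdave_rsa_encrypt():
    e, n = 65537, 3233 * 3329                   # toy modulus, NOT a usable key
    return ohdave_rsa_encrypt(b'hi', e, n)      # hex string of pow(payload, e, n)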
81bdc8fd
YCH
5974
5975
f48409c7
YCH
5976def pkcs1pad(data, length):
5977 """
5978    Pad input data with the PKCS#1 scheme
5979
5980 @param {int[]} data input data
5981 @param {int} length target length
5982 @returns {int[]} padded data
5983 """
5984 if len(data) > length - 11:
5985 raise ValueError('Input data too long for PKCS#1 padding')
5986
5987    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]  # filler bytes must be non-zero per PKCS#1 v1.5
5988 return [0, 2] + pseudo_random + [0] + data
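
# Editor's illustrative sketch (not part of the original source): the padded block has
# the form [0x00, 0x02, <random filler>, 0x00, <data>] and the requested total length.
def _demo_pkcs1pad():
    padded = pkcs1pad([1, 2, 3], 16)
    assert len(padded) == 16
    assert padded[:2] == [0, 2] and padded[-4:] == [0, 1, 2, 3]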
5989
5990
5eb6bdce 5991def encode_base_n(num, n, table=None):
59f898b7 5992 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
59f898b7
YCH
5993 if not table:
5994 table = FULL_TABLE[:n]
5995
5eb6bdce
YCH
5996 if n > len(table):
5997 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
5998
5999 if num == 0:
6000 return table[0]
6001
81bdc8fd
YCH
6002 ret = ''
6003 while num:
6004 ret = table[num % n] + ret
6005 num = num // n
6006 return ret
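
# Editor's illustrative sketch (not part of the original source): a few conversions
# using the default digit table.
def _demo_encode_base_n():
    assert encode_base_n(255, 16) == 'ff'
    assert encode_base_n(0, 36) == '0'
    assert encode_base_n(35, 36) == 'z'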
f52354a8
YCH
6007
6008
6009def decode_packed_codes(code):
06b3fe29 6010 mobj = re.search(PACKED_CODES_RE, code)
a0566bbf 6011 obfuscated_code, base, count, symbols = mobj.groups()
f52354a8
YCH
6012 base = int(base)
6013 count = int(count)
6014 symbols = symbols.split('|')
6015 symbol_table = {}
6016
6017 while count:
6018 count -= 1
5eb6bdce 6019 base_n_count = encode_base_n(count, base)
f52354a8
YCH
6020 symbol_table[base_n_count] = symbols[count] or base_n_count
6021
6022 return re.sub(
6023 r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
a0566bbf 6024 obfuscated_code)
e154c651 6025
6026
1ced2221
S
6027def caesar(s, alphabet, shift):
6028 if shift == 0:
6029 return s
6030 l = len(alphabet)
6031 return ''.join(
6032 alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
6033 for c in s)
6034
6035
6036def rot47(s):
6037 return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
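
# Editor's illustrative sketch (not part of the original source): rot47 shifts only the
# printable-ASCII alphabet (applying it twice restores the input), and caesar() leaves
# characters outside the given alphabet untouched.
def _demo_caesar_rot47():
    assert rot47('Hello, World!') == 'w6==@[ (@C=5P'
    assert rot47(rot47('obfuscated text')) == 'obfuscated text'
    assert caesar('abc xyz', 'abcdefghijklmnopqrstuvwxyz', 3) == 'def abc'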
6038
6039
e154c651 6040def parse_m3u8_attributes(attrib):
6041 info = {}
6042 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
6043 if val.startswith('"'):
6044 val = val[1:-1]
6045 info[key] = val
6046 return info
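
# Editor's illustrative sketch (not part of the original source): an attribute list from
# an EXT-X-STREAM-INF tag becomes a plain dict; quoted values lose their quotes and
# everything stays a string.
def _demo_parse_m3u8_attributes():
    attrs = parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2",RESOLUTION=1280x720')
    assert attrs == {
        'BANDWIDTH': '1280000',
        'CODECS': 'avc1.4d401f,mp4a.40.2',
        'RESOLUTION': '1280x720',
    }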
1143535d
YCH
6047
6048
6049def urshift(val, n):
6050 return val >> n if val >= 0 else (val + 0x100000000) >> n
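
# Editor's illustrative sketch (not part of the original source): urshift() emulates
# JavaScript's unsigned `>>>` on 32-bit values, which matters for ported player code
# where Python's `>>` would keep the sign.
def _demo_urshift():
    assert urshift(16, 2) == 4
    assert urshift(-1, 0) == 0xffffffff
    assert urshift(-16, 2) == 0x3ffffffc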
d3f8e038
YCH
6051
6052
6053# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 6054# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
6055def decode_png(png_data):
6056 # Reference: https://www.w3.org/TR/PNG/
6057 header = png_data[8:]
6058
6059 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
6060 raise IOError('Not a valid PNG file.')
6061
6062 int_map = {1: '>B', 2: '>H', 4: '>I'}
6063 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
6064
6065 chunks = []
6066
6067 while header:
6068 length = unpack_integer(header[:4])
6069 header = header[4:]
6070
6071 chunk_type = header[:4]
6072 header = header[4:]
6073
6074 chunk_data = header[:length]
6075 header = header[length:]
6076
6077 header = header[4:] # Skip CRC
6078
6079 chunks.append({
6080 'type': chunk_type,
6081 'length': length,
6082 'data': chunk_data
6083 })
6084
6085 ihdr = chunks[0]['data']
6086
6087 width = unpack_integer(ihdr[:4])
6088 height = unpack_integer(ihdr[4:8])
6089
6090 idat = b''
6091
6092 for chunk in chunks:
6093 if chunk['type'] == b'IDAT':
6094 idat += chunk['data']
6095
6096 if not idat:
6097 raise IOError('Unable to read PNG data.')
6098
6099 decompressed_data = bytearray(zlib.decompress(idat))
6100
6101 stride = width * 3
6102 pixels = []
6103
6104 def _get_pixel(idx):
6105 x = idx % stride
6106 y = idx // stride
6107 return pixels[y][x]
6108
6109 for y in range(height):
6110 basePos = y * (1 + stride)
6111 filter_type = decompressed_data[basePos]
6112
6113 current_row = []
6114
6115 pixels.append(current_row)
6116
6117 for x in range(stride):
6118 color = decompressed_data[1 + basePos + x]
6119 basex = y * stride + x
6120 left = 0
6121 up = 0
6122
6123 if x > 2:
6124 left = _get_pixel(basex - 3)
6125 if y > 0:
6126 up = _get_pixel(basex - stride)
6127
6128 if filter_type == 1: # Sub
6129 color = (color + left) & 0xff
6130 elif filter_type == 2: # Up
6131 color = (color + up) & 0xff
6132 elif filter_type == 3: # Average
6133 color = (color + ((left + up) >> 1)) & 0xff
6134 elif filter_type == 4: # Paeth
6135 a = left
6136 b = up
6137 c = 0
6138
6139 if x > 2 and y > 0:
6140 c = _get_pixel(basex - stride - 3)
6141
6142 p = a + b - c
6143
6144 pa = abs(p - a)
6145 pb = abs(p - b)
6146 pc = abs(p - c)
6147
6148 if pa <= pb and pa <= pc:
6149 color = (color + a) & 0xff
6150 elif pb <= pc:
6151 color = (color + b) & 0xff
6152 else:
6153 color = (color + c) & 0xff
6154
6155 current_row.append(color)
6156
6157 return width, height, pixels
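
# Editor's illustrative sketch (not part of the original source): decode_png() expects
# raw bytes of a non-interlaced 8-bit RGB PNG (the subset its callers produce) and
# returns the dimensions plus rows of raw byte values. 'frame.png' is a placeholder path.
def _demo_decode_png(path='frame.png'):
    with open(path, 'rb') as f:
        width, height, pixels = decode_png(f.read())
    r, g, b = pixels[0][0], pixels[0][1], pixels[0][2]  # channels of the top-left pixel
    return width, height, (r, g, b)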
efa97bdc
YCH
6158
6159
6160def write_xattr(path, key, value):
6161 # This mess below finds the best xattr tool for the job
6162 try:
6163 # try the pyxattr module...
6164 import xattr
6165
53a7e3d2
YCH
6166 if hasattr(xattr, 'set'): # pyxattr
6167 # Unicode arguments are not supported in python-pyxattr until
6168 # version 0.5.0
067aa17e 6169 # See https://github.com/ytdl-org/youtube-dl/issues/5498
53a7e3d2
YCH
6170 pyxattr_required_version = '0.5.0'
6171 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
6172 # TODO: fallback to CLI tools
6173 raise XAttrUnavailableError(
6174 'python-pyxattr is detected but is too old. '
7a5c1cfe 6175 'yt-dlp requires %s or above while your version is %s. '
53a7e3d2
YCH
6176 'Falling back to other xattr implementations' % (
6177 pyxattr_required_version, xattr.__version__))
6178
6179 setxattr = xattr.set
6180 else: # xattr
6181 setxattr = xattr.setxattr
efa97bdc
YCH
6182
6183 try:
53a7e3d2 6184 setxattr(path, key, value)
efa97bdc
YCH
6185 except EnvironmentError as e:
6186 raise XAttrMetadataError(e.errno, e.strerror)
6187
6188 except ImportError:
6189 if compat_os_name == 'nt':
6190 # Write xattrs to NTFS Alternate Data Streams:
6191 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
6192 assert ':' not in key
6193 assert os.path.exists(path)
6194
6195 ads_fn = path + ':' + key
6196 try:
6197 with open(ads_fn, 'wb') as f:
6198 f.write(value)
6199 except EnvironmentError as e:
6200 raise XAttrMetadataError(e.errno, e.strerror)
6201 else:
6202 user_has_setfattr = check_executable('setfattr', ['--version'])
6203 user_has_xattr = check_executable('xattr', ['-h'])
6204
6205 if user_has_setfattr or user_has_xattr:
6206
6207 value = value.decode('utf-8')
6208 if user_has_setfattr:
6209 executable = 'setfattr'
6210 opts = ['-n', key, '-v', value]
6211 elif user_has_xattr:
6212 executable = 'xattr'
6213 opts = ['-w', key, value]
6214
3089bc74
S
6215 cmd = ([encodeFilename(executable, True)]
6216 + [encodeArgument(o) for o in opts]
6217 + [encodeFilename(path, True)])
efa97bdc
YCH
6218
6219 try:
d3c93ec2 6220 p = Popen(
efa97bdc
YCH
6221 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
6222 except EnvironmentError as e:
6223 raise XAttrMetadataError(e.errno, e.strerror)
d3c93ec2 6224 stdout, stderr = p.communicate_or_kill()
efa97bdc
YCH
6225 stderr = stderr.decode('utf-8', 'replace')
6226 if p.returncode != 0:
6227 raise XAttrMetadataError(p.returncode, stderr)
6228
6229 else:
6230                # On Unix, but we cannot find pyxattr, setfattr, or xattr.
6231 if sys.platform.startswith('linux'):
6232 raise XAttrUnavailableError(
6233 "Couldn't find a tool to set the xattrs. "
6234 "Install either the python 'pyxattr' or 'xattr' "
6235 "modules, or the GNU 'attr' package "
6236 "(which contains the 'setfattr' tool).")
6237 else:
6238 raise XAttrUnavailableError(
6239 "Couldn't find a tool to set the xattrs. "
6240 "Install either the python 'xattr' module, "
6241 "or the 'xattr' binary.")
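
# Editor's illustrative sketch (not part of the original source): the value must be
# bytes, and one of the backends probed above (pyxattr/xattr module, setfattr or the
# xattr binary, or NTFS ADS) must be available. The path is a placeholder.
def _demo_write_xattr(path='video.mp4'):
    write_xattr(path, 'user.xdg.referrer.url', b'https://example.com/watch')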
0c265486
YCH
6242
6243
6244def random_birthday(year_field, month_field, day_field):
aa374bc7
AS
6245 start_date = datetime.date(1950, 1, 1)
6246 end_date = datetime.date(1995, 12, 31)
6247 offset = random.randint(0, (end_date - start_date).days)
6248 random_date = start_date + datetime.timedelta(offset)
0c265486 6249 return {
aa374bc7
AS
6250 year_field: str(random_date.year),
6251 month_field: str(random_date.month),
6252 day_field: str(random_date.day),
0c265486 6253 }
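
# Editor's illustrative sketch (not part of the original source): the field names are
# arbitrary; they simply become the keys of the returned dict (values are strings).
def _demo_random_birthday():
    birthday = random_birthday('birth_year', 'birth_month', 'birth_day')
    assert set(birthday) == {'birth_year', 'birth_month', 'birth_day'}
    assert 1950 <= int(birthday['birth_year']) <= 1995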
732044af 6254
c76eb41b 6255
732044af 6256# Templates for internet shortcut files, which are plain text files.
6257DOT_URL_LINK_TEMPLATE = '''
6258[InternetShortcut]
6259URL=%(url)s
6260'''.lstrip()
6261
6262DOT_WEBLOC_LINK_TEMPLATE = '''
6263<?xml version="1.0" encoding="UTF-8"?>
6264<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
6265<plist version="1.0">
6266<dict>
6267\t<key>URL</key>
6268\t<string>%(url)s</string>
6269</dict>
6270</plist>
6271'''.lstrip()
6272
6273DOT_DESKTOP_LINK_TEMPLATE = '''
6274[Desktop Entry]
6275Encoding=UTF-8
6276Name=%(filename)s
6277Type=Link
6278URL=%(url)s
6279Icon=text-html
6280'''.lstrip()
6281
08438d2c 6282LINK_TEMPLATES = {
6283 'url': DOT_URL_LINK_TEMPLATE,
6284 'desktop': DOT_DESKTOP_LINK_TEMPLATE,
6285 'webloc': DOT_WEBLOC_LINK_TEMPLATE,
6286}
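
# Editor's illustrative sketch (not part of the original source): the templates are
# ordinary %-format strings keyed by 'url' ('.desktop' additionally uses 'filename').
def _demo_link_templates():
    url_file = LINK_TEMPLATES['url'] % {'url': 'https://example.com/watch?v=xyz'}
    assert url_file == '[InternetShortcut]\nURL=https://example.com/watch?v=xyz\n'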
6287
732044af 6288
6289def iri_to_uri(iri):
6290 """
6291 Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
6292
6293 The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
6294 """
6295
6296 iri_parts = compat_urllib_parse_urlparse(iri)
6297
6298 if '[' in iri_parts.netloc:
6299 raise ValueError('IPv6 URIs are not, yet, supported.')
6300 # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
6301
6302 # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
6303
6304 net_location = ''
6305 if iri_parts.username:
6306 net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
6307 if iri_parts.password is not None:
6308 net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
6309 net_location += '@'
6310
6311 net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
6312 # The 'idna' encoding produces ASCII text.
6313 if iri_parts.port is not None and iri_parts.port != 80:
6314 net_location += ':' + str(iri_parts.port)
6315
6316 return compat_urllib_parse_urlunparse(
6317 (iri_parts.scheme,
6318 net_location,
6319
6320 compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
6321
6322 # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
6323 compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
6324
6325 # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
6326 compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
6327
6328 compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
6329
6330 # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
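
# Editor's illustrative sketch (not part of the original source): non-ASCII path and
# query parts come out percent-encoded as UTF-8, while already-escaped sequences and
# plain ASCII URIs pass through unchanged. The URLs are made-up examples.
def _demo_iri_to_uri():
    assert iri_to_uri('https://example.com/путь?q=значение') == (
        'https://example.com/%D0%BF%D1%83%D1%82%D1%8C'
        '?q=%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5')
    assert iri_to_uri('https://example.com/a%20b') == 'https://example.com/a%20b'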
6331
6332
6333def to_high_limit_path(path):
6334 if sys.platform in ['win32', 'cygwin']:
6335 # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
6336 return r'\\?\ '.rstrip() + os.path.abspath(path)
6337
6338 return path
76d321f6 6339
c76eb41b 6340
b868936c 6341def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
6342 if field is None:
6343 val = obj if obj is not None else default
6344 else:
6345 val = obj.get(field, default)
76d321f6 6346 if func and val not in ignore:
6347 val = func(val)
6348 return template % val if val not in ignore else default
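
# Editor's illustrative sketch (not part of the original source): format_field() avoids
# formatting missing metadata - values in `ignore` (None and '' by default) yield the
# default instead of being interpolated.
def _demo_format_field():
    info = {'width': 1280, 'fps': None}
    assert format_field(info, 'width', '%dpx') == '1280px'
    assert format_field(info, 'fps', '%sfps') == ''   # None is ignored
    assert format_field(info, 'vbr', '%sk') == ''     # missing key falls back to default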
00dd0cd5 6349
6350
6351def clean_podcast_url(url):
6352 return re.sub(r'''(?x)
6353 (?:
6354 (?:
6355 chtbl\.com/track|
6356 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
6357 play\.podtrac\.com
6358 )/[^/]+|
6359 (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
6360 flex\.acast\.com|
6361 pd(?:
6362 cn\.co| # https://podcorn.com/analytics-prefix/
6363 st\.fm # https://podsights.com/docs/
6364 )/e
6365 )/''', '', url)
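
# Editor's illustrative sketch (not part of the original source): the analytics prefix is
# stripped so the URL points straight at the media host. The URL is a made-up example.
def _demo_clean_podcast_url():
    assert clean_podcast_url(
        'https://www.podtrac.com/pts/redirect.mp3/traffic.megaphone.fm/xyz.mp3'
    ) == 'https://traffic.megaphone.fm/xyz.mp3'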
ffcb8191
THD
6366
6367
6368_HEX_TABLE = '0123456789abcdef'
6369
6370
6371def random_uuidv4():
6372 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6373
6374
6375def make_dir(path, to_screen=None):
6376 try:
6377 dn = os.path.dirname(path)
6378 if dn and not os.path.exists(dn):
6379 os.makedirs(dn)
6380 return True
6381 except (OSError, IOError) as err:
6382        if callable(to_screen):
6383 to_screen('unable to create directory ' + error_to_compat_str(err))
6384 return False
f74980cb 6385
6386
6387def get_executable_path():
c552ae88 6388 from zipimport import zipimporter
6389 if hasattr(sys, 'frozen'): # Running from PyInstaller
6390 path = os.path.dirname(sys.executable)
6391 elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
6392 path = os.path.join(os.path.dirname(__file__), '../..')
6393 else:
6394 path = os.path.join(os.path.dirname(__file__), '..')
f74980cb 6395 return os.path.abspath(path)
6396
6397
2f567473 6398def load_plugins(name, suffix, namespace):
3ae5e797 6399 classes = {}
f74980cb 6400 try:
019a94f7
ÁS
6401 plugins_spec = importlib.util.spec_from_file_location(
6402 name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
6403 plugins = importlib.util.module_from_spec(plugins_spec)
6404 sys.modules[plugins_spec.name] = plugins
6405 plugins_spec.loader.exec_module(plugins)
f74980cb 6406 for name in dir(plugins):
2f567473 6407 if name in namespace:
6408 continue
6409 if not name.endswith(suffix):
f74980cb 6410 continue
6411 klass = getattr(plugins, name)
3ae5e797 6412 classes[name] = namespace[name] = klass
019a94f7 6413 except FileNotFoundError:
f74980cb 6414 pass
f74980cb 6415 return classes
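
# Editor's illustrative sketch (not part of the original source): roughly how extractor
# plugins are pulled in from an `ytdlp_plugins/extractor` package next to the executable;
# the passed-in namespace receives any classes whose names end with the given suffix.
def _demo_load_plugins():
    namespace = {}
    plugin_ies = load_plugins('extractor', 'IE', namespace)
    return plugin_ies  # empty dict when no ytdlp_plugins directory exists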
06167fbb 6416
6417
325ebc17 6418def traverse_obj(
352d63fd 6419 obj, *path_list, default=None, expected_type=None, get_all=True,
325ebc17 6420 casesense=True, is_user_input=False, traverse_string=False):
324ad820 6421 ''' Traverse nested list/dict/tuple
8f334380 6422 @param path_list A list of paths which are checked one by one.
6423 Each path is a list of keys where each key is a string,
2614f646 6424 a function, a tuple of strings or "...".
6425 When a fuction is given, it takes the key as argument and
6426 returns whether the key matches or not. When a tuple is given,
8f334380 6427 all the keys given in the tuple are traversed, and
6428 "..." traverses all the keys in the object
325ebc17 6429 @param default Default value to return
352d63fd 6430 @param expected_type Only accept final value of this type (Can also be any callable)
6431 @param get_all Return all the values obtained from a path or only the first one
324ad820 6432 @param casesense Whether to consider dictionary keys as case sensitive
6433 @param is_user_input Whether the keys are generated from user input. If True,
6434 strings are converted to int/slice if necessary
6435 @param traverse_string Whether to traverse inside strings. If True, any
6436 non-compatible object will also be converted into a string
8f334380 6437 # TODO: Write tests
324ad820 6438 '''
325ebc17 6439 if not casesense:
dbf5416a 6440 _lower = lambda k: (k.lower() if isinstance(k, str) else k)
8f334380 6441 path_list = (map(_lower, variadic(path)) for path in path_list)
6442
6443 def _traverse_obj(obj, path, _current_depth=0):
6444 nonlocal depth
575e17a1 6445 if obj is None:
6446 return None
8f334380 6447 path = tuple(variadic(path))
6448 for i, key in enumerate(path):
6449 if isinstance(key, (list, tuple)):
6450 obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
6451 key = ...
6452 if key is ...:
6453 obj = (obj.values() if isinstance(obj, dict)
6454 else obj if isinstance(obj, (list, tuple, LazyList))
6455 else str(obj) if traverse_string else [])
6456 _current_depth += 1
6457 depth = max(depth, _current_depth)
6458 return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
2614f646 6459 elif callable(key):
6460 if isinstance(obj, (list, tuple, LazyList)):
6461 obj = enumerate(obj)
6462 elif isinstance(obj, dict):
6463 obj = obj.items()
6464 else:
6465 if not traverse_string:
6466 return None
6467 obj = str(obj)
6468 _current_depth += 1
6469 depth = max(depth, _current_depth)
6470 return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
575e17a1 6471 elif isinstance(obj, dict) and not (is_user_input and key == ':'):
325ebc17 6472 obj = (obj.get(key) if casesense or (key in obj)
6473 else next((v for k, v in obj.items() if _lower(k) == key), None))
6474 else:
6475 if is_user_input:
6476 key = (int_or_none(key) if ':' not in key
6477 else slice(*map(int_or_none, key.split(':'))))
8f334380 6478 if key == slice(None):
575e17a1 6479 return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
325ebc17 6480 if not isinstance(key, (int, slice)):
9fea350f 6481 return None
8f334380 6482 if not isinstance(obj, (list, tuple, LazyList)):
325ebc17 6483 if not traverse_string:
6484 return None
6485 obj = str(obj)
6486 try:
6487 obj = obj[key]
6488 except IndexError:
324ad820 6489 return None
325ebc17 6490 return obj
6491
352d63fd 6492 if isinstance(expected_type, type):
6493 type_test = lambda val: val if isinstance(val, expected_type) else None
6494 elif expected_type is not None:
6495 type_test = expected_type
6496 else:
6497 type_test = lambda val: val
6498
8f334380 6499 for path in path_list:
6500 depth = 0
6501 val = _traverse_obj(obj, path)
325ebc17 6502 if val is not None:
8f334380 6503 if depth:
6504 for _ in range(depth - 1):
6586bca9 6505 val = itertools.chain.from_iterable(v for v in val if v is not None)
352d63fd 6506 val = [v for v in map(type_test, val) if v is not None]
8f334380 6507 if val:
352d63fd 6508 return val if get_all else val[0]
6509 else:
6510 val = type_test(val)
6511 if val is not None:
8f334380 6512 return val
325ebc17 6513 return default
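
# Editor's illustrative sketch (not part of the original source): a few representative
# queries against a nested dict, showing plain paths, branching with `...`, fallback
# paths and the expected_type filter.
def _demo_traverse_obj():
    data = {'formats': [{'url': 'http://a', 'height': 720}, {'url': 'http://b'}]}
    assert traverse_obj(data, ('formats', 0, 'url')) == 'http://a'
    assert traverse_obj(data, ('formats', ..., 'url')) == ['http://a', 'http://b']
    assert traverse_obj(data, ('formats', 1, 'height'), ('formats', 0, 'height')) == 720
    assert traverse_obj(data, ('formats', 0, 'height'), expected_type=str) is None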
324ad820 6514
6515
6516def traverse_dict(dictn, keys, casesense=True):
6517 ''' For backward compatibility. Do not use '''
6518 return traverse_obj(dictn, keys, casesense=casesense,
6519 is_user_input=True, traverse_string=True)
6606817a 6520
6521
c634ad2a 6522def variadic(x, allowed_types=(str, bytes)):
cb89cfc1 6523 return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
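
# Editor's illustrative sketch (not part of the original source): variadic() wraps
# scalars into a tuple but leaves real iterables alone; str/bytes count as scalars.
def _demo_variadic():
    assert variadic('spam') == ('spam',)
    assert variadic(['spam', 'eggs']) == ['spam', 'eggs']
    assert variadic(None) == (None,)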
bd50a52b
THD
6524
6525
49fa4d9a
N
6526# create a JSON Web Signature (jws) with HS256 algorithm
6527# the resulting format is in JWS Compact Serialization
6528# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
6529# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
6530def jwt_encode_hs256(payload_data, key, headers={}):
6531 header_data = {
6532 'alg': 'HS256',
6533 'typ': 'JWT',
6534 }
6535 if headers:
6536 header_data.update(headers)
6537 header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
6538 payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
6539 h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
6540 signature_b64 = base64.b64encode(h.digest())
6541 token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
6542 return token
819e0531 6543
6544
16b0d7e6 6545 # can be extended in the future to verify the signature, parse the header, and return the algorithm used if it's not HS256
6546def jwt_decode_hs256(jwt):
6547 header_b64, payload_b64, signature_b64 = jwt.split('.')
6548    payload_data = json.loads(base64.urlsafe_b64decode(payload_b64 + '==='))  # re-add any stripped base64 padding; superfluous '=' is ignored
6549 return payload_data
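
# Editor's illustrative sketch (not part of the original source): encoding a payload and
# reading it back. Note that jwt_decode_hs256() only parses the payload - it does not
# verify the signature. The key and payload are made-up demo values.
def _demo_jwt_roundtrip():
    token = jwt_encode_hs256({'id': 123}, 'secret', headers={'kid': 'demo'})
    assert jwt_decode_hs256(token.decode('utf-8')) == {'id': 123}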
6550
6551
819e0531 6552def supports_terminal_sequences(stream):
6553 if compat_os_name == 'nt':
d1d5c08f 6554 if get_windows_version() < (10, 0, 10586):
819e0531 6555 return False
6556 elif not os.getenv('TERM'):
6557 return False
6558 try:
6559 return stream.isatty()
6560 except BaseException:
6561 return False
6562
6563
ec11a9f4 6564_terminal_sequences_re = re.compile('\033\\[[^m]+m')
6565
6566
6567def remove_terminal_sequences(string):
6568 return _terminal_sequences_re.sub('', string)
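
# Editor's illustrative sketch (not part of the original source): ANSI color/SGR
# sequences are stripped, leaving the plain text.
def _demo_remove_terminal_sequences():
    assert remove_terminal_sequences('\033[0;31merror\033[0m') == 'error'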
6569
6570
6571def number_of_digits(number):
6572 return len('%d' % number)
34921b43 6573
6574
6575def join_nonempty(*values, delim='-', from_dict=None):
6576 if from_dict is not None:
6577 values = operator.itemgetter(values)(from_dict)
6578 return delim.join(map(str, filter(None, values)))
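
# Editor's illustrative sketch (not part of the original source): falsy values are
# dropped, the rest are stringified and joined; from_dict pulls the values by key.
def _demo_join_nonempty():
    assert join_nonempty('1080p', None, 'dash', '', 'throttled') == '1080p-dash-throttled'
    assert join_nonempty('HDR', 60, delim=' ') == 'HDR 60'
    assert join_nonempty('width', 'height', from_dict={'width': 1920, 'height': 1080}) == '1920-1080'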