]> jfr.im git - yt-dlp.git/blame - yt_dlp/utils.py
[utils] Add `parse_qs`
[yt-dlp.git] / yt_dlp / utils.py
CommitLineData
cc52de43 1#!/usr/bin/env python3
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
f74980cb 19import imp
03f9daab 20import io
79a2e94e 21import itertools
f4bfd65f 22import json
d77c3dfd 23import locale
02dbf93f 24import math
347de493 25import operator
d77c3dfd 26import os
c496ca96 27import platform
773f291d 28import random
d77c3dfd 29import re
c496ca96 30import socket
79a2e94e 31import ssl
1c088fa8 32import subprocess
d77c3dfd 33import sys
181c8655 34import tempfile
c380cc28 35import time
01951dda 36import traceback
bcf89ce6 37import xml.etree.ElementTree
d77c3dfd 38import zlib
d77c3dfd 39
8c25f81b 40from .compat import (
b4a3d461 41 compat_HTMLParseError,
8bb56eee 42 compat_HTMLParser,
201c1459 43 compat_HTTPError,
8f9312c3 44 compat_basestring,
8c25f81b 45 compat_chr,
1bab3437 46 compat_cookiejar,
d7cd9a9e 47 compat_ctypes_WINFUNCTYPE,
36e6f62c 48 compat_etree_fromstring,
51098426 49 compat_expanduser,
8c25f81b 50 compat_html_entities,
55b2f099 51 compat_html_entities_html5,
be4a824d 52 compat_http_client,
42db58ec 53 compat_integer_types,
e29663c6 54 compat_numeric_types,
c86b6142 55 compat_kwargs,
efa97bdc 56 compat_os_name,
8c25f81b 57 compat_parse_qs,
702ccf2d 58 compat_shlex_quote,
8c25f81b 59 compat_str,
edaa23f8 60 compat_struct_pack,
d3f8e038 61 compat_struct_unpack,
8c25f81b
PH
62 compat_urllib_error,
63 compat_urllib_parse,
15707c7e 64 compat_urllib_parse_urlencode,
8c25f81b 65 compat_urllib_parse_urlparse,
732044af 66 compat_urllib_parse_urlunparse,
67 compat_urllib_parse_quote,
68 compat_urllib_parse_quote_plus,
7581bfc9 69 compat_urllib_parse_unquote_plus,
8c25f81b
PH
70 compat_urllib_request,
71 compat_urlparse,
810c10ba 72 compat_xpath,
8c25f81b 73)
4644ac55 74
71aff188
YCH
75from .socks import (
76 ProxyType,
77 sockssocket,
78)
79
4644ac55 80
51fb4995
YCH
def register_socks_protocols():
    """"Register" the SOCKS proxy schemes with the URL parser.

    Appends 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc`` when they are not already listed, so
    calling this function repeatedly never adds duplicate entries.
    """
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
88
89
468e2e92
FV
# The class of compiled regular expression objects is not clearly defined
# otherwise, so derive it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))
92
f7a147e3
S
93
94def random_user_agent():
95 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
96 _CHROME_VERSIONS = (
97 '74.0.3729.129',
98 '76.0.3780.3',
99 '76.0.3780.2',
100 '74.0.3729.128',
101 '76.0.3780.1',
102 '76.0.3780.0',
103 '75.0.3770.15',
104 '74.0.3729.127',
105 '74.0.3729.126',
106 '76.0.3779.1',
107 '76.0.3779.0',
108 '75.0.3770.14',
109 '74.0.3729.125',
110 '76.0.3778.1',
111 '76.0.3778.0',
112 '75.0.3770.13',
113 '74.0.3729.124',
114 '74.0.3729.123',
115 '73.0.3683.121',
116 '76.0.3777.1',
117 '76.0.3777.0',
118 '75.0.3770.12',
119 '74.0.3729.122',
120 '76.0.3776.4',
121 '75.0.3770.11',
122 '74.0.3729.121',
123 '76.0.3776.3',
124 '76.0.3776.2',
125 '73.0.3683.120',
126 '74.0.3729.120',
127 '74.0.3729.119',
128 '74.0.3729.118',
129 '76.0.3776.1',
130 '76.0.3776.0',
131 '76.0.3775.5',
132 '75.0.3770.10',
133 '74.0.3729.117',
134 '76.0.3775.4',
135 '76.0.3775.3',
136 '74.0.3729.116',
137 '75.0.3770.9',
138 '76.0.3775.2',
139 '76.0.3775.1',
140 '76.0.3775.0',
141 '75.0.3770.8',
142 '74.0.3729.115',
143 '74.0.3729.114',
144 '76.0.3774.1',
145 '76.0.3774.0',
146 '75.0.3770.7',
147 '74.0.3729.113',
148 '74.0.3729.112',
149 '74.0.3729.111',
150 '76.0.3773.1',
151 '76.0.3773.0',
152 '75.0.3770.6',
153 '74.0.3729.110',
154 '74.0.3729.109',
155 '76.0.3772.1',
156 '76.0.3772.0',
157 '75.0.3770.5',
158 '74.0.3729.108',
159 '74.0.3729.107',
160 '76.0.3771.1',
161 '76.0.3771.0',
162 '75.0.3770.4',
163 '74.0.3729.106',
164 '74.0.3729.105',
165 '75.0.3770.3',
166 '74.0.3729.104',
167 '74.0.3729.103',
168 '74.0.3729.102',
169 '75.0.3770.2',
170 '74.0.3729.101',
171 '75.0.3770.1',
172 '75.0.3770.0',
173 '74.0.3729.100',
174 '75.0.3769.5',
175 '75.0.3769.4',
176 '74.0.3729.99',
177 '75.0.3769.3',
178 '75.0.3769.2',
179 '75.0.3768.6',
180 '74.0.3729.98',
181 '75.0.3769.1',
182 '75.0.3769.0',
183 '74.0.3729.97',
184 '73.0.3683.119',
185 '73.0.3683.118',
186 '74.0.3729.96',
187 '75.0.3768.5',
188 '75.0.3768.4',
189 '75.0.3768.3',
190 '75.0.3768.2',
191 '74.0.3729.95',
192 '74.0.3729.94',
193 '75.0.3768.1',
194 '75.0.3768.0',
195 '74.0.3729.93',
196 '74.0.3729.92',
197 '73.0.3683.117',
198 '74.0.3729.91',
199 '75.0.3766.3',
200 '74.0.3729.90',
201 '75.0.3767.2',
202 '75.0.3767.1',
203 '75.0.3767.0',
204 '74.0.3729.89',
205 '73.0.3683.116',
206 '75.0.3766.2',
207 '74.0.3729.88',
208 '75.0.3766.1',
209 '75.0.3766.0',
210 '74.0.3729.87',
211 '73.0.3683.115',
212 '74.0.3729.86',
213 '75.0.3765.1',
214 '75.0.3765.0',
215 '74.0.3729.85',
216 '73.0.3683.114',
217 '74.0.3729.84',
218 '75.0.3764.1',
219 '75.0.3764.0',
220 '74.0.3729.83',
221 '73.0.3683.113',
222 '75.0.3763.2',
223 '75.0.3761.4',
224 '74.0.3729.82',
225 '75.0.3763.1',
226 '75.0.3763.0',
227 '74.0.3729.81',
228 '73.0.3683.112',
229 '75.0.3762.1',
230 '75.0.3762.0',
231 '74.0.3729.80',
232 '75.0.3761.3',
233 '74.0.3729.79',
234 '73.0.3683.111',
235 '75.0.3761.2',
236 '74.0.3729.78',
237 '74.0.3729.77',
238 '75.0.3761.1',
239 '75.0.3761.0',
240 '73.0.3683.110',
241 '74.0.3729.76',
242 '74.0.3729.75',
243 '75.0.3760.0',
244 '74.0.3729.74',
245 '75.0.3759.8',
246 '75.0.3759.7',
247 '75.0.3759.6',
248 '74.0.3729.73',
249 '75.0.3759.5',
250 '74.0.3729.72',
251 '73.0.3683.109',
252 '75.0.3759.4',
253 '75.0.3759.3',
254 '74.0.3729.71',
255 '75.0.3759.2',
256 '74.0.3729.70',
257 '73.0.3683.108',
258 '74.0.3729.69',
259 '75.0.3759.1',
260 '75.0.3759.0',
261 '74.0.3729.68',
262 '73.0.3683.107',
263 '74.0.3729.67',
264 '75.0.3758.1',
265 '75.0.3758.0',
266 '74.0.3729.66',
267 '73.0.3683.106',
268 '74.0.3729.65',
269 '75.0.3757.1',
270 '75.0.3757.0',
271 '74.0.3729.64',
272 '73.0.3683.105',
273 '74.0.3729.63',
274 '75.0.3756.1',
275 '75.0.3756.0',
276 '74.0.3729.62',
277 '73.0.3683.104',
278 '75.0.3755.3',
279 '75.0.3755.2',
280 '73.0.3683.103',
281 '75.0.3755.1',
282 '75.0.3755.0',
283 '74.0.3729.61',
284 '73.0.3683.102',
285 '74.0.3729.60',
286 '75.0.3754.2',
287 '74.0.3729.59',
288 '75.0.3753.4',
289 '74.0.3729.58',
290 '75.0.3754.1',
291 '75.0.3754.0',
292 '74.0.3729.57',
293 '73.0.3683.101',
294 '75.0.3753.3',
295 '75.0.3752.2',
296 '75.0.3753.2',
297 '74.0.3729.56',
298 '75.0.3753.1',
299 '75.0.3753.0',
300 '74.0.3729.55',
301 '73.0.3683.100',
302 '74.0.3729.54',
303 '75.0.3752.1',
304 '75.0.3752.0',
305 '74.0.3729.53',
306 '73.0.3683.99',
307 '74.0.3729.52',
308 '75.0.3751.1',
309 '75.0.3751.0',
310 '74.0.3729.51',
311 '73.0.3683.98',
312 '74.0.3729.50',
313 '75.0.3750.0',
314 '74.0.3729.49',
315 '74.0.3729.48',
316 '74.0.3729.47',
317 '75.0.3749.3',
318 '74.0.3729.46',
319 '73.0.3683.97',
320 '75.0.3749.2',
321 '74.0.3729.45',
322 '75.0.3749.1',
323 '75.0.3749.0',
324 '74.0.3729.44',
325 '73.0.3683.96',
326 '74.0.3729.43',
327 '74.0.3729.42',
328 '75.0.3748.1',
329 '75.0.3748.0',
330 '74.0.3729.41',
331 '75.0.3747.1',
332 '73.0.3683.95',
333 '75.0.3746.4',
334 '74.0.3729.40',
335 '74.0.3729.39',
336 '75.0.3747.0',
337 '75.0.3746.3',
338 '75.0.3746.2',
339 '74.0.3729.38',
340 '75.0.3746.1',
341 '75.0.3746.0',
342 '74.0.3729.37',
343 '73.0.3683.94',
344 '75.0.3745.5',
345 '75.0.3745.4',
346 '75.0.3745.3',
347 '75.0.3745.2',
348 '74.0.3729.36',
349 '75.0.3745.1',
350 '75.0.3745.0',
351 '75.0.3744.2',
352 '74.0.3729.35',
353 '73.0.3683.93',
354 '74.0.3729.34',
355 '75.0.3744.1',
356 '75.0.3744.0',
357 '74.0.3729.33',
358 '73.0.3683.92',
359 '74.0.3729.32',
360 '74.0.3729.31',
361 '73.0.3683.91',
362 '75.0.3741.2',
363 '75.0.3740.5',
364 '74.0.3729.30',
365 '75.0.3741.1',
366 '75.0.3741.0',
367 '74.0.3729.29',
368 '75.0.3740.4',
369 '73.0.3683.90',
370 '74.0.3729.28',
371 '75.0.3740.3',
372 '73.0.3683.89',
373 '75.0.3740.2',
374 '74.0.3729.27',
375 '75.0.3740.1',
376 '75.0.3740.0',
377 '74.0.3729.26',
378 '73.0.3683.88',
379 '73.0.3683.87',
380 '74.0.3729.25',
381 '75.0.3739.1',
382 '75.0.3739.0',
383 '73.0.3683.86',
384 '74.0.3729.24',
385 '73.0.3683.85',
386 '75.0.3738.4',
387 '75.0.3738.3',
388 '75.0.3738.2',
389 '75.0.3738.1',
390 '75.0.3738.0',
391 '74.0.3729.23',
392 '73.0.3683.84',
393 '74.0.3729.22',
394 '74.0.3729.21',
395 '75.0.3737.1',
396 '75.0.3737.0',
397 '74.0.3729.20',
398 '73.0.3683.83',
399 '74.0.3729.19',
400 '75.0.3736.1',
401 '75.0.3736.0',
402 '74.0.3729.18',
403 '73.0.3683.82',
404 '74.0.3729.17',
405 '75.0.3735.1',
406 '75.0.3735.0',
407 '74.0.3729.16',
408 '73.0.3683.81',
409 '75.0.3734.1',
410 '75.0.3734.0',
411 '74.0.3729.15',
412 '73.0.3683.80',
413 '74.0.3729.14',
414 '75.0.3733.1',
415 '75.0.3733.0',
416 '75.0.3732.1',
417 '74.0.3729.13',
418 '74.0.3729.12',
419 '73.0.3683.79',
420 '74.0.3729.11',
421 '75.0.3732.0',
422 '74.0.3729.10',
423 '73.0.3683.78',
424 '74.0.3729.9',
425 '74.0.3729.8',
426 '74.0.3729.7',
427 '75.0.3731.3',
428 '75.0.3731.2',
429 '75.0.3731.0',
430 '74.0.3729.6',
431 '73.0.3683.77',
432 '73.0.3683.76',
433 '75.0.3730.5',
434 '75.0.3730.4',
435 '73.0.3683.75',
436 '74.0.3729.5',
437 '73.0.3683.74',
438 '75.0.3730.3',
439 '75.0.3730.2',
440 '74.0.3729.4',
441 '73.0.3683.73',
442 '73.0.3683.72',
443 '75.0.3730.1',
444 '75.0.3730.0',
445 '74.0.3729.3',
446 '73.0.3683.71',
447 '74.0.3729.2',
448 '73.0.3683.70',
449 '74.0.3729.1',
450 '74.0.3729.0',
451 '74.0.3726.4',
452 '73.0.3683.69',
453 '74.0.3726.3',
454 '74.0.3728.0',
455 '74.0.3726.2',
456 '73.0.3683.68',
457 '74.0.3726.1',
458 '74.0.3726.0',
459 '74.0.3725.4',
460 '73.0.3683.67',
461 '73.0.3683.66',
462 '74.0.3725.3',
463 '74.0.3725.2',
464 '74.0.3725.1',
465 '74.0.3724.8',
466 '74.0.3725.0',
467 '73.0.3683.65',
468 '74.0.3724.7',
469 '74.0.3724.6',
470 '74.0.3724.5',
471 '74.0.3724.4',
472 '74.0.3724.3',
473 '74.0.3724.2',
474 '74.0.3724.1',
475 '74.0.3724.0',
476 '73.0.3683.64',
477 '74.0.3723.1',
478 '74.0.3723.0',
479 '73.0.3683.63',
480 '74.0.3722.1',
481 '74.0.3722.0',
482 '73.0.3683.62',
483 '74.0.3718.9',
484 '74.0.3702.3',
485 '74.0.3721.3',
486 '74.0.3721.2',
487 '74.0.3721.1',
488 '74.0.3721.0',
489 '74.0.3720.6',
490 '73.0.3683.61',
491 '72.0.3626.122',
492 '73.0.3683.60',
493 '74.0.3720.5',
494 '72.0.3626.121',
495 '74.0.3718.8',
496 '74.0.3720.4',
497 '74.0.3720.3',
498 '74.0.3718.7',
499 '74.0.3720.2',
500 '74.0.3720.1',
501 '74.0.3720.0',
502 '74.0.3718.6',
503 '74.0.3719.5',
504 '73.0.3683.59',
505 '74.0.3718.5',
506 '74.0.3718.4',
507 '74.0.3719.4',
508 '74.0.3719.3',
509 '74.0.3719.2',
510 '74.0.3719.1',
511 '73.0.3683.58',
512 '74.0.3719.0',
513 '73.0.3683.57',
514 '73.0.3683.56',
515 '74.0.3718.3',
516 '73.0.3683.55',
517 '74.0.3718.2',
518 '74.0.3718.1',
519 '74.0.3718.0',
520 '73.0.3683.54',
521 '74.0.3717.2',
522 '73.0.3683.53',
523 '74.0.3717.1',
524 '74.0.3717.0',
525 '73.0.3683.52',
526 '74.0.3716.1',
527 '74.0.3716.0',
528 '73.0.3683.51',
529 '74.0.3715.1',
530 '74.0.3715.0',
531 '73.0.3683.50',
532 '74.0.3711.2',
533 '74.0.3714.2',
534 '74.0.3713.3',
535 '74.0.3714.1',
536 '74.0.3714.0',
537 '73.0.3683.49',
538 '74.0.3713.1',
539 '74.0.3713.0',
540 '72.0.3626.120',
541 '73.0.3683.48',
542 '74.0.3712.2',
543 '74.0.3712.1',
544 '74.0.3712.0',
545 '73.0.3683.47',
546 '72.0.3626.119',
547 '73.0.3683.46',
548 '74.0.3710.2',
549 '72.0.3626.118',
550 '74.0.3711.1',
551 '74.0.3711.0',
552 '73.0.3683.45',
553 '72.0.3626.117',
554 '74.0.3710.1',
555 '74.0.3710.0',
556 '73.0.3683.44',
557 '72.0.3626.116',
558 '74.0.3709.1',
559 '74.0.3709.0',
560 '74.0.3704.9',
561 '73.0.3683.43',
562 '72.0.3626.115',
563 '74.0.3704.8',
564 '74.0.3704.7',
565 '74.0.3708.0',
566 '74.0.3706.7',
567 '74.0.3704.6',
568 '73.0.3683.42',
569 '72.0.3626.114',
570 '74.0.3706.6',
571 '72.0.3626.113',
572 '74.0.3704.5',
573 '74.0.3706.5',
574 '74.0.3706.4',
575 '74.0.3706.3',
576 '74.0.3706.2',
577 '74.0.3706.1',
578 '74.0.3706.0',
579 '73.0.3683.41',
580 '72.0.3626.112',
581 '74.0.3705.1',
582 '74.0.3705.0',
583 '73.0.3683.40',
584 '72.0.3626.111',
585 '73.0.3683.39',
586 '74.0.3704.4',
587 '73.0.3683.38',
588 '74.0.3704.3',
589 '74.0.3704.2',
590 '74.0.3704.1',
591 '74.0.3704.0',
592 '73.0.3683.37',
593 '72.0.3626.110',
594 '72.0.3626.109',
595 '74.0.3703.3',
596 '74.0.3703.2',
597 '73.0.3683.36',
598 '74.0.3703.1',
599 '74.0.3703.0',
600 '73.0.3683.35',
601 '72.0.3626.108',
602 '74.0.3702.2',
603 '74.0.3699.3',
604 '74.0.3702.1',
605 '74.0.3702.0',
606 '73.0.3683.34',
607 '72.0.3626.107',
608 '73.0.3683.33',
609 '74.0.3701.1',
610 '74.0.3701.0',
611 '73.0.3683.32',
612 '73.0.3683.31',
613 '72.0.3626.105',
614 '74.0.3700.1',
615 '74.0.3700.0',
616 '73.0.3683.29',
617 '72.0.3626.103',
618 '74.0.3699.2',
619 '74.0.3699.1',
620 '74.0.3699.0',
621 '73.0.3683.28',
622 '72.0.3626.102',
623 '73.0.3683.27',
624 '73.0.3683.26',
625 '74.0.3698.0',
626 '74.0.3696.2',
627 '72.0.3626.101',
628 '73.0.3683.25',
629 '74.0.3696.1',
630 '74.0.3696.0',
631 '74.0.3694.8',
632 '72.0.3626.100',
633 '74.0.3694.7',
634 '74.0.3694.6',
635 '74.0.3694.5',
636 '74.0.3694.4',
637 '72.0.3626.99',
638 '72.0.3626.98',
639 '74.0.3694.3',
640 '73.0.3683.24',
641 '72.0.3626.97',
642 '72.0.3626.96',
643 '72.0.3626.95',
644 '73.0.3683.23',
645 '72.0.3626.94',
646 '73.0.3683.22',
647 '73.0.3683.21',
648 '72.0.3626.93',
649 '74.0.3694.2',
650 '72.0.3626.92',
651 '74.0.3694.1',
652 '74.0.3694.0',
653 '74.0.3693.6',
654 '73.0.3683.20',
655 '72.0.3626.91',
656 '74.0.3693.5',
657 '74.0.3693.4',
658 '74.0.3693.3',
659 '74.0.3693.2',
660 '73.0.3683.19',
661 '74.0.3693.1',
662 '74.0.3693.0',
663 '73.0.3683.18',
664 '72.0.3626.90',
665 '74.0.3692.1',
666 '74.0.3692.0',
667 '73.0.3683.17',
668 '72.0.3626.89',
669 '74.0.3687.3',
670 '74.0.3691.1',
671 '74.0.3691.0',
672 '73.0.3683.16',
673 '72.0.3626.88',
674 '72.0.3626.87',
675 '73.0.3683.15',
676 '74.0.3690.1',
677 '74.0.3690.0',
678 '73.0.3683.14',
679 '72.0.3626.86',
680 '73.0.3683.13',
681 '73.0.3683.12',
682 '74.0.3689.1',
683 '74.0.3689.0',
684 '73.0.3683.11',
685 '72.0.3626.85',
686 '73.0.3683.10',
687 '72.0.3626.84',
688 '73.0.3683.9',
689 '74.0.3688.1',
690 '74.0.3688.0',
691 '73.0.3683.8',
692 '72.0.3626.83',
693 '74.0.3687.2',
694 '74.0.3687.1',
695 '74.0.3687.0',
696 '73.0.3683.7',
697 '72.0.3626.82',
698 '74.0.3686.4',
699 '72.0.3626.81',
700 '74.0.3686.3',
701 '74.0.3686.2',
702 '74.0.3686.1',
703 '74.0.3686.0',
704 '73.0.3683.6',
705 '72.0.3626.80',
706 '74.0.3685.1',
707 '74.0.3685.0',
708 '73.0.3683.5',
709 '72.0.3626.79',
710 '74.0.3684.1',
711 '74.0.3684.0',
712 '73.0.3683.4',
713 '72.0.3626.78',
714 '72.0.3626.77',
715 '73.0.3683.3',
716 '73.0.3683.2',
717 '72.0.3626.76',
718 '73.0.3683.1',
719 '73.0.3683.0',
720 '72.0.3626.75',
721 '71.0.3578.141',
722 '73.0.3682.1',
723 '73.0.3682.0',
724 '72.0.3626.74',
725 '71.0.3578.140',
726 '73.0.3681.4',
727 '73.0.3681.3',
728 '73.0.3681.2',
729 '73.0.3681.1',
730 '73.0.3681.0',
731 '72.0.3626.73',
732 '71.0.3578.139',
733 '72.0.3626.72',
734 '72.0.3626.71',
735 '73.0.3680.1',
736 '73.0.3680.0',
737 '72.0.3626.70',
738 '71.0.3578.138',
739 '73.0.3678.2',
740 '73.0.3679.1',
741 '73.0.3679.0',
742 '72.0.3626.69',
743 '71.0.3578.137',
744 '73.0.3678.1',
745 '73.0.3678.0',
746 '71.0.3578.136',
747 '73.0.3677.1',
748 '73.0.3677.0',
749 '72.0.3626.68',
750 '72.0.3626.67',
751 '71.0.3578.135',
752 '73.0.3676.1',
753 '73.0.3676.0',
754 '73.0.3674.2',
755 '72.0.3626.66',
756 '71.0.3578.134',
757 '73.0.3674.1',
758 '73.0.3674.0',
759 '72.0.3626.65',
760 '71.0.3578.133',
761 '73.0.3673.2',
762 '73.0.3673.1',
763 '73.0.3673.0',
764 '72.0.3626.64',
765 '71.0.3578.132',
766 '72.0.3626.63',
767 '72.0.3626.62',
768 '72.0.3626.61',
769 '72.0.3626.60',
770 '73.0.3672.1',
771 '73.0.3672.0',
772 '72.0.3626.59',
773 '71.0.3578.131',
774 '73.0.3671.3',
775 '73.0.3671.2',
776 '73.0.3671.1',
777 '73.0.3671.0',
778 '72.0.3626.58',
779 '71.0.3578.130',
780 '73.0.3670.1',
781 '73.0.3670.0',
782 '72.0.3626.57',
783 '71.0.3578.129',
784 '73.0.3669.1',
785 '73.0.3669.0',
786 '72.0.3626.56',
787 '71.0.3578.128',
788 '73.0.3668.2',
789 '73.0.3668.1',
790 '73.0.3668.0',
791 '72.0.3626.55',
792 '71.0.3578.127',
793 '73.0.3667.2',
794 '73.0.3667.1',
795 '73.0.3667.0',
796 '72.0.3626.54',
797 '71.0.3578.126',
798 '73.0.3666.1',
799 '73.0.3666.0',
800 '72.0.3626.53',
801 '71.0.3578.125',
802 '73.0.3665.4',
803 '73.0.3665.3',
804 '72.0.3626.52',
805 '73.0.3665.2',
806 '73.0.3664.4',
807 '73.0.3665.1',
808 '73.0.3665.0',
809 '72.0.3626.51',
810 '71.0.3578.124',
811 '72.0.3626.50',
812 '73.0.3664.3',
813 '73.0.3664.2',
814 '73.0.3664.1',
815 '73.0.3664.0',
816 '73.0.3663.2',
817 '72.0.3626.49',
818 '71.0.3578.123',
819 '73.0.3663.1',
820 '73.0.3663.0',
821 '72.0.3626.48',
822 '71.0.3578.122',
823 '73.0.3662.1',
824 '73.0.3662.0',
825 '72.0.3626.47',
826 '71.0.3578.121',
827 '73.0.3661.1',
828 '72.0.3626.46',
829 '73.0.3661.0',
830 '72.0.3626.45',
831 '71.0.3578.120',
832 '73.0.3660.2',
833 '73.0.3660.1',
834 '73.0.3660.0',
835 '72.0.3626.44',
836 '71.0.3578.119',
837 '73.0.3659.1',
838 '73.0.3659.0',
839 '72.0.3626.43',
840 '71.0.3578.118',
841 '73.0.3658.1',
842 '73.0.3658.0',
843 '72.0.3626.42',
844 '71.0.3578.117',
845 '73.0.3657.1',
846 '73.0.3657.0',
847 '72.0.3626.41',
848 '71.0.3578.116',
849 '73.0.3656.1',
850 '73.0.3656.0',
851 '72.0.3626.40',
852 '71.0.3578.115',
853 '73.0.3655.1',
854 '73.0.3655.0',
855 '72.0.3626.39',
856 '71.0.3578.114',
857 '73.0.3654.1',
858 '73.0.3654.0',
859 '72.0.3626.38',
860 '71.0.3578.113',
861 '73.0.3653.1',
862 '73.0.3653.0',
863 '72.0.3626.37',
864 '71.0.3578.112',
865 '73.0.3652.1',
866 '73.0.3652.0',
867 '72.0.3626.36',
868 '71.0.3578.111',
869 '73.0.3651.1',
870 '73.0.3651.0',
871 '72.0.3626.35',
872 '71.0.3578.110',
873 '73.0.3650.1',
874 '73.0.3650.0',
875 '72.0.3626.34',
876 '71.0.3578.109',
877 '73.0.3649.1',
878 '73.0.3649.0',
879 '72.0.3626.33',
880 '71.0.3578.108',
881 '73.0.3648.2',
882 '73.0.3648.1',
883 '73.0.3648.0',
884 '72.0.3626.32',
885 '71.0.3578.107',
886 '73.0.3647.2',
887 '73.0.3647.1',
888 '73.0.3647.0',
889 '72.0.3626.31',
890 '71.0.3578.106',
891 '73.0.3635.3',
892 '73.0.3646.2',
893 '73.0.3646.1',
894 '73.0.3646.0',
895 '72.0.3626.30',
896 '71.0.3578.105',
897 '72.0.3626.29',
898 '73.0.3645.2',
899 '73.0.3645.1',
900 '73.0.3645.0',
901 '72.0.3626.28',
902 '71.0.3578.104',
903 '72.0.3626.27',
904 '72.0.3626.26',
905 '72.0.3626.25',
906 '72.0.3626.24',
907 '73.0.3644.0',
908 '73.0.3643.2',
909 '72.0.3626.23',
910 '71.0.3578.103',
911 '73.0.3643.1',
912 '73.0.3643.0',
913 '72.0.3626.22',
914 '71.0.3578.102',
915 '73.0.3642.1',
916 '73.0.3642.0',
917 '72.0.3626.21',
918 '71.0.3578.101',
919 '73.0.3641.1',
920 '73.0.3641.0',
921 '72.0.3626.20',
922 '71.0.3578.100',
923 '72.0.3626.19',
924 '73.0.3640.1',
925 '73.0.3640.0',
926 '72.0.3626.18',
927 '73.0.3639.1',
928 '71.0.3578.99',
929 '73.0.3639.0',
930 '72.0.3626.17',
931 '73.0.3638.2',
932 '72.0.3626.16',
933 '73.0.3638.1',
934 '73.0.3638.0',
935 '72.0.3626.15',
936 '71.0.3578.98',
937 '73.0.3635.2',
938 '71.0.3578.97',
939 '73.0.3637.1',
940 '73.0.3637.0',
941 '72.0.3626.14',
942 '71.0.3578.96',
943 '71.0.3578.95',
944 '72.0.3626.13',
945 '71.0.3578.94',
946 '73.0.3636.2',
947 '71.0.3578.93',
948 '73.0.3636.1',
949 '73.0.3636.0',
950 '72.0.3626.12',
951 '71.0.3578.92',
952 '73.0.3635.1',
953 '73.0.3635.0',
954 '72.0.3626.11',
955 '71.0.3578.91',
956 '73.0.3634.2',
957 '73.0.3634.1',
958 '73.0.3634.0',
959 '72.0.3626.10',
960 '71.0.3578.90',
961 '71.0.3578.89',
962 '73.0.3633.2',
963 '73.0.3633.1',
964 '73.0.3633.0',
965 '72.0.3610.4',
966 '72.0.3626.9',
967 '71.0.3578.88',
968 '73.0.3632.5',
969 '73.0.3632.4',
970 '73.0.3632.3',
971 '73.0.3632.2',
972 '73.0.3632.1',
973 '73.0.3632.0',
974 '72.0.3626.8',
975 '71.0.3578.87',
976 '73.0.3631.2',
977 '73.0.3631.1',
978 '73.0.3631.0',
979 '72.0.3626.7',
980 '71.0.3578.86',
981 '72.0.3626.6',
982 '73.0.3630.1',
983 '73.0.3630.0',
984 '72.0.3626.5',
985 '71.0.3578.85',
986 '72.0.3626.4',
987 '73.0.3628.3',
988 '73.0.3628.2',
989 '73.0.3629.1',
990 '73.0.3629.0',
991 '72.0.3626.3',
992 '71.0.3578.84',
993 '73.0.3628.1',
994 '73.0.3628.0',
995 '71.0.3578.83',
996 '73.0.3627.1',
997 '73.0.3627.0',
998 '72.0.3626.2',
999 '71.0.3578.82',
1000 '71.0.3578.81',
1001 '71.0.3578.80',
1002 '72.0.3626.1',
1003 '72.0.3626.0',
1004 '71.0.3578.79',
1005 '70.0.3538.124',
1006 '71.0.3578.78',
1007 '72.0.3623.4',
1008 '72.0.3625.2',
1009 '72.0.3625.1',
1010 '72.0.3625.0',
1011 '71.0.3578.77',
1012 '70.0.3538.123',
1013 '72.0.3624.4',
1014 '72.0.3624.3',
1015 '72.0.3624.2',
1016 '71.0.3578.76',
1017 '72.0.3624.1',
1018 '72.0.3624.0',
1019 '72.0.3623.3',
1020 '71.0.3578.75',
1021 '70.0.3538.122',
1022 '71.0.3578.74',
1023 '72.0.3623.2',
1024 '72.0.3610.3',
1025 '72.0.3623.1',
1026 '72.0.3623.0',
1027 '72.0.3622.3',
1028 '72.0.3622.2',
1029 '71.0.3578.73',
1030 '70.0.3538.121',
1031 '72.0.3622.1',
1032 '72.0.3622.0',
1033 '71.0.3578.72',
1034 '70.0.3538.120',
1035 '72.0.3621.1',
1036 '72.0.3621.0',
1037 '71.0.3578.71',
1038 '70.0.3538.119',
1039 '72.0.3620.1',
1040 '72.0.3620.0',
1041 '71.0.3578.70',
1042 '70.0.3538.118',
1043 '71.0.3578.69',
1044 '72.0.3619.1',
1045 '72.0.3619.0',
1046 '71.0.3578.68',
1047 '70.0.3538.117',
1048 '71.0.3578.67',
1049 '72.0.3618.1',
1050 '72.0.3618.0',
1051 '71.0.3578.66',
1052 '70.0.3538.116',
1053 '72.0.3617.1',
1054 '72.0.3617.0',
1055 '71.0.3578.65',
1056 '70.0.3538.115',
1057 '72.0.3602.3',
1058 '71.0.3578.64',
1059 '72.0.3616.1',
1060 '72.0.3616.0',
1061 '71.0.3578.63',
1062 '70.0.3538.114',
1063 '71.0.3578.62',
1064 '72.0.3615.1',
1065 '72.0.3615.0',
1066 '71.0.3578.61',
1067 '70.0.3538.113',
1068 '72.0.3614.1',
1069 '72.0.3614.0',
1070 '71.0.3578.60',
1071 '70.0.3538.112',
1072 '72.0.3613.1',
1073 '72.0.3613.0',
1074 '71.0.3578.59',
1075 '70.0.3538.111',
1076 '72.0.3612.2',
1077 '72.0.3612.1',
1078 '72.0.3612.0',
1079 '70.0.3538.110',
1080 '71.0.3578.58',
1081 '70.0.3538.109',
1082 '72.0.3611.2',
1083 '72.0.3611.1',
1084 '72.0.3611.0',
1085 '71.0.3578.57',
1086 '70.0.3538.108',
1087 '72.0.3610.2',
1088 '71.0.3578.56',
1089 '71.0.3578.55',
1090 '72.0.3610.1',
1091 '72.0.3610.0',
1092 '71.0.3578.54',
1093 '70.0.3538.107',
1094 '71.0.3578.53',
1095 '72.0.3609.3',
1096 '71.0.3578.52',
1097 '72.0.3609.2',
1098 '71.0.3578.51',
1099 '72.0.3608.5',
1100 '72.0.3609.1',
1101 '72.0.3609.0',
1102 '71.0.3578.50',
1103 '70.0.3538.106',
1104 '72.0.3608.4',
1105 '72.0.3608.3',
1106 '72.0.3608.2',
1107 '71.0.3578.49',
1108 '72.0.3608.1',
1109 '72.0.3608.0',
1110 '70.0.3538.105',
1111 '71.0.3578.48',
1112 '72.0.3607.1',
1113 '72.0.3607.0',
1114 '71.0.3578.47',
1115 '70.0.3538.104',
1116 '72.0.3606.2',
1117 '72.0.3606.1',
1118 '72.0.3606.0',
1119 '71.0.3578.46',
1120 '70.0.3538.103',
1121 '70.0.3538.102',
1122 '72.0.3605.3',
1123 '72.0.3605.2',
1124 '72.0.3605.1',
1125 '72.0.3605.0',
1126 '71.0.3578.45',
1127 '70.0.3538.101',
1128 '71.0.3578.44',
1129 '71.0.3578.43',
1130 '70.0.3538.100',
1131 '70.0.3538.99',
1132 '71.0.3578.42',
1133 '72.0.3604.1',
1134 '72.0.3604.0',
1135 '71.0.3578.41',
1136 '70.0.3538.98',
1137 '71.0.3578.40',
1138 '72.0.3603.2',
1139 '72.0.3603.1',
1140 '72.0.3603.0',
1141 '71.0.3578.39',
1142 '70.0.3538.97',
1143 '72.0.3602.2',
1144 '71.0.3578.38',
1145 '71.0.3578.37',
1146 '72.0.3602.1',
1147 '72.0.3602.0',
1148 '71.0.3578.36',
1149 '70.0.3538.96',
1150 '72.0.3601.1',
1151 '72.0.3601.0',
1152 '71.0.3578.35',
1153 '70.0.3538.95',
1154 '72.0.3600.1',
1155 '72.0.3600.0',
1156 '71.0.3578.34',
1157 '70.0.3538.94',
1158 '72.0.3599.3',
1159 '72.0.3599.2',
1160 '72.0.3599.1',
1161 '72.0.3599.0',
1162 '71.0.3578.33',
1163 '70.0.3538.93',
1164 '72.0.3598.1',
1165 '72.0.3598.0',
1166 '71.0.3578.32',
1167 '70.0.3538.87',
1168 '72.0.3597.1',
1169 '72.0.3597.0',
1170 '72.0.3596.2',
1171 '71.0.3578.31',
1172 '70.0.3538.86',
1173 '71.0.3578.30',
1174 '71.0.3578.29',
1175 '72.0.3596.1',
1176 '72.0.3596.0',
1177 '71.0.3578.28',
1178 '70.0.3538.85',
1179 '72.0.3595.2',
1180 '72.0.3591.3',
1181 '72.0.3595.1',
1182 '72.0.3595.0',
1183 '71.0.3578.27',
1184 '70.0.3538.84',
1185 '72.0.3594.1',
1186 '72.0.3594.0',
1187 '71.0.3578.26',
1188 '70.0.3538.83',
1189 '72.0.3593.2',
1190 '72.0.3593.1',
1191 '72.0.3593.0',
1192 '71.0.3578.25',
1193 '70.0.3538.82',
1194 '72.0.3589.3',
1195 '72.0.3592.2',
1196 '72.0.3592.1',
1197 '72.0.3592.0',
1198 '71.0.3578.24',
1199 '72.0.3589.2',
1200 '70.0.3538.81',
1201 '70.0.3538.80',
1202 '72.0.3591.2',
1203 '72.0.3591.1',
1204 '72.0.3591.0',
1205 '71.0.3578.23',
1206 '70.0.3538.79',
1207 '71.0.3578.22',
1208 '72.0.3590.1',
1209 '72.0.3590.0',
1210 '71.0.3578.21',
1211 '70.0.3538.78',
1212 '70.0.3538.77',
1213 '72.0.3589.1',
1214 '72.0.3589.0',
1215 '71.0.3578.20',
1216 '70.0.3538.76',
1217 '71.0.3578.19',
1218 '70.0.3538.75',
1219 '72.0.3588.1',
1220 '72.0.3588.0',
1221 '71.0.3578.18',
1222 '70.0.3538.74',
1223 '72.0.3586.2',
1224 '72.0.3587.0',
1225 '71.0.3578.17',
1226 '70.0.3538.73',
1227 '72.0.3586.1',
1228 '72.0.3586.0',
1229 '71.0.3578.16',
1230 '70.0.3538.72',
1231 '72.0.3585.1',
1232 '72.0.3585.0',
1233 '71.0.3578.15',
1234 '70.0.3538.71',
1235 '71.0.3578.14',
1236 '72.0.3584.1',
1237 '72.0.3584.0',
1238 '71.0.3578.13',
1239 '70.0.3538.70',
1240 '72.0.3583.2',
1241 '71.0.3578.12',
1242 '72.0.3583.1',
1243 '72.0.3583.0',
1244 '71.0.3578.11',
1245 '70.0.3538.69',
1246 '71.0.3578.10',
1247 '72.0.3582.0',
1248 '72.0.3581.4',
1249 '71.0.3578.9',
1250 '70.0.3538.67',
1251 '72.0.3581.3',
1252 '72.0.3581.2',
1253 '72.0.3581.1',
1254 '72.0.3581.0',
1255 '71.0.3578.8',
1256 '70.0.3538.66',
1257 '72.0.3580.1',
1258 '72.0.3580.0',
1259 '71.0.3578.7',
1260 '70.0.3538.65',
1261 '71.0.3578.6',
1262 '72.0.3579.1',
1263 '72.0.3579.0',
1264 '71.0.3578.5',
1265 '70.0.3538.64',
1266 '71.0.3578.4',
1267 '71.0.3578.3',
1268 '71.0.3578.2',
1269 '71.0.3578.1',
1270 '71.0.3578.0',
1271 '70.0.3538.63',
1272 '69.0.3497.128',
1273 '70.0.3538.62',
1274 '70.0.3538.61',
1275 '70.0.3538.60',
1276 '70.0.3538.59',
1277 '71.0.3577.1',
1278 '71.0.3577.0',
1279 '70.0.3538.58',
1280 '69.0.3497.127',
1281 '71.0.3576.2',
1282 '71.0.3576.1',
1283 '71.0.3576.0',
1284 '70.0.3538.57',
1285 '70.0.3538.56',
1286 '71.0.3575.2',
1287 '70.0.3538.55',
1288 '69.0.3497.126',
1289 '70.0.3538.54',
1290 '71.0.3575.1',
1291 '71.0.3575.0',
1292 '71.0.3574.1',
1293 '71.0.3574.0',
1294 '70.0.3538.53',
1295 '69.0.3497.125',
1296 '70.0.3538.52',
1297 '71.0.3573.1',
1298 '71.0.3573.0',
1299 '70.0.3538.51',
1300 '69.0.3497.124',
1301 '71.0.3572.1',
1302 '71.0.3572.0',
1303 '70.0.3538.50',
1304 '69.0.3497.123',
1305 '71.0.3571.2',
1306 '70.0.3538.49',
1307 '69.0.3497.122',
1308 '71.0.3571.1',
1309 '71.0.3571.0',
1310 '70.0.3538.48',
1311 '69.0.3497.121',
1312 '71.0.3570.1',
1313 '71.0.3570.0',
1314 '70.0.3538.47',
1315 '69.0.3497.120',
1316 '71.0.3568.2',
1317 '71.0.3569.1',
1318 '71.0.3569.0',
1319 '70.0.3538.46',
1320 '69.0.3497.119',
1321 '70.0.3538.45',
1322 '71.0.3568.1',
1323 '71.0.3568.0',
1324 '70.0.3538.44',
1325 '69.0.3497.118',
1326 '70.0.3538.43',
1327 '70.0.3538.42',
1328 '71.0.3567.1',
1329 '71.0.3567.0',
1330 '70.0.3538.41',
1331 '69.0.3497.117',
1332 '71.0.3566.1',
1333 '71.0.3566.0',
1334 '70.0.3538.40',
1335 '69.0.3497.116',
1336 '71.0.3565.1',
1337 '71.0.3565.0',
1338 '70.0.3538.39',
1339 '69.0.3497.115',
1340 '71.0.3564.1',
1341 '71.0.3564.0',
1342 '70.0.3538.38',
1343 '69.0.3497.114',
1344 '71.0.3563.0',
1345 '71.0.3562.2',
1346 '70.0.3538.37',
1347 '69.0.3497.113',
1348 '70.0.3538.36',
1349 '70.0.3538.35',
1350 '71.0.3562.1',
1351 '71.0.3562.0',
1352 '70.0.3538.34',
1353 '69.0.3497.112',
1354 '70.0.3538.33',
1355 '71.0.3561.1',
1356 '71.0.3561.0',
1357 '70.0.3538.32',
1358 '69.0.3497.111',
1359 '71.0.3559.6',
1360 '71.0.3560.1',
1361 '71.0.3560.0',
1362 '71.0.3559.5',
1363 '71.0.3559.4',
1364 '70.0.3538.31',
1365 '69.0.3497.110',
1366 '71.0.3559.3',
1367 '70.0.3538.30',
1368 '69.0.3497.109',
1369 '71.0.3559.2',
1370 '71.0.3559.1',
1371 '71.0.3559.0',
1372 '70.0.3538.29',
1373 '69.0.3497.108',
1374 '71.0.3558.2',
1375 '71.0.3558.1',
1376 '71.0.3558.0',
1377 '70.0.3538.28',
1378 '69.0.3497.107',
1379 '71.0.3557.2',
1380 '71.0.3557.1',
1381 '71.0.3557.0',
1382 '70.0.3538.27',
1383 '69.0.3497.106',
1384 '71.0.3554.4',
1385 '70.0.3538.26',
1386 '71.0.3556.1',
1387 '71.0.3556.0',
1388 '70.0.3538.25',
1389 '71.0.3554.3',
1390 '69.0.3497.105',
1391 '71.0.3554.2',
1392 '70.0.3538.24',
1393 '69.0.3497.104',
1394 '71.0.3555.2',
1395 '70.0.3538.23',
1396 '71.0.3555.1',
1397 '71.0.3555.0',
1398 '70.0.3538.22',
1399 '69.0.3497.103',
1400 '71.0.3554.1',
1401 '71.0.3554.0',
1402 '70.0.3538.21',
1403 '69.0.3497.102',
1404 '71.0.3553.3',
1405 '70.0.3538.20',
1406 '69.0.3497.101',
1407 '71.0.3553.2',
1408 '69.0.3497.100',
1409 '71.0.3553.1',
1410 '71.0.3553.0',
1411 '70.0.3538.19',
1412 '69.0.3497.99',
1413 '69.0.3497.98',
1414 '69.0.3497.97',
1415 '71.0.3552.6',
1416 '71.0.3552.5',
1417 '71.0.3552.4',
1418 '71.0.3552.3',
1419 '71.0.3552.2',
1420 '71.0.3552.1',
1421 '71.0.3552.0',
1422 '70.0.3538.18',
1423 '69.0.3497.96',
1424 '71.0.3551.3',
1425 '71.0.3551.2',
1426 '71.0.3551.1',
1427 '71.0.3551.0',
1428 '70.0.3538.17',
1429 '69.0.3497.95',
1430 '71.0.3550.3',
1431 '71.0.3550.2',
1432 '71.0.3550.1',
1433 '71.0.3550.0',
1434 '70.0.3538.16',
1435 '69.0.3497.94',
1436 '71.0.3549.1',
1437 '71.0.3549.0',
1438 '70.0.3538.15',
1439 '69.0.3497.93',
1440 '69.0.3497.92',
1441 '71.0.3548.1',
1442 '71.0.3548.0',
1443 '70.0.3538.14',
1444 '69.0.3497.91',
1445 '71.0.3547.1',
1446 '71.0.3547.0',
1447 '70.0.3538.13',
1448 '69.0.3497.90',
1449 '71.0.3546.2',
1450 '69.0.3497.89',
1451 '71.0.3546.1',
1452 '71.0.3546.0',
1453 '70.0.3538.12',
1454 '69.0.3497.88',
1455 '71.0.3545.4',
1456 '71.0.3545.3',
1457 '71.0.3545.2',
1458 '71.0.3545.1',
1459 '71.0.3545.0',
1460 '70.0.3538.11',
1461 '69.0.3497.87',
1462 '71.0.3544.5',
1463 '71.0.3544.4',
1464 '71.0.3544.3',
1465 '71.0.3544.2',
1466 '71.0.3544.1',
1467 '71.0.3544.0',
1468 '69.0.3497.86',
1469 '70.0.3538.10',
1470 '69.0.3497.85',
1471 '70.0.3538.9',
1472 '69.0.3497.84',
1473 '71.0.3543.4',
1474 '70.0.3538.8',
1475 '71.0.3543.3',
1476 '71.0.3543.2',
1477 '71.0.3543.1',
1478 '71.0.3543.0',
1479 '70.0.3538.7',
1480 '69.0.3497.83',
1481 '71.0.3542.2',
1482 '71.0.3542.1',
1483 '71.0.3542.0',
1484 '70.0.3538.6',
1485 '69.0.3497.82',
1486 '69.0.3497.81',
1487 '71.0.3541.1',
1488 '71.0.3541.0',
1489 '70.0.3538.5',
1490 '69.0.3497.80',
1491 '71.0.3540.1',
1492 '71.0.3540.0',
1493 '70.0.3538.4',
1494 '69.0.3497.79',
1495 '70.0.3538.3',
1496 '71.0.3539.1',
1497 '71.0.3539.0',
1498 '69.0.3497.78',
1499 '68.0.3440.134',
1500 '69.0.3497.77',
1501 '70.0.3538.2',
1502 '70.0.3538.1',
1503 '70.0.3538.0',
1504 '69.0.3497.76',
1505 '68.0.3440.133',
1506 '69.0.3497.75',
1507 '70.0.3537.2',
1508 '70.0.3537.1',
1509 '70.0.3537.0',
1510 '69.0.3497.74',
1511 '68.0.3440.132',
1512 '70.0.3536.0',
1513 '70.0.3535.5',
1514 '70.0.3535.4',
1515 '70.0.3535.3',
1516 '69.0.3497.73',
1517 '68.0.3440.131',
1518 '70.0.3532.8',
1519 '70.0.3532.7',
1520 '69.0.3497.72',
1521 '69.0.3497.71',
1522 '70.0.3535.2',
1523 '70.0.3535.1',
1524 '70.0.3535.0',
1525 '69.0.3497.70',
1526 '68.0.3440.130',
1527 '69.0.3497.69',
1528 '68.0.3440.129',
1529 '70.0.3534.4',
1530 '70.0.3534.3',
1531 '70.0.3534.2',
1532 '70.0.3534.1',
1533 '70.0.3534.0',
1534 '69.0.3497.68',
1535 '68.0.3440.128',
1536 '70.0.3533.2',
1537 '70.0.3533.1',
1538 '70.0.3533.0',
1539 '69.0.3497.67',
1540 '68.0.3440.127',
1541 '70.0.3532.6',
1542 '70.0.3532.5',
1543 '70.0.3532.4',
1544 '69.0.3497.66',
1545 '68.0.3440.126',
1546 '70.0.3532.3',
1547 '70.0.3532.2',
1548 '70.0.3532.1',
1549 '69.0.3497.60',
1550 '69.0.3497.65',
1551 '69.0.3497.64',
1552 '70.0.3532.0',
1553 '70.0.3531.0',
1554 '70.0.3530.4',
1555 '70.0.3530.3',
1556 '70.0.3530.2',
1557 '69.0.3497.58',
1558 '68.0.3440.125',
1559 '69.0.3497.57',
1560 '69.0.3497.56',
1561 '69.0.3497.55',
1562 '69.0.3497.54',
1563 '70.0.3530.1',
1564 '70.0.3530.0',
1565 '69.0.3497.53',
1566 '68.0.3440.124',
1567 '69.0.3497.52',
1568 '70.0.3529.3',
1569 '70.0.3529.2',
1570 '70.0.3529.1',
1571 '70.0.3529.0',
1572 '69.0.3497.51',
1573 '70.0.3528.4',
1574 '68.0.3440.123',
1575 '70.0.3528.3',
1576 '70.0.3528.2',
1577 '70.0.3528.1',
1578 '70.0.3528.0',
1579 '69.0.3497.50',
1580 '68.0.3440.122',
1581 '70.0.3527.1',
1582 '70.0.3527.0',
1583 '69.0.3497.49',
1584 '68.0.3440.121',
1585 '70.0.3526.1',
1586 '70.0.3526.0',
1587 '68.0.3440.120',
1588 '69.0.3497.48',
1589 '69.0.3497.47',
1590 '68.0.3440.119',
1591 '68.0.3440.118',
1592 '70.0.3525.5',
1593 '70.0.3525.4',
1594 '70.0.3525.3',
1595 '68.0.3440.117',
1596 '69.0.3497.46',
1597 '70.0.3525.2',
1598 '70.0.3525.1',
1599 '70.0.3525.0',
1600 '69.0.3497.45',
1601 '68.0.3440.116',
1602 '70.0.3524.4',
1603 '70.0.3524.3',
1604 '69.0.3497.44',
1605 '70.0.3524.2',
1606 '70.0.3524.1',
1607 '70.0.3524.0',
1608 '70.0.3523.2',
1609 '69.0.3497.43',
1610 '68.0.3440.115',
1611 '70.0.3505.9',
1612 '69.0.3497.42',
1613 '70.0.3505.8',
1614 '70.0.3523.1',
1615 '70.0.3523.0',
1616 '69.0.3497.41',
1617 '68.0.3440.114',
1618 '70.0.3505.7',
1619 '69.0.3497.40',
1620 '70.0.3522.1',
1621 '70.0.3522.0',
1622 '70.0.3521.2',
1623 '69.0.3497.39',
1624 '68.0.3440.113',
1625 '70.0.3505.6',
1626 '70.0.3521.1',
1627 '70.0.3521.0',
1628 '69.0.3497.38',
1629 '68.0.3440.112',
1630 '70.0.3520.1',
1631 '70.0.3520.0',
1632 '69.0.3497.37',
1633 '68.0.3440.111',
1634 '70.0.3519.3',
1635 '70.0.3519.2',
1636 '70.0.3519.1',
1637 '70.0.3519.0',
1638 '69.0.3497.36',
1639 '68.0.3440.110',
1640 '70.0.3518.1',
1641 '70.0.3518.0',
1642 '69.0.3497.35',
1643 '69.0.3497.34',
1644 '68.0.3440.109',
1645 '70.0.3517.1',
1646 '70.0.3517.0',
1647 '69.0.3497.33',
1648 '68.0.3440.108',
1649 '69.0.3497.32',
1650 '70.0.3516.3',
1651 '70.0.3516.2',
1652 '70.0.3516.1',
1653 '70.0.3516.0',
1654 '69.0.3497.31',
1655 '68.0.3440.107',
1656 '70.0.3515.4',
1657 '68.0.3440.106',
1658 '70.0.3515.3',
1659 '70.0.3515.2',
1660 '70.0.3515.1',
1661 '70.0.3515.0',
1662 '69.0.3497.30',
1663 '68.0.3440.105',
1664 '68.0.3440.104',
1665 '70.0.3514.2',
1666 '70.0.3514.1',
1667 '70.0.3514.0',
1668 '69.0.3497.29',
1669 '68.0.3440.103',
1670 '70.0.3513.1',
1671 '70.0.3513.0',
1672 '69.0.3497.28',
1673 )
1674 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1675
1676
# Default HTTP request headers. The User-Agent value is produced by a single
# random_user_agent() call made when this module is imported.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}
f427df17 1684
5f6a1245 1685
fb37eb25
S
# Fixed User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
1689
1690
bf42a990
S
# Unique sentinel for "no default was supplied"; compared by identity so that
# None remains usable as a real default value (see the xpath_* helpers)
NO_DEFAULT = object()
1692
7105440c
YCH
# Full English month names, January first
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, each list in calendar order
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
a942d6cb 1703
a7aaa398
S
# File extensions (without the leading dot) recognised as media or manifest files
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
1718
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; a few replacements
# are two characters long ('AE', 'OE', 'TH', 'ss', 'ae', 'oe', 'th').
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1723
46f59e89
S
# strptime-style date formats with an unambiguous day/month order.
# The day-first and month-first variants below both start from this tuple.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)
1763
# DATE_FORMATS plus the numeric formats that put the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# DATE_FORMATS plus the numeric formats that put the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
1782
# Captures four groups from a "}('...',N,N,'a|b|c'.split('|')" call tail
# (presumably the argument list of packed/obfuscated JavaScript - verify against callers)
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; the body is captured as 'json_ld'
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1785
7105440c 1786
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() when that call succeeds and the
    reported codec can actually encode text; otherwise falls back to 'UTF-8'.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: an unknown or broken encoding name raises here
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 1800
f4bfd65f 1801
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created next to fn, which
    is then renamed over fn. The temporary file is removed if anything fails.
    """

    fn = encodeFilename(fn)
    legacy_python = sys.version_info < (3, 0)

    if legacy_python and sys.platform != 'win32':
        fs_encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile fails on non-ASCII names unless it is handed
        # unicode objects, hence the explicit decoding.
        def path_basename(f):
            return os.path.basename(fn).decode(fs_encoding)

        def path_dirname(f):
            return os.path.dirname(fn).decode(fs_encoding)
    else:
        path_basename, path_dirname = os.path.basename, os.path.dirname

    tmp_options = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if legacy_python:
        tmp_options['mode'] = 'wb'
    else:
        tmp_options['mode'] = 'w'
        tmp_options['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(tmp_options))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # The umask can only be read by setting it, so set and restore it,
            # then give the temporary file the permissions a normal open() would
            current_mask = os.umask(0)
            os.umask(current_mask)
            os.chmod(tf.name, 0o666 & ~current_mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1860
1861
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first element matching xpath[@key] or, when val is given, xpath[@key='val'] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        # No attribute predicates available here: filter the candidates by hand
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1876
d7e66d39
JMF
1877# On python2.6 the xml.etree.ElementTree.Element methods don't support
1878# the namespace parameter
5f6a1245
JW
1879
1880
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand every 'prefix:tag' step of path to '{namespace}tag' using ns_map.

    Steps without a prefix are kept as they are.
    """
    def _qualify(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join([_qualify(step) for step in path.split('/')])
1891
d77c3dfd 1892
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node that matches xpath.

    xpath is either a single expression or an iterable of alternative
    expressions that are tried in order.

    When nothing matches: default is returned if one was given; otherwise an
    ExtractorError is raised if fatal is set; otherwise None is returned.
    name replaces the xpath in the error message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable of alternatives
        # reaches the default/fatal handling below instead of leaving n unbound
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: a tuple of alternatives must be formatted as
            # one value, not unpacked as %-format arguments
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
1914
1915
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element matching xpath.

    A missing element is handled by xpath_element (default / fatal / None).
    An element without text is handled the same way here: default if given,
    else ExtractorError if fatal, else None.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # Nothing found: pass through the None/default that xpath_element returned
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # 1-tuple so that a tuple of xpath alternatives is formatted as a
            # single value rather than unpacked as %-format arguments
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
a41fb80c
S
1929
1930
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that carries it.

    When no such element exists: default if given, else ExtractorError if
    fatal is set, else None.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1942
1943
def get_element_by_id(id, html):
    """Return the content of the first tag whose id attribute equals id, or None"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
43e8fafd 1947
12ea2f30 1948
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying class_name in the passed HTML document, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1953
1954
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1958
1959
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several names; match class_name as a whole
    # word anywhere inside the attribute value
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1965
1966
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is False, in which case
    it is inserted into the pattern as a regular expression.
    Each returned content string has its HTML entities unescaped.
    """

    value = re.escape(value) if escape_value else value

    retlist = []
    # Pattern layout: tag name, any other attributes, the wanted attribute
    # (optionally quoted), any further attributes, '>', then the shortest
    # content up to a closing tag with the same name.
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        # Content that starts with a quote has its first and last character dropped
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
a921f407 1990
c5229f39 1991
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; empty until one is parsed
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs arrives as (name, value) pairs; every start tag replaces the previous result
        self.attrs = dict(attrs)
2001
c5229f39 2002
8bb56eee
BF
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        # Best effort: return whatever attributes were collected before the error
        pass
    return parser.attrs
9e6dd238 2027
c5229f39 2028
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Literal newlines become spaces; only <br> and </p><p> boundaries
    # produce line breaks in the result
    text = html.replace('\n', ' ')
    substitutions = (
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2044
2045
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    A filename of '-' selects standard output instead of a file.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout in binary mode on Windows
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Prefer the underlying binary buffer when stdout has one
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors are re-raised as they are; no renaming is attempted
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
2076
2077
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2085
5f6a1245 2086
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        codepoint = ord(char)
        # '?' and control characters are dropped entirely
        if char == '?' or codepoint < 32 or codepoint == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally replaces shell-special characters,
        # whitespace and anything outside ASCII
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or codepoint > 127):
            return '_'
        return char

    if s == '':
        return ''
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join([replace_insane(char) for char in s])
    if is_id:
        return result

    # Collapse runs of underscores, then trim them from both ends
    result = re.sub(r'__+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2128
5f6a1245 2129
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    On other platforms the path is returned unchanged unless force is set,
    in which case the same sanitization is applied without drive/UNC handling.
    Characters not allowed in Windows path components, and a trailing dot or
    whitespace of a component, are replaced with '#'.
    """
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
        if sys.version_info < (2, 7) and not drive_or_unc:
            drive_or_unc, _ = os.path.splitunc(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component left in front of the root separator
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    # startswith() instead of s[0]: an empty path must not raise IndexError
    elif force and s.startswith(os.path.sep):
        sanitized_path.insert(0, os.path.sep)
    return os.path.join(*sanitized_path)
2153
2154
def sanitize_url(url):
    """Fix up a URL that lacks a scheme or starts with a commonly mistyped one."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for wrong, right in typo_fixes:
        # The patterns are anchored at the start, so a non-zero substitution
        # count means exactly "the URL starts with this typo"
        fixed, hits = re.subn(wrong, right, url)
        if hits:
            return fixed
    return url
17bcc626
S
2171
2172
5435dcf9
HH
def extract_basic_auth(url):
    """Split user:password credentials off a URL.

    Returns (url, None) when the URL carries no username; otherwise returns
    the URL with the credentials removed together with the matching
    'Basic ...' Authorization header value.
    """
    parts = compat_urlparse.urlsplit(url)
    if parts.username is None:
        return url, None
    hostname = parts.hostname
    # .hostname strips the brackets of an IPv6 literal; restore them so that
    # the rebuilt netloc (in particular "host:port") stays a valid URL
    if hostname is not None and ':' in hostname:
        hostname = '[%s]' % hostname
    url = compat_urlparse.urlunsplit(parts._replace(netloc=(
        hostname if parts.port is None
        else '%s:%d' % (hostname, parts.port))))
    auth_payload = base64.b64encode(
        ('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
    return url, 'Basic ' + auth_payload.decode('utf-8')
2183
2184
def sanitized_Request(url, *args, **kwargs):
    """Build a Request for url after sanitizing and escaping it.

    Credentials embedded in the URL are removed from it and sent as an
    Authorization header instead. Remaining arguments are passed through
    to compat_urllib_request.Request.
    """
    url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
    if auth_header is not None:
        # args[1] is presumably the positional headers argument of Request
        # (after data) - confirm. Note that the caller's headers dict is
        # modified in place.
        headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
        headers['Authorization'] = auth_header
    return compat_urllib_request.Request(url, *args, **kwargs)
67dda517
S
2191
2192
51098426
S
def expand_path(s):
    """Expand ~ first, then shell variables, in the path s"""
    home_expanded = compat_expanduser(s)
    return os.path.expandvars(home_expanded)
2196
2197
def orderedSet(iterable):
    """ Return a list of the items of iterable without duplicates, keeping first-seen order """
    # Membership is tested with ==, so unhashable items work as well
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2205
912b38b4 2206
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    The argument is the entity without its leading '&' but with the
    trailing ';'. Unknown entities are returned in their literal form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    named_entities = compat_html_entities.name2codepoint
    if entity in named_entities:
        return compat_chr(named_entities[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        digits = numeric.group(1)
        is_hex = digits.startswith('x')
        if is_hex:
            digits = '0%s' % digits
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(digits, 16 if is_hex else 10))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2236
2237
def unescapeHTML(s):
    """Replace the HTML entities in s by the characters they stand for (None is passed through)"""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2245
8bf48f23 2246
def escapeHTML(text):
    """Escape &, <, >, double and single quotes in text for use in HTML"""
    replacements = (
        # '&' has to come first so the entities produced below stay intact
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for raw, entity in replacements:
        text = text.replace(raw, entity)
    return text
2256
2257
def process_communicate_or_kill(p, *args, **kwargs):
    """Return p.communicate(*args, **kwargs); on any exception kill p, wait for it and re-raise"""
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return result
2265
2266
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and filenames"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2277
2278
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    @param for_subprocess Whether the result is going to be passed to a subprocess

    Returns s unchanged wherever unicode filenames can be used directly,
    otherwise s encoded with the subprocess encoding.
    """

    assert type(s) == compat_str

    unicode_is_accepted = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if unicode_is_accepted:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2301
2302
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename: decode a bytes filename on Python 2, return anything else unchanged"""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2312
f07b74fc
PH
2313
def encodeArgument(s):
    """Encode a command-line argument for passing to a subprocess"""
    if isinstance(s, compat_str):
        text = s
    else:
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        text = s.decode('ascii')
    return encodeFilename(text, True)
2321
2322
aa49acd1
S
def decodeArgument(b):
    """Decode a subprocess argument; counterpart of encodeArgument"""
    return decodeFilename(b, for_subprocess=True)
2325
2326
8271226a
PH
def decodeOption(optval):
    """Return an option value as text, decoding bytes with the preferred encoding (None is passed through)"""
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 2335
5f6a1245 2336
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]MM<delim>SS or plain seconds.

    secs   -- duration in seconds (int or float)
    delim  -- separator placed between hours, minutes and seconds
    msec   -- when set, append '.mmm' with the milliseconds

    Hours and minutes are only shown when they are non-zero, so exactly 60
    seconds gives '1:00' and exactly 3600 seconds gives '1:00:00'.
    """
    negative = secs < 0
    magnitude = -secs if negative else secs
    if msec:
        # Work in whole milliseconds so the fraction is not lost and float
        # noise (e.g. 1.001 * 1000 == 1000.9999...) is rounded away
        whole, millis = divmod(int(round(magnitude * 1000)), 1000)
    else:
        whole, millis = int(magnitude), 0
    minutes, seconds = divmod(whole, 60)
    hours, minutes = divmod(minutes, 60)

    if hours:
        ret = '%d%s%02d%s%02d' % (hours, delim, minutes, delim, seconds)
    elif minutes:
        ret = '%d%s%02d' % (minutes, delim, seconds)
    else:
        ret = '%d' % seconds
    if negative and (whole or millis):
        ret = '-' + ret
    return '%s.%03d' % (ret, millis) if msec else ret
4539dd30 2345
a0ddb8a2 2346
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are passed on to the handler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname has to be switched off before verify_mode may be set to CERT_NONE
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    # Reached when create_default_context is missing or the handler rejected context=
    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2370
732ea2f0 2371
def bug_reports_message(before=';'):
    """Return the standard "please report this issue" text.

    before is the text the message gets appended to. The message starts
    with a capital letter when before is empty or ends a sentence.
    """
    if ytdl_is_updateable():
        update_cmd = 'type yt-dlp -U to update'
    else:
        update_cmd = 'see https://github.com/yt-dlp/yt-dlp on how to update'
    msg = ''.join((
        'please report this issue on https://github.com/yt-dlp/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call yt-dlp with the --verbose flag and include its complete output.',
    ))

    before = before.rstrip()
    starts_new_sentence = not before or before.endswith(('.', '!', '?'))
    if starts_new_sentence:
        msg = msg[0].title() + msg[1:]

    prefix = before + ' ' if before else ''
    return prefix + msg
08f2a92c
JMF
2386
2387
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Common base class of the YoutubeDL exceptions defined in this module."""
2391
2392
# Exception types treated as network failures. Built as a list so that
# ssl.CertificateError can be added where it exists, then frozen into a tuple.
network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
if hasattr(ssl, 'CertificateError'):
    network_exceptions.append(ssl.CertificateError)
network_exceptions = tuple(network_exceptions)
2397
2398
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        cause, if given, is appended to the message as "(caused by ...)".
        video_id, if given, is prepended to the message.
        """

        # An error created while a network exception is being handled is always expected
        if sys.exc_info()[0] in network_exceptions:
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        # Unexpected errors get the bug-report instructions appended
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2426
1c256f70 2427
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2433
2434
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match"""
2438
2439
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    Raised for a video that cannot be accessed from the current geographic
    location because the website restricts where it is available.
    """

    def __init__(self, msg, countries=None):
        # Geo restriction is a normal condition, never a bug: always expected
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.countries = countries
        self.msg = msg
2451
2452
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may raise this when they are not configured to
    continue on errors. It carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the sys.exc_info() result of the exception that caused the trouble. """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
d77c3dfd
FV
2465
2466
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Thrown by YoutubeDL when a requested entry cannot be found in the
    playlist info_dict.
    """
2474
2475
class SameFileError(YoutubeDLError):
    """Same File exception.

    Thrown by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2483
2484
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this to signal an error in
    the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Kept as an attribute as well, so the message is available as .msg
        self.msg = msg
d77c3dfd 2495
5f6a1245 2496
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): presumably raised for --break-on-existing; confirm against the raising code.
    """
    pass
2500
2501
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): presumably raised for --break-on-reject; confirm against the raising code.
    """
    pass
2505
2506
class ThrottledDownload(YoutubeDLError):
    """ The download speed dropped below --throttled-rate. """
2510
2511
class MaxDownloadsReached(YoutubeDLError):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
2515
2516
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available
    for that video.
    """
d77c3dfd
FV
2524
2525
class ContentTooShortError(YoutubeDLError):
    """Downloaded file is smaller than announced.

    FileDownloader objects may raise this exception when a file they
    download is too small for what the server announced first, which
    indicates that the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """ downloaded and expected are the received and announced sizes, both in bytes. """
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2541
5f6a1245 2542
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno-style code plus a coarse failure reason.

    The `reason` attribute is derived from `code` and `msg` and is one of
    'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Classify the failure from either the numeric code or the message text
        no_space_messages = ('No space left', 'Disk quota exceeded')
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or any(text in self.msg for text in no_space_messages)):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2557
2558
class XAttrUnavailableError(YoutubeDLError):
    """Marker exception without any extra state.

    NOTE(review): judging by the name this presumably signals that
    extended-attribute support is unavailable - confirm against the callers.
    """
2561
2562
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and make it honour the 'source_address' param.

    ydl_handler supplies the options through its `_params` mapping; the
    remaining positional/keyword arguments are forwarded to http_class.
    When no source address is configured the fresh connection object is
    returned untouched.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))

    source_address = ydl_handler._params.get('source_address')
    if source_address is None:
        return hc

    # This is to workaround _create_connection() from socket where it will try all
    # address data from getaddrinfo() including IPv6. This filters the result from
    # getaddrinfo() based on the source_address value.
    # This is based on the cpython socket.create_connection() function.
    # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
    def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
        host, port = address
        last_error = None
        candidates = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
        # A dot in the source IP means IPv4, anything else is treated as IPv6
        family = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
        usable = [info for info in candidates if info[0] == family]
        if candidates and not usable:
            ip_version = 'v4' if family == socket.AF_INET else 'v6'
            raise socket.error(
                "No remote IP%s addresses available for connect, can't use '%s' as source address"
                % (ip_version, source_address[0]))
        for family, socktype, proto, canonname, remote in usable:
            sock = None
            try:
                sock = socket.socket(family, socktype, proto)
                if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                    sock.settimeout(timeout)
                sock.bind(source_address)
                sock.connect(remote)
                last_error = None  # Explicitly break reference cycle
                return sock
            except socket.error as exc:
                last_error = exc
                if sock is not None:
                    sock.close()
        if last_error is not None:
            raise last_error
        raise socket.error('getaddrinfo returns an empty list')

    if hasattr(hc, '_create_connection'):
        hc._create_connection = _create_connection

    bind_address = (source_address, 0)
    if hasattr(hc, 'source_address'):  # Python 2.7+
        hc.source_address = bind_address
    else:  # Python 2.6
        def _hc_connect(self, *args, **kwargs):
            sock = _create_connection(
                (self.host, self.port), self.timeout, bind_address)
            if is_https:
                self.sock = ssl.wrap_socket(
                    sock, self.key_file, self.cert_file,
                    ssl_version=ssl.PROTOCOL_TLSv1)
            else:
                self.sock = sock
        hc.connect = functools.partial(_hc_connect, hc)

    return hc
2626
2627
def handle_youtubedl_headers(headers):
    """Apply the internal "Youtubedl-no-compression" marker header.

    When the marker is present a new dict is returned without the marker
    and without any Accept-Encoding header (matched case-insensitively).
    Otherwise the very same headers object is handed back.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    remaining = dict(
        (name, value) for name, value in headers.items()
        if name.lower() != 'accept-encoding')
    del remaining[marker]
    return remaining
87f0e62d
YCH
2636
2637
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    Once installed in an OpenerDirector this handler adds the standard
    headers to every HTTP request and transparently decodes gzipped and
    deflated responses. A request that must not be compressed only needs
    to carry the "Youtubedl-no-compression" HTTP header, which is removed
    before the real request is made.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req, going through a SOCKS proxy when the request asks for one."""
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and a zlib-wrapped one second."""
        if not data:
            return data
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL and add the standard headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)
        if escaped_url != original_url:
            # Substitute the URL only when escaping changed something
            req = update_Request(req, url=escaped_url)

        for name, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() not in req.headers:
                req.add_header(name, value)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 does not cope with URL fragments, so cut them off
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decode compressed bodies and percent-encode redirect locations."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file, so retry with up
                # to 1023 trailing bytes removed
                # See http://stackoverflow.com/q/4928560/35070 for details
                for trim in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-trim]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(
                uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(
                inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                is_python3 = sys.version_info >= (3, 0)
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if is_python3:
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if not is_python3:
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
bf50b038 2761
5de90176 2762
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Build a connection class that connects through a SOCKS proxy.

    base_class must be (a subclass of) HTTPConnection or HTTPSConnection.
    socks_proxy is the proxy URL: its scheme selects the protocol (socks5,
    socks/socks4 or socks4a), its port defaults to 1080 and its username and
    password are percent-decoded when present.

    Returns a subclass of base_class whose connect() goes through the proxy.
    Raises ValueError when the proxy URL uses any other scheme.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # An unknown scheme used to leave socks_type unbound, which surfaced
        # later as an opaque UnboundLocalError
        raise ValueError('Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2804
2805
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection().

    params is stored as `_params`; https_conn_class selects the connection
    class and defaults to compat_http_client.HTTPSConnection.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req, going through a SOCKS proxy when the request asks for one."""
        conn_class = self._https_conn_class

        # Forward only the attributes the running Python version provides:
        # _context exists on python > 2.6, _check_hostname on python 3.x
        open_kwargs = {}
        for option, attribute in (('context', '_context'),
                                  ('check_hostname', '_check_hostname')):
            if hasattr(self, attribute):
                open_kwargs[option] = getattr(self, attribute)

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2829
2830
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = compat_str(cookie.expires) if cookie.expires is not None else ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            # Drop the HttpOnly marker so the entry is read as a normal one
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect the valid lines in memory and let the base class parse them
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2947
2948
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also registers its hooks for HTTPS.

    The Set-Cookie escaping workaround described in http_response() is
    currently commented out, so responses are passed on unchanged.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        parent = compat_urllib_request.HTTPCookieProcessor
        return parent.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2971
2972
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        method = req.get_method()
        follows_get_or_head = (
            code in (301, 302, 303, 307, 308) and method in ("GET", "HEAD"))
        follows_post = code in (301, 302, 303) and method == "POST"
        if not (follows_get_or_head or follows_post):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict(
            (name, value) for name, value in req.headers.items()
            if name.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
fca6dba8
S
3028
3029
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC offset off date_str.

    Recognizes a final "Z" or a final "+HHMM"/"-HH:MM" style offset
    (optionally preceded by one space) after at least 8 other characters.
    Returns a (timedelta, remaining_date_str) tuple; the timedelta is zero
    when no offset is found or when the suffix is "Z".
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    # Cut the matched suffix off the date string
    date_str = date_str[:-len(tz_match.group('tz'))]
    if not tz_match.group('sign'):
        return datetime.timedelta(), date_str

    sign = 1 if tz_match.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=sign * int(tz_match.group('hours')),
        minutes=sign * int(tz_match.group('minutes')))
    return offset, date_str
3046
3047
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    delimiter separates the date part from the time part. timezone is the
    UTC offset as a timedelta; when omitted it is extracted from date_str.
    Returns None for a None or unparsable date_str.
    """
    if date_str is None:
        return None

    # Fractional seconds are dropped
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_time = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_time.timetuple())
    except ValueError:
        return None
912b38b4
PH
3065
3066
46f59e89
S
def date_formats(day_first=True):
    """Return DATE_FORMATS_DAY_FIRST if day_first is truthy, else DATE_FORMATS_MONTH_FIRST."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3069
3070
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    date_str = extract_timezone(date_str)[1]

    upload_date = None
    # Every format is tried; when several match, the last one wins
    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass

    if upload_date is None:
        # Fall back to parsing email-style (RFC 2822) dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if upload_date is None else compat_str(upload_date)
bf50b038 3097
5f6a1245 3098
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    Returns None when date_str is None or cannot be parsed.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    # NOTE(review): 12 hours are added whenever "PM" occurs, which looks like
    # it also shifts "12:xx PM" times - confirm against the date formats used
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if tz_match:
        date_str = date_str[:-len(tz_match.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    ns_match = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if ns_match:
        date_str = ns_match.group(1)

    # The first format that parses determines the result
    for expression in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
        except ValueError:
            continue
        return calendar.timegm(moment.timetuple())

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600
    return None
46f59e89
S
3130
3131
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from url.

    The guess is whatever follows the last dot before the query string.
    It is returned when purely alphanumeric, or (minus trailing slashes)
    when listed in KNOWN_EXTENSIONS; otherwise default_ext is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    guess = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3143
5f6a1245 3144
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by "<sub_lang>.<sub_format>"."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3147
5f6a1245 3148
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?
    The literal 'yesterday' is accepted as well.

    format: strptime format used to parse date_str when it is a plain date
    precision: round the time portion of a datetime object.
                auto|microsecond|second|minute|hour|day.
                auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(datetime.datetime.now(), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)(s)?',
        date_str)
    if match is None:
        # Plain date without a relative offset
        return datetime_round(datetime.datetime.strptime(date_str, format), precision)

    start_time = datetime_from_str(match.group('start'), precision, format)
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')

    if unit in ('month', 'year'):
        # Calendar-aware arithmetic; the result is rounded like a day offset
        new_date = datetime_add_months(start_time, amount * 12 if unit == 'year' else amount)
        unit = 'day'
    else:
        if unit == 'week':
            unit = 'day'
            amount *= 7
        new_date = start_time + datetime.timedelta(**{unit + 's': amount})

    return datetime_round(new_date, unit) if auto_precision else new_date
3189
3190
def date_from_str(date_str, format='%Y%m%d'):
    """
    Return a date object from a string in the format YYYYMMDD or
    (now|today|date)[+-][0-9](microsecond|second|minute|hour|day|week|month|year)(s)?

    format: strptime format passed on to datetime_from_str()
    """
    moment = datetime_from_str(date_str, precision='microsecond', format=format)
    return moment.date()
3199
3200
def datetime_add_months(dt, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month index so that divmod yields the year carry
    year_carry, month_index = divmod(dt.month + months - 1, 12)
    year = dt.year + year_carry
    month = month_index + 1
    last_day = calendar.monthrange(year, month)[1]
    return dt.replace(year, month, min(dt.day, last_day))
3208
3209
def datetime_round(dt, precision='day'):
    """
    Round a datetime object's time to a specific precision

    precision is one of microsecond|second|minute|hour|day; with
    'microsecond' dt is returned unchanged.
    """
    if precision == 'microsecond':
        return dt

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }

    def round_to_multiple(value, step):
        # Adding half a step before flooring rounds halves upwards
        return ((value + step / 2) // step) * step

    timestamp = calendar.timegm(dt.timetuple())
    rounded = round_to_multiple(timestamp, unit_seconds[precision])
    return datetime.datetime.utcfromtimestamp(rounded)
5f6a1245
JW
3226
3227
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Any other string is returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
3236
5f6a1245 3237
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest/latest representable date.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not already a date object is parsed as a date string
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3267
3268
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # A byte string is decoded with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3277
3278
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # File descriptor -> GetStdHandle() id (-11: standard output, -12: standard error)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # AttributeError: the output stream doesn't have a fileno, it's virtual
        # UnsupportedOperation: some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    wintypes = ctypes.wintypes

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        wintypes.HANDLE, wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE, wintypes.LPWSTR,
        wintypes.DWORD, ctypes.POINTER(wintypes.DWORD),
        wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(
        wintypes.DWORD, wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE,
        ctypes.POINTER(wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the BMP, or len(s) if there is none
        return next((i for i, c in enumerate(s) if ord(c) > 0xffff), len(s))

    while s:
        # Write at most 1024 characters per call and stop before any non-BMP one
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if count:
            assert written.value > 0
            s = s[written.value:]
        else:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
    return True
3352
3353
def write_string(s, out=None, encoding=None):
    """ Write the text `s` to `out` (sys.stderr when omitted) and flush it.

    On win32, when no encoding is given, the console writer is tried first.
    Otherwise the text is encoded (ignoring unencodable characters) for
    binary-mode streams and streams exposing `buffer`, or written as-is."""
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    use_console = sys.platform == 'win32' and encoding is None and hasattr(stream, 'fileno')
    if use_console and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr
    needs_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if needs_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        codec = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(codec, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
3374
3375
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Convert a byte sequence into a list of integer code values """
    if not bs:
        return []
    if not isinstance(bs[0], int):
        # Items are one-character strings rather than ints
        return [ord(ch) for ch in bs]
    return list(bs)  # Python 3
3383
c257baff 3384
def intlist_to_bytes(xs):
    """ Pack a sequence of integers into bytes, one unsigned byte each """
    if xs:
        layout = '%dB' % len(xs)
        return compat_struct_pack(layout, *xs)
    return b''
c38b1e77
PH
3389
3390
c1c9a79c
PH
# Cross-platform file locking
#   win32            -> LockFileEx / UnlockFileEx through ctypes
#   fcntl available  -> fcntl.flock
#   otherwise        -> both helpers raise IOError
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32

    LockFileEx = kernel32.LockFileEx
    LockFileEx.restype = ctypes.wintypes.BOOL
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]

    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]

    # Byte range passed to both calls (low and high DWORD of the length)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock `f`; raises OSError if LockFileEx fails """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object because _unlock_file needs the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        flags = 0x2 if exclusive else 0x0
        handle = msvcrt.get_osfhandle(f.fileno())
        locked = LockFileEx(
            handle, flags, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Release the lock taken by _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0, whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """ Take an exclusive or shared flock on `f` """
            operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            fcntl.flock(f, operation)

        def _unlock_file(f):
            """ Drop the flock held on `f` """
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3464
3465
class locked_file(object):
    """ Context manager that opens a file and holds a lock on it.

    The lock is shared for mode 'r' and exclusive for 'a' and 'w'; it is
    released and the file closed when the `with` block exits."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.mode = mode
        self.f = io.open(filename, mode, encoding=encoding)

    def __enter__(self):
        want_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, want_exclusive)
        except IOError:
            # Do not leak the handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3495
3496
4644ac55
S
def get_filesystem_encoding():
    """ Return sys.getfilesystemencoding(), or 'utf-8' when that is None """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3500
3501
def shell_quote(args):
    """ Quote every argument for the shell and join them with spaces """
    fs_encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(fs_encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(arg)) for arg in args])
9d4660ca
PH
3511
3512
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    `data` is JSON-encoded into a '#__youtubedl_smuggle=...' fragment.
    If `url` already carries smuggled data, it is merged in and takes
    precedence over `data` on conflicting keys. `data` is not modified."""

    url, idata = unsmuggle_url(url, {})
    # Work on a copy so the caller's dict is left untouched
    payload = dict(data)
    payload.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(payload)})
    return url + '#' + sdata
9d4660ca
PH
3521
3522
def unsmuggle_url(smug_url, default=None):
    """ Split a smuggled URL into (url, data).

    Returns (smug_url, default) when no smuggled fragment is present."""
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        encoded = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(encoded)
    return smug_url, default
02dbf93f
PH
3530
3531
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count with a binary (1024-based) unit suffix.

    Returns 'N/A' for None. A str argument is converted with float().
    The unit is clamped to the range B..YiB, so values below one byte are
    shown in B and values beyond YiB are shown in YiB; negative values
    keep their sign."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        # abs(): math.log is undefined for negative numbers
        exponent = int(math.log(abs(bytes), 1024.0))
        # Clamp: a negative exponent would index the list from the end,
        # an overly large one would run off it
        exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3544
1c088fa8 3545
fb47597b
S
def lookup_unit_table(unit_table, s):
    """ Parse '<number><unit>' at the start of `s` using `unit_table`.

    `unit_table` maps unit strings to multipliers. ',' and '.' are both
    accepted as the decimal separator. Returns the product truncated to
    int, or None when `s` does not match."""
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    found = re.match(pattern, s)
    if found is None:
        return None
    number_text = found.group('num').replace(',', '.')
    multiplier = unit_table[found.group('unit')]
    return int(float(number_text) * multiplier)
3555
3556
be64b5b0
PH
def parse_filesize(s):
    """ Parse a human-readable file size such as '5 MiB' into a byte count.

    Returns None for None or unparsable input."""
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    # (symbol letter, decimal unit name stem, binary unit name stem)
    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        unit_table.update({
            letter + 'iB': binary,
            letter + 'B': decimal,
            lower + 'B': binary,
            letter + 'b': decimal,
            lower + 'b': decimal,
            decimal_name + 'bytes': decimal,
            binary_name + 'bytes': binary,
        })

    return lookup_unit_table(unit_table, s)
3626
3627
def parse_count(s):
    """ Parse a count such as '1,234' or '1.5M' into an int.

    Returns None for None or unparsable input."""
    if s is None:
        return None

    s = s.strip()

    # Plain digits with separators go straight to str_to_int
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, s)
be64b5b0 3647
2f7ae819 3648
b871d7e9
S
def parse_resolution(s):
    """ Extract width/height information from a string.

    Recognizes 'WxH', '<height>p' / '<height>i' and '4k' / '8k', in that
    order. Returns a dict with 'width' and/or 'height', or {} if nothing
    was recognized."""
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if lines:
        return {'height': int(lines.group(1))}

    kilo = re.search(r'\b([48])[kK]\b', s)
    if kilo:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(kilo.group(1)) * 540}

    return {}
3669
3670
0dc41787
S
def parse_bitrate(s):
    """ Return the integer preceding 'kbps' in `s`, or None """
    if not isinstance(s, compat_str):
        return None
    found = re.search(r'\b(\d+)\s*kbps', s)
    return int(found.group(1)) if found else None
3677
3678
def month_by_name(name, lang='en'):
    """ Return the number (1-12) of a month given its full name.

    Names are looked up in MONTH_NAMES[lang], falling back to the 'en'
    list for unknown languages. Returns None for an unknown name. """

    candidates = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    if name in candidates:
        return candidates.index(name) + 1
    return None
3688
3689
def month_by_abbreviation(abbrev):
    """ Return the number (1-12) of a month given the first three letters
    of its English name, or None if there is no such month """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3698
3699
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' by '&amp;' in XML.

    An '&' is left alone when it starts one of the five predefined
    entities or a numeric character reference (decimal with 1-7 digits,
    hexadecimal with 1-6 digits)."""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{1,6};|#[0-9]{1,7};)',
        '&amp;',
        xml_str)
e3946f98
PH
3706
3707
def setproctitle(title):
    """ Set the process name through prctl(2) of libc.so.6.

    Silently does nothing on Jython, when the library cannot be loaded,
    or when it does not provide prctl. """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # Initializing from the bytes allocates len + 1 and NUL-terminates the
    # buffer; prctl reads a NUL-terminated string, so a buffer of exactly
    # len bytes could be read past its end
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 == PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3732
3733
def remove_start(s, start):
    """ Return `s` without the prefix `start`; `s` unchanged (or None) otherwise """
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3736
3737
def remove_end(s, end):
    """ Return `s` without the suffix `end`; `s` unchanged (or None) otherwise """
    # `end` must be non-empty: s[:-0] would be the empty string
    if s is not None and end and s.endswith(end):
        return s[:-len(end)]
    return s
2b9faf55
PH
3740
3741
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes around `s` """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3749
3750
b6e0c7d2
U
def get_domain(url):
    """ Return the host part of `url` without scheme and leading 'www.',
    or None if `url` does not look like it contains a dotted host """
    found = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3754
3755
def url_basename(url):
    """ Return the last segment of the path of `url` ('' if the path is empty) """
    url_path = compat_urlparse.urlparse(url).path
    segments = url_path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
3759
3760
02dc0a36
S
def base_url(url):
    """ Return `url` up to and including the last '/' before any '?', '#'
    or '&'. Raises AttributeError if `url` is not such an http(s) URL """
    found = re.match(r'https?://[^?#&]+/', url)
    return found.group()
3763
3764
def urljoin(base, path):
    """ Join `path` onto `base`.

    bytes arguments are decoded as UTF-8. Returns `path` unchanged if it
    already is absolute or protocol-relative. Returns None if `path` is
    empty or not a string, or if `base` is not an http(s) or
    protocol-relative URL. """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # '-' is placed last in the class so it is a literal; written as '+-.'
    # it formed a range that also accepted ',' in the scheme
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3778
3779
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always HEAD """

    def get_method(self):
        method = 'HEAD'
        return method
7217e148
PH
3783
3784
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always PUT """

    def get_method(self):
        method = 'PUT'
        return method
3788
3789
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert `v` to int, returning `default` when that is not possible.

    If `get_attr` is given, the attribute of that name is read from `v`
    first. None, '' and values int() rejects (including infinities)
    give `default`. The result is int(v) * invscale // scale. """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        # OverflowError: int(float('inf'))
        return default
9732d77e 3802
9572013d 3803
40a90862
JMF
def str_or_none(v, default=None):
    """ Return compat_str(v), or `default` when `v` is None """
    if v is None:
        return default
    return compat_str(v)
3806
9732d77e
PH
3807
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Integers are returned as-is; strings have ',', '.' and '+' removed
    before conversion; anything else gives None. """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if not isinstance(int_str, compat_str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)
608d11f5
PH
3815
3816
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Return float(v) * invscale / scale, or `default` if `v` is None
    or cannot be converted """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3824
3825
c7e327c4
S
def bool_or_none(v, default=None):
    """ Return `v` if it is a bool, `default` otherwise """
    if isinstance(v, bool):
        return v
    return default
3828
3829
53cd37ba
S
def strip_or_none(v, default=None):
    """ Return `v` stripped of surrounding whitespace, or `default` when
    `v` is not a compat_str """
    if not isinstance(v, compat_str):
        return default
    return v.strip()
b72b4431
S
3832
3833
af03000a
S
def url_or_none(url):
    """ Return the stripped `url` if it starts with an accepted scheme
    (or is protocol-relative), None otherwise """
    if not url or not isinstance(url, compat_str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
af03000a
S
3839
3840
def strftime_or_none(timestamp, date_format, default=None):
    """ Format a timestamp with `date_format`.

    `timestamp` is either a number (unix timestamp, interpreted as UTC)
    or a 'YYYYMMDD' string. Returns `default` for any other type and for
    values that cannot be converted or formatted. """
    datetime_object = None
    try:
        if isinstance(timestamp, compat_numeric_types):  # unix timestamp
            datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
        elif isinstance(timestamp, compat_str):  # assume YYYYMMDD
            datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
        # AttributeError below covers datetime_object still being None
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        # OverflowError / OSError: timestamp outside the platform's range
        return default
3851
3852
def parse_duration(s):
    """ Parse a duration string into a number of seconds.

    Three notations are tried in order: colon separated
    ('[[[DD:]HH:]MM:]SS[.ms]'), unit suffixed ('1h 2m 3s', 'PT1H2M3S')
    and a single fractional amount of hours or minutes ('1.5 hours').
    Returns None if `s` is not a string or matches none of them. """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days = hours = mins = secs = ms = None

    clock = re.match(
        r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if clock:
        days, hours, mins, secs, ms = clock.groups()
    else:
        # Years, months and weeks are accepted but not counted
        units = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if units:
            days, hours, mins, secs, ms = units.groups()
        else:
            single = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if not single:
                return None
            hours, mins = single.groups()

    # Same accumulation order and multiplication sequence for every part
    duration = 0
    contributions = (
        (secs, ()),
        (mins, (60,)),
        (hours, (60, 60)),
        (days, (24, 60, 60)),
        (ms, ()),
    )
    for text, factors in contributions:
        if not text:
            continue
        amount = float(text)
        for factor in factors:
            amount = amount * factor
        duration += amount
    return duration
91d7d0b3
JMF
3909
3910
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert `ext` before the real extension of `filename`.

    If `expected_real_ext` is given and differs from the real extension,
    `ext` is appended to the whole filename instead. """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(stem, ext, real_ext)
d70ad093
PH
3917
3918
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the real extension of `filename` with `ext`.

    If `expected_real_ext` is given and differs from the real extension,
    `ext` is appended to the whole filename instead. """
    stem, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    return '{0}.{1}'.format(stem, ext)
3924
3925
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary can be started and returns its name,
    or False if starting it raises OSError.
    args can be a list of arguments for a short output (like -version) """
    command = [exe] + args
    try:
        proc = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
b7ab0590
PH
3935
3936
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3954
3955
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version string from program output.

    Returns group 1 of the first match of `version_re` (by default the
    token following 'version'), or `unrecognized` if nothing matches. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3965
3966
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList

    Items are pulled from the iterable only as far as needed and kept in
    an internal cache. Negative indices, open-ended slices, len(), repr()
    and reversed access consume the whole iterable. '''

    def __init__(self, iterable):
        self.__iterable = iter(iterable)
        self.__cache = []
        self.__reversed = False

    def __iter__(self):
        if not self.__reversed:
            # Already fetched items first, then continue pulling new ones
            yield from self.__cache
            for item in self.__iterable:
                self.__cache.append(item)
                yield item
            return
        # We need to consume the entire iterable to iterate in reverse
        yield from self.exhaust()

    def __exhaust(self):
        self.__cache.extend(self.__iterable)
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        everything = self.__exhaust()
        return everything[::-1] if self.__reversed else everything[::1]

    @staticmethod
    def __reverse_index(x):
        # Position counted from the opposite end; None stays None
        if x is None:
            return None
        return -(x + 1)

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(
                    self.__reverse_index(idx.start),
                    self.__reverse_index(idx.stop),
                    -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            start = stop = idx
            step = 0
        else:
            raise TypeError('indices must be integers or slices')

        counts_from_end = (start or 0) < 0 or (stop or 0) < 0
        open_ended = (start is None and step < 0) or (stop is None and step > 0)
        if counts_from_end or open_ended:
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            return self.__exhaust()[idx]

        # Fetch just enough items to cover the highest requested position
        missing = max(start or 0, stop or 0) - len(self.__cache) + 1
        if missing > 0:
            self.__cache.extend(itertools.islice(self.__iterable, missing))
        return self.__cache[idx]

    def __bool__(self):
        probe = -1 if self.__reversed else 0
        try:
            self[probe]
        except IndexError:
            return False
        return True

    def __len__(self):
        self.exhaust()
        return len(self.__cache)

    def reverse(self):
        ''' Flip the iteration/indexing direction in place; returns self '''
        self.__reversed = not self.__reversed
        return self

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
4042
483336e7 4043
class PagedList:
    """ Base class for lists whose items are fetched page by page.

    `pagefunc(pagenum)` must return an iterable with the items of that
    page. Subclasses implement `_getslice(start, end)`. With `use_cache`
    every fetched page is remembered and fetched only once. """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """ Return the items of page `pagenum` as a list """
        # Membership test instead of truthiness, so that a cached empty
        # page is not fetched again
        if pagenum in self._cache:
            return self._cache[pagenum]
        page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        """ Return the items in [start, end) as a list """
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        # NOTE: cache must be enabled if this is used
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        # Returns None, rather than raising, past the end
        return entries[0] if entries else None
4073
9c44d242
PH
4074
class OnDemandPagedList(PagedList):
    """ PagedList that requests pages one after another until a page comes
    back short or the requested range is covered """

    def _getslice(self, start, end):
        # An empty range needs no page at all. Without this guard the
        # end-of-range checks below never trigger for end <= start on a
        # page boundary (or on an earlier page) and whole pages leak out
        if end is not None and end <= start:
            return

        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize

            # Offset of the first wanted item; non-zero only on the first page
            startv = start % self._pagesize if firstid <= start else 0
            # Offset one past the last wanted item, if the range ends here
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
81c2f20b
PH
4108
4109
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ PagedList for sources whose total number of pages is known up front """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagecount = pagecount
        PagedList.__init__(self, pagefunc, pagesize, True)

    def _getslice(self, start, end):
        start_page = start // self._pagesize
        # Never go past the known number of pages, even if `end` lies beyond it
        end_page = (
            self._pagecount if end is None
            else min(self._pagecount, end // self._pagesize + 1))
        # Items to drop at the beginning of the first page
        skip_elems = start - start_page * self._pagesize
        # Items still to be yielded; None means "everything"
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    yield from page_results[:only_more]
                    break
            yield from page_results
9c44d242
PH
4133
4134
def uppercase_escape(s):
    r""" Replace every \UXXXXXXXX escape sequence in `s` by the character
    it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
0fe2ff78
YCH
4141
4142
def lowercase_escape(s):
    r""" Replace every \uXXXX escape sequence in `s` by the character it
    denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
b53466e1 4149
d05cfe06
S
4150
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # Characters listed here are left unescaped
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
d05cfe06
S
4156
4157
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA encoded; the remaining components are percent-escaped
    ascii_netloc = parts.netloc.encode('idna').decode('ascii')
    escaped = parts._replace(
        netloc=ascii_netloc,
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
4168
62e609ab 4169
def parse_qs(url):
    """ Parse the query string of `url` with compat_parse_qs """
    query = compat_urllib_parse_urlparse(url).query
    return compat_parse_qs(query)
4172
4173
62e609ab
PH
def read_batch_urls(batch_fd):
    """ Read the URLs of a batch file, one per line, and close `batch_fd`.

    Empty lines and lines starting with '#', ';' or ']' are skipped, as
    are trailing comments introduced by whitespace followed by '#'. """

    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # Drop a leading byte order mark, in either of its two forms
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        without_comment = re.split(r'\s#', url, 1)[0]
        return without_comment.rstrip()

    with contextlib.closing(batch_fd) as fd:
        urls = []
        for line in fd:
            cleaned = fixup(line)
            if cleaned:
                urls.append(cleaned)
        return urls
b74fa8cd
JMF
4191
4192
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments and return the result as ASCII bytes """
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
4195
4196
def update_url_query(url, query):
    """ Return `url` with the parameters of `query` added to its query
    string, replacing existing parameters of the same name.
    `url` is returned unchanged when `query` is empty. """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 4205
8e60dc75 4206
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """ Build a new request from `req` with some parts replaced.

    `headers` are merged over the original headers, `data` and `url`
    replace the original values when given, and `query` is merged into
    the URL. HEAD and PUT requests keep their method; `timeout` is
    carried over if present. """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)

    # Pick the request class that preserves the original HTTP method
    method = req.get_method()
    if method == 'HEAD':
        request_class = HEADRequest
    elif method == 'PUT':
        request_class = PUTRequest
    else:
        request_class = compat_urllib_request.Request

    new_req = request_class(
        new_url, data=new_data, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4225
4226
def _multipart_encode_impl(data, boundary):
    """ Encode `data` as multipart/form-data using the given boundary.

    Returns (body bytes, Content-Type value). Raises ValueError if the
    boundary occurs inside one of the encoded fields. """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    out = b''
    for name, value in data.items():
        out += b'--' + boundary_bytes + b'\r\n'
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        out += part

    out += b'--' + boundary_bytes + b'--\r\n'

    return out, content_type
4247
4248
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body bytes, Content-Type value) tuple. Raises ValueError if
    a specified boundary occurs in the data; a generated boundary is
    simply re-drawn in that case.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-chosen boundary gets exactly one attempt
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
4277
4278
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up one key, or the first usable of several keys, in `d`.

    For a list/tuple of keys, missing keys and None values are skipped,
    as are falsy values unless `skip_false_values` is False; `default` is
    returned if no key qualifies. A single key is a plain d.get(). """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4287
4288
def try_get(src, getter, expected_type=None):
    """ Apply one or several getter callables to `src`.

    Returns the first result that does not raise AttributeError,
    KeyError, TypeError or IndexError and, if `expected_type` is given,
    is an instance of it. Returns None when no getter succeeds. """
    for get in variadic(getter):
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4298
4299
6cc62232
S
def merge_dicts(*dicts):
    """ Merge dicts, earlier ones taking precedence.

    None values are ignored. A value already present is only replaced
    when it is an empty string and the later value is a non-empty string. """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is None:
                continue
            if key in merged:
                current = merged[key]
                fills_empty_string = (
                    isinstance(value, compat_str) and value
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_empty_string:
                    continue
            merged[key] = value
    return merged
4312
4313
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as compat_str, converting it with encoding/errors if needed """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4316
16392824 4317
a1a530b0
PH
# Age limit for each rating label; parse_age_limit looks its upper-cased
# input up in this table.
# NOTE(review): presumably the US film rating labels - confirm
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4325
4326
# Age limit for each "TV-*" rating label. parse_age_limit builds its regex
# from the part of every key after the "TV-" prefix, so all keys must keep
# that prefix.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4335
4336
def parse_age_limit(s):
    """ Convert an age rating to a numeric age limit

    Accepts a plain int (only 0..21 is considered valid), a string such as
    "18" or "18+", a key of US_RATINGS, or a TV rating such as "TV-14",
    "TV_14" or "TV14" (case-insensitive). Returns None for anything else.
    """
    if type(s) is int:  # exact type on purpose: bool must not be accepted
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    rating = s.upper()
    if rating in US_RATINGS:
        return US_RATINGS[rating]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, rating)
    if not tv_rating:
        return None
    return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
146c80e2
S
4352
4353
def strip_jsonp(code):
    """ Unwrap a JSONP response and return the callback's argument text

    Handles an optional "window." prefix, the "name && name(...)" guard
    form, a trailing semicolon and trailing // comments. Input that does
    not look like a JSONP call is returned unchanged.
    """
    jsonp_call = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_call.sub(r'\g<callback_data>', code)
478c2c61
PH
4362
4363
def js_to_json(code, vars={}):
    """ Rewrite JavaScript object-literal style code as JSON text

    code: the JavaScript source text
    vars: dict of var, val pairs to substitute; a token found in it is
          replaced by its value verbatim

    Quoted strings are re-quoted with double quotes, bare identifiers are
    quoted, hex and octal integers become decimal, and comments, "!" runs
    and trailing commas before "]" or "}" are dropped.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # JS escape sequence -> JSON replacement; anything else is kept as is
    ESCAPE_FIXES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def fix_escape(esc):
        sequence = esc.group(0)
        return ESCAPE_FIXES.get(sequence, sequence)

    def fix_kv(m):
        token = m.group(0)
        if token in ('true', 'false', 'null'):
            return token
        if token.startswith(('/*', '//', '!')) or token == ',':
            return ""

        if token[0] in ("'", '"'):
            token = re.sub(r'(?s)\\.|"', fix_escape, token[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, token)
                if not im:
                    continue
                number = int(im.group(1), base)
                if token.endswith(':'):
                    # An integer used as an object key must become a string
                    return '"%d":' % number
                return '%d' % number

        if token in vars:
            return vars[token]

        return '"%s"' % token

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4408
4409
478c2c61
PH
def qualities(quality_ids):
    """ Build a function ranking a quality id by its position in quality_ids

    The returned function gives the index of its argument within
    quality_ids, or -1 when the id is not listed.
    """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
4418
acd69589 4419
# Default output filename templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types.
# NOTE(review): the values look like the filename suffix used for that type
# (None where there is none) - confirm against the code consuming this table
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
#
# Regex template for one %-style format specifier that is not preceded by an
# unescaped "%". Placeholder {0} is the pattern for the mapping key and {1}
# the pattern for the conversion type (presumably filled in with str.format).
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?  # mapping key
    (?P<format>
        (?:[#0\-+ ]+)?  # conversion flags (optional)
        (?:\d+)?  # minimum field width (optional)
        (?:\.\d+)?  # precision (optional)
        [hlL]?  # length modifier (optional)
        {1}  # conversion type
    )
'''


# Conversion type characters of %-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
a020a0dc 4454
7d1eb38a 4455
a020a0dc
PH
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Returns s unchanged when it is None or at most `length` characters long.
    Otherwise returns s cut down so that, including the trailing "...", the
    result is exactly `length` characters. When `length` is too small to
    hold the ellipsis, as much of the ellipsis as fits is returned, so the
    result is never longer than `length`.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    if length < len(ELLIPSES):
        # A negative slice end would keep most of the string and return
        # something longer than the input, so truncate the ellipsis instead
        return ELLIPSES[:max(length, 0)]
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4464
4465
def version_tuple(v):
    """ Split a version string on "." and "-" into a tuple of ints

    Raises ValueError if any component is not an integer.
    """
    components = re.split(r'[-.]', v)
    return tuple(map(int, components))
48844745
PH
4468
4469
def is_outdated_version(version, limit, assume_new=True):
    """ Tell whether version is older than limit

    When version is empty or either string cannot be parsed by
    version_tuple, the answer is the opposite of assume_new.
    """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current, threshold = version_tuple(version), version_tuple(limit)
    except ValueError:
        return fallback
    return current < threshold
732ea2f0
PH
4477
4478
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Always False. The detection that used to follow this return (loaded
    # through a zipimport loader, or running as a frozen executable) was
    # unreachable and has been dropped.
    return False
7d4111ed
PH
4486
4487
def args_to_str(args):
    """ Get a short string representation for a subprocess command

    Every argument is shell-quoted and the results are joined with spaces.
    """
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
2ccd1b10
PH
4491
4492
def error_to_compat_str(err):
    """ Return the message of an exception as a text string """
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4500
4501
def mimetype2ext(mt):
    """ Map a MIME type to a file extension

    Parameters after ";" are ignored and matching is case-insensitive.
    A few full types are mapped first (e.g. "audio/mp4" -> "m4a"), then the
    subtype is looked up; an unknown subtype is returned as the extension.
    Returns None when mt is None.
    """
    if mt is None:
        return None

    # Normalise before *both* lookups, so that e.g. "audio/mpeg; charset=x"
    # or "AUDIO/MPEG" still hit the full-type table below
    mt = mt.split(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4539
4540
def parse_codecs(codecs_str):
    """ Split a comma-separated codecs string into video and audio codec

    See http://tools.ietf.org/html/rfc6381

    Returns {} for empty input, otherwise a dict with 'vcodec' and 'acodec'.
    The first recognised codec of each kind wins and a missing kind is
    reported as 'none'. If nothing is recognised but exactly two codecs were
    given, they are taken as video and audio in that order. A warning is
    written to stderr for every unrecognised codec.
    """
    if not codecs_str:
        return {}

    VIDEO_CODECS = (
        'avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
        'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = (
        'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3',
        'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = (part.strip() for part in codecs_str.strip().strip(',').split(','))
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4570
4571
def urlhandle_detect_ext(url_handle):
    """ Guess a file extension from the headers of a response handle

    The filename of an 'attachment; filename="..."' Content-Disposition
    header is preferred; otherwise the Content-Type header is mapped with
    mimetype2ext.
    """
    getheader = url_handle.headers.get

    disposition = getheader('Content-Disposition')
    attachment = None
    if disposition:
        attachment = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
    if attachment:
        ext = determine_ext(attachment.group('filename'), default_ext=None)
        if ext:
            return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
4584
4585
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Build a base64 "data:" URI from bytes data and a MIME type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4588
4589
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    Nothing is blocked when no age_limit is set, or when the content has no
    limit (i.e. it is available for everyone).
    """
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4598
4599
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    A leading byte order mark selects the encoding used for decoding (UTF-8
    otherwise). Returns a match object when the text starts with optional
    whitespace followed by "<", and None otherwise.
    """
    # The 4-byte UTF-32 marks must be tested before the UTF-16 marks they
    # start with
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]

    text = None
    for marker, codec in BOMS:
        if first_bytes.startswith(marker):
            text = first_bytes[len(marker):].decode(codec, 'replace')
            break
    if text is None:
        text = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4618
4619
def determine_protocol(info_dict):
    """ Work out the download protocol of an info dict

    An explicit 'protocol' entry wins. Otherwise the 'url' entry decides:
    the rtmp/mms/rtsp prefixes first, then the m3u8/f4m extensions, and
    finally the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4640
4641
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Columns are left-aligned and padded to the widest cell plus extraGap;
    the last column is not padded. With delim, a row of dashes is inserted
    under the header. With hideEmpty, columns whose data cells are all empty
    are dropped (header included).
    """

    def column_widths(rows):
        widths = []
        for column in zip(*rows):
            widths.append(max(len(compat_str(cell)) for cell in column))
        return widths

    def keep_columns(row, mask):
        return [cell for (keep, cell) in zip(mask, row) if keep]

    if hideEmpty:
        # A column whose widest data cell has length 0 is filtered out
        mask = column_widths(data)
        header_row = keep_columns(header_row, mask)
        data = [keep_columns(row, mask) for row in data]

    widths = column_widths([header_row] + data)
    separator = [['-' * width for width in widths]] if delim else []
    rows = [header_row] + separator + data

    padded = ['%-' + compat_str(width + extraGap) + 's' for width in widths[:-1]]
    format_str = ' '.join(padded) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in rows)
347de493
PH
4662
4663
def _match_one(filter_part, dct, incomplete):
    """ Evaluate a single filter expression against dct

    Two syntaxes are tried in order:
      * a comparison "key [!]op[?] value", where op is one of the keys of
        COMPARISON_OPERATORS, "!" negates the operator and "?" also accepts
        a missing field;
      * a unary test "key" or "!key".

    When the field is missing from dct (or None), a comparison yields
    `incomplete` or the matched "?" text, and a unary test yields True if
    `incomplete` is set.

    The result is truthy or falsy but not necessarily a bool (e.g. the "~="
    operator returns the re.search() result).

    Raises ValueError for a string-only operator used with a numeric value,
    for a numeric value that cannot be parsed, and for a filter part that
    matches neither syntax.
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    # Key order matters: the keys are joined into a regex alternation below
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        unnegated_op = COMPARISON_OPERATORS[m.group('op')]
        if m.group('negation'):
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quote characters inside a quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            if m.group('op') in STRING_OPERATORS:
                raise ValueError('Operator %s only supports string values!' % m.group('op'))
            # Numeric value: a plain int first, then a file size such as
            # "10M", then the same text with a "B" suffix appended
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: passes only if incomplete or "?" was given
            return incomplete or m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: booleans are tested for True/False, any other value for
    # being set (not None) or unset
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if incomplete and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4745
4746
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax

    filter_str holds conditions separated by "&" (write "\\&" for a literal
    ampersand). Returns True if dct passes every condition, else False.
    When incomplete is set, every condition passes on a missing field.
    """
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        condition = filter_part.replace(r'\&', '&')
        if not _match_one(condition, dct, incomplete):
            return False
    return True
347de493
PH
4754
4755
def match_filter_func(filter_str):
    """ Build a match filter callable from a filter string

    The returned function takes an info dict (extra arguments are passed on
    to match_str) and returns None if the dict passes the filter, or a
    message explaining that the video is skipped.
    """
    def _match_func(info_dict, *args, **kwargs):
        if match_str(filter_str, info_dict, *args, **kwargs):
            return None
        fallback_title = info_dict.get('id', 'video')
        video_title = info_dict.get('title', fallback_title)
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
91410c9b
PH
4764
4765
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """ Parse a DFXP/TTML time expression into seconds (float)

    Supports an offset in seconds ("12.5" or "12.5s") and clock time
    "HH:MM:SS", whose fraction may follow "." or ":". Returns None for
    empty or unrecognised input.
    """
    if not time_expr:
        return None

    offset = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset:
        return float(offset.group('time_offset'))

    clock = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if not clock:
        return None
    hours, minutes, seconds = clock.groups()
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4777
4778
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """ Format a time in seconds as an SRT timecode "HH:MM:SS,mmm"

    The value is rounded to the nearest millisecond before being split, so
    that float representation error cannot drop a millisecond (e.g. 1.001
    must give ",001", not ",000").
    """
    total_msecs = int(round(seconds * 1000))
    rest, msecs = divmod(total_msecs, 1000)
    rest, secs = divmod(rest, 60)
    hours, mins = divmod(rest, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msecs)
bf6427d2
YCH
4781
4782
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitles to SRT

    Paragraphs without a begin time, or with neither an end time nor a
    duration, are skipped. Supported styling is rendered with <font>, <b>,
    <i> and <u> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no paragraph elements
    '''
    # (current namespace, [legacy namespaces rewritten to it])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> {property: value}
    default_style = {}  # style inherited from <body>/<div>

    class TTMLPElementParser(object):
        # XMLParser target flattening one paragraph subtree to SRT markup

        def __init__(self):
            # The state must live on the instance: mutable class attributes
            # would be shared by the parsers of all paragraphs and leak
            # style state from one paragraph into the next
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in force from an ancestor
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                # Push the style in force for this element unconditionally,
                # so that end() can pop exactly one entry per element and
                # the stack always mirrors the element nesting
                applied_style = {}
                if self._applied_styles:
                    applied_style.update(self._applied_styles[-1])
                applied_style.update(style)
                self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style can reference a parent defined
    # later in the document, hence the repeated passes.
    while True:
        repeat = False
        resolved_before = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Another pass can only help if this one resolved a new style;
        # without this check a missing, cyclic or property-less parent
        # style would make the loop run forever
        if not repeat or len(styles) == resolved_before:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index counts skipped paragraphs as well
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4945
4946
66e289ba
S
def cli_option(params, command_option, param):
    """ Build the argv fragment for an option that takes a value

    Returns [] when params[param] is missing or None, otherwise
    [command_option, value] with the value converted to a string. Falsy
    values such as 0 are converted as well, so that the result only ever
    contains strings and can be handed to subprocess.
    """
    param = params.get(param)
    if param is None:
        return []
    return [command_option, compat_str(param)]
4952
4953
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """ Build the argv fragment for an option taking a boolean value

    Returns [] when params[param] is missing or None. Otherwise the flag
    (which must be a bool) is rendered as true_value/false_value, either as
    a separate argument or joined to the option with separator.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    value = true_value if flag else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4962
4963
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Return [command_option] if params[param] equals expected_value, else [] """
    if params.get(param) == expected_value:
        return [command_option]
    return []
4967
4968
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """ Pick the extra command line arguments configured for a tool

    argdict: dict of lower-case key -> list of arguments. A plain list or
             tuple is accepted for backward compatibility and is returned
             as is when use_compat is set (otherwise default is returned).
    keys:    list/tuple of keys or key groups, in order of preference. The
             first group with any entry in argdict wins, and the argument
             lists of all its keys are concatenated.
    Returns default when argdict is None or no key matches.
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict if use_compat else default
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        candidates = [argdict.get(key.lower()) for key in variadic(key_list)]
        found = [args for args in candidates if args is not None]
        if not found:
            continue
        flattened = []
        for args in found:
            flattened.extend(args)
        return flattened
    return default
66e289ba
S
4987
4988
39672624
YCH
class ISO639Utils(object):
    # ISO 639-1 (two-letter) -> ISO 639-2/T (three-letter) language codes
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up. Returns None
        for an unknown code.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first matching entry in table order, or None for an
        unknown code.
        """
        matches = (
            short_name for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
5192
5193
4eb10f66
YCH
5194class ISO3166Utils(object):
5195 # From http://data.okfn.org/data/core/country-list
5196 _country_map = {
5197 'AF': 'Afghanistan',
5198 'AX': 'Åland Islands',
5199 'AL': 'Albania',
5200 'DZ': 'Algeria',
5201 'AS': 'American Samoa',
5202 'AD': 'Andorra',
5203 'AO': 'Angola',
5204 'AI': 'Anguilla',
5205 'AQ': 'Antarctica',
5206 'AG': 'Antigua and Barbuda',
5207 'AR': 'Argentina',
5208 'AM': 'Armenia',
5209 'AW': 'Aruba',
5210 'AU': 'Australia',
5211 'AT': 'Austria',
5212 'AZ': 'Azerbaijan',
5213 'BS': 'Bahamas',
5214 'BH': 'Bahrain',
5215 'BD': 'Bangladesh',
5216 'BB': 'Barbados',
5217 'BY': 'Belarus',
5218 'BE': 'Belgium',
5219 'BZ': 'Belize',
5220 'BJ': 'Benin',
5221 'BM': 'Bermuda',
5222 'BT': 'Bhutan',
5223 'BO': 'Bolivia, Plurinational State of',
5224 'BQ': 'Bonaire, Sint Eustatius and Saba',
5225 'BA': 'Bosnia and Herzegovina',
5226 'BW': 'Botswana',
5227 'BV': 'Bouvet Island',
5228 'BR': 'Brazil',
5229 'IO': 'British Indian Ocean Territory',
5230 'BN': 'Brunei Darussalam',
5231 'BG': 'Bulgaria',
5232 'BF': 'Burkina Faso',
5233 'BI': 'Burundi',
5234 'KH': 'Cambodia',
5235 'CM': 'Cameroon',
5236 'CA': 'Canada',
5237 'CV': 'Cape Verde',
5238 'KY': 'Cayman Islands',
5239 'CF': 'Central African Republic',
5240 'TD': 'Chad',
5241 'CL': 'Chile',
5242 'CN': 'China',
5243 'CX': 'Christmas Island',
5244 'CC': 'Cocos (Keeling) Islands',
5245 'CO': 'Colombia',
5246 'KM': 'Comoros',
5247 'CG': 'Congo',
5248 'CD': 'Congo, the Democratic Republic of the',
5249 'CK': 'Cook Islands',
5250 'CR': 'Costa Rica',
5251 'CI': 'Côte d\'Ivoire',
5252 'HR': 'Croatia',
5253 'CU': 'Cuba',
5254 'CW': 'Curaçao',
5255 'CY': 'Cyprus',
5256 'CZ': 'Czech Republic',
5257 'DK': 'Denmark',
5258 'DJ': 'Djibouti',
5259 'DM': 'Dominica',
5260 'DO': 'Dominican Republic',
5261 'EC': 'Ecuador',
5262 'EG': 'Egypt',
5263 'SV': 'El Salvador',
5264 'GQ': 'Equatorial Guinea',
5265 'ER': 'Eritrea',
5266 'EE': 'Estonia',
5267 'ET': 'Ethiopia',
5268 'FK': 'Falkland Islands (Malvinas)',
5269 'FO': 'Faroe Islands',
5270 'FJ': 'Fiji',
5271 'FI': 'Finland',
5272 'FR': 'France',
5273 'GF': 'French Guiana',
5274 'PF': 'French Polynesia',
5275 'TF': 'French Southern Territories',
5276 'GA': 'Gabon',
5277 'GM': 'Gambia',
5278 'GE': 'Georgia',
5279 'DE': 'Germany',
5280 'GH': 'Ghana',
5281 'GI': 'Gibraltar',
5282 'GR': 'Greece',
5283 'GL': 'Greenland',
5284 'GD': 'Grenada',
5285 'GP': 'Guadeloupe',
5286 'GU': 'Guam',
5287 'GT': 'Guatemala',
5288 'GG': 'Guernsey',
5289 'GN': 'Guinea',
5290 'GW': 'Guinea-Bissau',
5291 'GY': 'Guyana',
5292 'HT': 'Haiti',
5293 'HM': 'Heard Island and McDonald Islands',
5294 'VA': 'Holy See (Vatican City State)',
5295 'HN': 'Honduras',
5296 'HK': 'Hong Kong',
5297 'HU': 'Hungary',
5298 'IS': 'Iceland',
5299 'IN': 'India',
5300 'ID': 'Indonesia',
5301 'IR': 'Iran, Islamic Republic of',
5302 'IQ': 'Iraq',
5303 'IE': 'Ireland',
5304 'IM': 'Isle of Man',
5305 'IL': 'Israel',
5306 'IT': 'Italy',
5307 'JM': 'Jamaica',
5308 'JP': 'Japan',
5309 'JE': 'Jersey',
5310 'JO': 'Jordan',
5311 'KZ': 'Kazakhstan',
5312 'KE': 'Kenya',
5313 'KI': 'Kiribati',
5314 'KP': 'Korea, Democratic People\'s Republic of',
5315 'KR': 'Korea, Republic of',
5316 'KW': 'Kuwait',
5317 'KG': 'Kyrgyzstan',
5318 'LA': 'Lao People\'s Democratic Republic',
5319 'LV': 'Latvia',
5320 'LB': 'Lebanon',
5321 'LS': 'Lesotho',
5322 'LR': 'Liberia',
5323 'LY': 'Libya',
5324 'LI': 'Liechtenstein',
5325 'LT': 'Lithuania',
5326 'LU': 'Luxembourg',
5327 'MO': 'Macao',
5328 'MK': 'Macedonia, the Former Yugoslav Republic of',
5329 'MG': 'Madagascar',
5330 'MW': 'Malawi',
5331 'MY': 'Malaysia',
5332 'MV': 'Maldives',
5333 'ML': 'Mali',
5334 'MT': 'Malta',
5335 'MH': 'Marshall Islands',
5336 'MQ': 'Martinique',
5337 'MR': 'Mauritania',
5338 'MU': 'Mauritius',
5339 'YT': 'Mayotte',
5340 'MX': 'Mexico',
5341 'FM': 'Micronesia, Federated States of',
5342 'MD': 'Moldova, Republic of',
5343 'MC': 'Monaco',
5344 'MN': 'Mongolia',
5345 'ME': 'Montenegro',
5346 'MS': 'Montserrat',
5347 'MA': 'Morocco',
5348 'MZ': 'Mozambique',
5349 'MM': 'Myanmar',
5350 'NA': 'Namibia',
5351 'NR': 'Nauru',
5352 'NP': 'Nepal',
5353 'NL': 'Netherlands',
5354 'NC': 'New Caledonia',
5355 'NZ': 'New Zealand',
5356 'NI': 'Nicaragua',
5357 'NE': 'Niger',
5358 'NG': 'Nigeria',
5359 'NU': 'Niue',
5360 'NF': 'Norfolk Island',
5361 'MP': 'Northern Mariana Islands',
5362 'NO': 'Norway',
5363 'OM': 'Oman',
5364 'PK': 'Pakistan',
5365 'PW': 'Palau',
5366 'PS': 'Palestine, State of',
5367 'PA': 'Panama',
5368 'PG': 'Papua New Guinea',
5369 'PY': 'Paraguay',
5370 'PE': 'Peru',
5371 'PH': 'Philippines',
5372 'PN': 'Pitcairn',
5373 'PL': 'Poland',
5374 'PT': 'Portugal',
5375 'PR': 'Puerto Rico',
5376 'QA': 'Qatar',
5377 'RE': 'Réunion',
5378 'RO': 'Romania',
5379 'RU': 'Russian Federation',
5380 'RW': 'Rwanda',
5381 'BL': 'Saint Barthélemy',
5382 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5383 'KN': 'Saint Kitts and Nevis',
5384 'LC': 'Saint Lucia',
5385 'MF': 'Saint Martin (French part)',
5386 'PM': 'Saint Pierre and Miquelon',
5387 'VC': 'Saint Vincent and the Grenadines',
5388 'WS': 'Samoa',
5389 'SM': 'San Marino',
5390 'ST': 'Sao Tome and Principe',
5391 'SA': 'Saudi Arabia',
5392 'SN': 'Senegal',
5393 'RS': 'Serbia',
5394 'SC': 'Seychelles',
5395 'SL': 'Sierra Leone',
5396 'SG': 'Singapore',
5397 'SX': 'Sint Maarten (Dutch part)',
5398 'SK': 'Slovakia',
5399 'SI': 'Slovenia',
5400 'SB': 'Solomon Islands',
5401 'SO': 'Somalia',
5402 'ZA': 'South Africa',
5403 'GS': 'South Georgia and the South Sandwich Islands',
5404 'SS': 'South Sudan',
5405 'ES': 'Spain',
5406 'LK': 'Sri Lanka',
5407 'SD': 'Sudan',
5408 'SR': 'Suriname',
5409 'SJ': 'Svalbard and Jan Mayen',
5410 'SZ': 'Swaziland',
5411 'SE': 'Sweden',
5412 'CH': 'Switzerland',
5413 'SY': 'Syrian Arab Republic',
5414 'TW': 'Taiwan, Province of China',
5415 'TJ': 'Tajikistan',
5416 'TZ': 'Tanzania, United Republic of',
5417 'TH': 'Thailand',
5418 'TL': 'Timor-Leste',
5419 'TG': 'Togo',
5420 'TK': 'Tokelau',
5421 'TO': 'Tonga',
5422 'TT': 'Trinidad and Tobago',
5423 'TN': 'Tunisia',
5424 'TR': 'Turkey',
5425 'TM': 'Turkmenistan',
5426 'TC': 'Turks and Caicos Islands',
5427 'TV': 'Tuvalu',
5428 'UG': 'Uganda',
5429 'UA': 'Ukraine',
5430 'AE': 'United Arab Emirates',
5431 'GB': 'United Kingdom',
5432 'US': 'United States',
5433 'UM': 'United States Minor Outlying Islands',
5434 'UY': 'Uruguay',
5435 'UZ': 'Uzbekistan',
5436 'VU': 'Vanuatu',
5437 'VE': 'Venezuela, Bolivarian Republic of',
5438 'VN': 'Viet Nam',
5439 'VG': 'Virgin Islands, British',
5440 'VI': 'Virgin Islands, U.S.',
5441 'WF': 'Wallis and Futuna',
5442 'EH': 'Western Sahara',
5443 'YE': 'Yemen',
5444 'ZM': 'Zambia',
5445 'ZW': 'Zimbabwe',
5446 }
5447
@classmethod
def short2full(cls, code):
    """Convert a two-letter country code to the corresponding full name

    The code is upper-cased before the lookup in `cls._country_map`, so the
    match is case-insensitive. Returns None for a code missing from the map.
    NOTE(review): the keys look like ISO 3166-1 alpha-2 country codes rather
    than ISO 3166-2 subdivision codes - confirm the intended standard.
    """
    return cls._country_map.get(code.upper())
5452
5453
773f291d
S
5454class GeoUtils(object):
5455 # Major IPv4 address blocks per country
5456 _country_ip_map = {
53896ca5 5457 'AD': '46.172.224.0/19',
773f291d
S
5458 'AE': '94.200.0.0/13',
5459 'AF': '149.54.0.0/17',
5460 'AG': '209.59.64.0/18',
5461 'AI': '204.14.248.0/21',
5462 'AL': '46.99.0.0/16',
5463 'AM': '46.70.0.0/15',
5464 'AO': '105.168.0.0/13',
53896ca5
S
5465 'AP': '182.50.184.0/21',
5466 'AQ': '23.154.160.0/24',
773f291d
S
5467 'AR': '181.0.0.0/12',
5468 'AS': '202.70.112.0/20',
53896ca5 5469 'AT': '77.116.0.0/14',
773f291d
S
5470 'AU': '1.128.0.0/11',
5471 'AW': '181.41.0.0/18',
53896ca5
S
5472 'AX': '185.217.4.0/22',
5473 'AZ': '5.197.0.0/16',
773f291d
S
5474 'BA': '31.176.128.0/17',
5475 'BB': '65.48.128.0/17',
5476 'BD': '114.130.0.0/16',
5477 'BE': '57.0.0.0/8',
53896ca5 5478 'BF': '102.178.0.0/15',
773f291d
S
5479 'BG': '95.42.0.0/15',
5480 'BH': '37.131.0.0/17',
5481 'BI': '154.117.192.0/18',
5482 'BJ': '137.255.0.0/16',
53896ca5 5483 'BL': '185.212.72.0/23',
773f291d
S
5484 'BM': '196.12.64.0/18',
5485 'BN': '156.31.0.0/16',
5486 'BO': '161.56.0.0/16',
5487 'BQ': '161.0.80.0/20',
53896ca5 5488 'BR': '191.128.0.0/12',
773f291d
S
5489 'BS': '24.51.64.0/18',
5490 'BT': '119.2.96.0/19',
5491 'BW': '168.167.0.0/16',
5492 'BY': '178.120.0.0/13',
5493 'BZ': '179.42.192.0/18',
5494 'CA': '99.224.0.0/11',
5495 'CD': '41.243.0.0/16',
53896ca5
S
5496 'CF': '197.242.176.0/21',
5497 'CG': '160.113.0.0/16',
773f291d 5498 'CH': '85.0.0.0/13',
53896ca5 5499 'CI': '102.136.0.0/14',
773f291d
S
5500 'CK': '202.65.32.0/19',
5501 'CL': '152.172.0.0/14',
53896ca5 5502 'CM': '102.244.0.0/14',
773f291d
S
5503 'CN': '36.128.0.0/10',
5504 'CO': '181.240.0.0/12',
5505 'CR': '201.192.0.0/12',
5506 'CU': '152.206.0.0/15',
5507 'CV': '165.90.96.0/19',
5508 'CW': '190.88.128.0/17',
53896ca5 5509 'CY': '31.153.0.0/16',
773f291d
S
5510 'CZ': '88.100.0.0/14',
5511 'DE': '53.0.0.0/8',
5512 'DJ': '197.241.0.0/17',
5513 'DK': '87.48.0.0/12',
5514 'DM': '192.243.48.0/20',
5515 'DO': '152.166.0.0/15',
5516 'DZ': '41.96.0.0/12',
5517 'EC': '186.68.0.0/15',
5518 'EE': '90.190.0.0/15',
5519 'EG': '156.160.0.0/11',
5520 'ER': '196.200.96.0/20',
5521 'ES': '88.0.0.0/11',
5522 'ET': '196.188.0.0/14',
5523 'EU': '2.16.0.0/13',
5524 'FI': '91.152.0.0/13',
5525 'FJ': '144.120.0.0/16',
53896ca5 5526 'FK': '80.73.208.0/21',
773f291d
S
5527 'FM': '119.252.112.0/20',
5528 'FO': '88.85.32.0/19',
5529 'FR': '90.0.0.0/9',
5530 'GA': '41.158.0.0/15',
5531 'GB': '25.0.0.0/8',
5532 'GD': '74.122.88.0/21',
5533 'GE': '31.146.0.0/16',
5534 'GF': '161.22.64.0/18',
5535 'GG': '62.68.160.0/19',
53896ca5
S
5536 'GH': '154.160.0.0/12',
5537 'GI': '95.164.0.0/16',
773f291d
S
5538 'GL': '88.83.0.0/19',
5539 'GM': '160.182.0.0/15',
5540 'GN': '197.149.192.0/18',
5541 'GP': '104.250.0.0/19',
5542 'GQ': '105.235.224.0/20',
5543 'GR': '94.64.0.0/13',
5544 'GT': '168.234.0.0/16',
5545 'GU': '168.123.0.0/16',
5546 'GW': '197.214.80.0/20',
5547 'GY': '181.41.64.0/18',
5548 'HK': '113.252.0.0/14',
5549 'HN': '181.210.0.0/16',
5550 'HR': '93.136.0.0/13',
5551 'HT': '148.102.128.0/17',
5552 'HU': '84.0.0.0/14',
5553 'ID': '39.192.0.0/10',
5554 'IE': '87.32.0.0/12',
5555 'IL': '79.176.0.0/13',
5556 'IM': '5.62.80.0/20',
5557 'IN': '117.192.0.0/10',
5558 'IO': '203.83.48.0/21',
5559 'IQ': '37.236.0.0/14',
5560 'IR': '2.176.0.0/12',
5561 'IS': '82.221.0.0/16',
5562 'IT': '79.0.0.0/10',
5563 'JE': '87.244.64.0/18',
5564 'JM': '72.27.0.0/17',
5565 'JO': '176.29.0.0/16',
53896ca5 5566 'JP': '133.0.0.0/8',
773f291d
S
5567 'KE': '105.48.0.0/12',
5568 'KG': '158.181.128.0/17',
5569 'KH': '36.37.128.0/17',
5570 'KI': '103.25.140.0/22',
5571 'KM': '197.255.224.0/20',
53896ca5 5572 'KN': '198.167.192.0/19',
773f291d
S
5573 'KP': '175.45.176.0/22',
5574 'KR': '175.192.0.0/10',
5575 'KW': '37.36.0.0/14',
5576 'KY': '64.96.0.0/15',
5577 'KZ': '2.72.0.0/13',
5578 'LA': '115.84.64.0/18',
5579 'LB': '178.135.0.0/16',
53896ca5 5580 'LC': '24.92.144.0/20',
773f291d
S
5581 'LI': '82.117.0.0/19',
5582 'LK': '112.134.0.0/15',
53896ca5 5583 'LR': '102.183.0.0/16',
773f291d
S
5584 'LS': '129.232.0.0/17',
5585 'LT': '78.56.0.0/13',
5586 'LU': '188.42.0.0/16',
5587 'LV': '46.109.0.0/16',
5588 'LY': '41.252.0.0/14',
5589 'MA': '105.128.0.0/11',
5590 'MC': '88.209.64.0/18',
5591 'MD': '37.246.0.0/16',
5592 'ME': '178.175.0.0/17',
5593 'MF': '74.112.232.0/21',
5594 'MG': '154.126.0.0/17',
5595 'MH': '117.103.88.0/21',
5596 'MK': '77.28.0.0/15',
5597 'ML': '154.118.128.0/18',
5598 'MM': '37.111.0.0/17',
5599 'MN': '49.0.128.0/17',
5600 'MO': '60.246.0.0/16',
5601 'MP': '202.88.64.0/20',
5602 'MQ': '109.203.224.0/19',
5603 'MR': '41.188.64.0/18',
5604 'MS': '208.90.112.0/22',
5605 'MT': '46.11.0.0/16',
5606 'MU': '105.16.0.0/12',
5607 'MV': '27.114.128.0/18',
53896ca5 5608 'MW': '102.70.0.0/15',
773f291d
S
5609 'MX': '187.192.0.0/11',
5610 'MY': '175.136.0.0/13',
5611 'MZ': '197.218.0.0/15',
5612 'NA': '41.182.0.0/16',
5613 'NC': '101.101.0.0/18',
5614 'NE': '197.214.0.0/18',
5615 'NF': '203.17.240.0/22',
5616 'NG': '105.112.0.0/12',
5617 'NI': '186.76.0.0/15',
5618 'NL': '145.96.0.0/11',
5619 'NO': '84.208.0.0/13',
5620 'NP': '36.252.0.0/15',
5621 'NR': '203.98.224.0/19',
5622 'NU': '49.156.48.0/22',
5623 'NZ': '49.224.0.0/14',
5624 'OM': '5.36.0.0/15',
5625 'PA': '186.72.0.0/15',
5626 'PE': '186.160.0.0/14',
5627 'PF': '123.50.64.0/18',
5628 'PG': '124.240.192.0/19',
5629 'PH': '49.144.0.0/13',
5630 'PK': '39.32.0.0/11',
5631 'PL': '83.0.0.0/11',
5632 'PM': '70.36.0.0/20',
5633 'PR': '66.50.0.0/16',
5634 'PS': '188.161.0.0/16',
5635 'PT': '85.240.0.0/13',
5636 'PW': '202.124.224.0/20',
5637 'PY': '181.120.0.0/14',
5638 'QA': '37.210.0.0/15',
53896ca5 5639 'RE': '102.35.0.0/16',
773f291d 5640 'RO': '79.112.0.0/13',
53896ca5 5641 'RS': '93.86.0.0/15',
773f291d 5642 'RU': '5.136.0.0/13',
53896ca5 5643 'RW': '41.186.0.0/16',
773f291d
S
5644 'SA': '188.48.0.0/13',
5645 'SB': '202.1.160.0/19',
5646 'SC': '154.192.0.0/11',
53896ca5 5647 'SD': '102.120.0.0/13',
773f291d 5648 'SE': '78.64.0.0/12',
53896ca5 5649 'SG': '8.128.0.0/10',
773f291d
S
5650 'SI': '188.196.0.0/14',
5651 'SK': '78.98.0.0/15',
53896ca5 5652 'SL': '102.143.0.0/17',
773f291d
S
5653 'SM': '89.186.32.0/19',
5654 'SN': '41.82.0.0/15',
53896ca5 5655 'SO': '154.115.192.0/18',
773f291d
S
5656 'SR': '186.179.128.0/17',
5657 'SS': '105.235.208.0/21',
5658 'ST': '197.159.160.0/19',
5659 'SV': '168.243.0.0/16',
5660 'SX': '190.102.0.0/20',
5661 'SY': '5.0.0.0/16',
5662 'SZ': '41.84.224.0/19',
5663 'TC': '65.255.48.0/20',
5664 'TD': '154.68.128.0/19',
5665 'TG': '196.168.0.0/14',
5666 'TH': '171.96.0.0/13',
5667 'TJ': '85.9.128.0/18',
5668 'TK': '27.96.24.0/21',
5669 'TL': '180.189.160.0/20',
5670 'TM': '95.85.96.0/19',
5671 'TN': '197.0.0.0/11',
5672 'TO': '175.176.144.0/21',
5673 'TR': '78.160.0.0/11',
5674 'TT': '186.44.0.0/15',
5675 'TV': '202.2.96.0/19',
5676 'TW': '120.96.0.0/11',
5677 'TZ': '156.156.0.0/14',
53896ca5
S
5678 'UA': '37.52.0.0/14',
5679 'UG': '102.80.0.0/13',
5680 'US': '6.0.0.0/8',
773f291d 5681 'UY': '167.56.0.0/13',
53896ca5 5682 'UZ': '84.54.64.0/18',
773f291d 5683 'VA': '212.77.0.0/19',
53896ca5 5684 'VC': '207.191.240.0/21',
773f291d 5685 'VE': '186.88.0.0/13',
53896ca5 5686 'VG': '66.81.192.0/20',
773f291d
S
5687 'VI': '146.226.0.0/16',
5688 'VN': '14.160.0.0/11',
5689 'VU': '202.80.32.0/20',
5690 'WF': '117.20.32.0/21',
5691 'WS': '202.4.32.0/19',
5692 'YE': '134.35.0.0/16',
5693 'YT': '41.242.116.0/22',
5694 'ZA': '41.0.0.0/11',
53896ca5
S
5695 'ZM': '102.144.0.0/13',
5696 'ZW': '102.177.192.0/18',
773f291d
S
5697 }
5698
@classmethod
def random_ipv4(cls, code_or_block):
    """Return a random IPv4 address (dotted-quad string) from a block

    @param code_or_block  Either a 2-character country code, looked up
                          case-insensitively in `cls._country_ip_map`, or
                          a block in "address/prefix-length" notation
    @returns              The address as a string, or None when the
                          country code has no entry in the map
    """
    if len(code_or_block) != 2:
        cidr = code_or_block
    else:
        cidr = cls._country_ip_map.get(code_or_block.upper())
        if not cidr:
            return None

    network, prefix_len = cidr.split('/')
    # The block spans from its base address up to the base address with
    # all the host bits set
    lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
    highest = lowest | (0xffffffff >> int(prefix_len))
    chosen = random.randint(lowest, highest)
    return compat_str(socket.inet_ntoa(compat_struct_pack('!L', chosen)))
773f291d
S
5712
5713
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets every request pick its own proxy

    A request selects its proxy through the 'Ytdl-request-proxy' header,
    which is consumed (removed from the request) by `proxy_open`.
    """

    def __init__(self, proxies=None):
        # Set default handlers. This has to happen before the base class
        # initializer runs (presumably it installs the handlers for the
        # configured proxies over these defaults - confirm against urllib).
        def _default_opener(scheme):
            return (lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                    meth(r, proxy, type))

        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme, _default_opener(scheme))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy named in the request header overrides the configured one
        override = req.headers.get('Ytdl-request-proxy')
        if override is not None:
            proxy = override
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy

        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            # Only tag the request here; yt-dlp's http/https handlers take
            # care of wrapping the socket with socks
            req.add_header('Ytdl-socks-proxy', proxy)
            return None

        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5737
5738
0a5445dd
YCH
5739# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5740# released into Public Domain
5741# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5742
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.

    Zero and negative values are encoded as a single zero byte (before
    padding); this matches the behaviour of the historical implementation.
    """
    n = int(n)
    if n > 0:
        # Minimal-length representation, i.e. without any leading zero byte
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        s = b'\000'
    # add back some pad bytes so that the length is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5771
5772
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes(). Leading zero bytes
    do not affect the result and an empty byte string yields 0.
    """
    return int.from_bytes(s, 'big')
5788
5789
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt a single block with OHDave's RSA scheme (http://www.ohdave.com/rsa/)

    The bytes of `data` are reversed before being read as a hexadecimal
    number, which is then raised to `exponent` modulo `modulus`.

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext_number = int(reversed_hex, 16)
    ciphertext_number = pow(plaintext_number, exponent, modulus)
    return '%x' % ciphertext_number
81bdc8fd
YCH
5805
5806
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data, where the
    padding consists of random NON-ZERO bytes: the first zero byte after the
    [0, 2] header marks the start of the data, so a zero inside the padding
    would corrupt the message when it is unpadded.

    NOTE: the padding comes from the `random` module, which is not a
    cryptographically secure source of randomness.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + list(data)
5820
5821
def encode_base_n(num, n, table=None):
    """Convert an integer to its base-n string representation

    @param num    The integer to convert. Negative numbers are encoded as
                  a '-' followed by the encoding of their absolute value
    @param n      The base
    @param table  The digits to use; the first n characters of
                  0-9, a-z, A-Z when not given
    @returns      The encoded string
    @raises ValueError  if n exceeds the length of the table, or if n < 2
                        while num is non-zero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # A base below 2 cannot represent non-zero numbers; without this check
    # the loop below would never terminate (or divide by zero)
    if n < 2:
        raise ValueError('base %d is too small to encode non-zero numbers' % n)

    # Floor division never brings a negative number to zero, so work on the
    # absolute value and put the sign back at the end
    sign = ''
    if num < 0:
        sign, num = '-', -num

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return sign + ''.join(reversed(digits))
f52354a8
YCH
5838
5839
def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into readable code

    The match provides the obfuscated code, a base, a symbol count and a
    '|'-separated symbol list. Every word of the obfuscated code is the
    base-n encoding of an index into the symbol list; an empty symbol means
    that the word stands for itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    radix, total = int(base), int(count)
    words = symbols.split('|')

    lookup = {}
    for index in range(total - 1, -1, -1):
        token = encode_base_n(index, radix)
        lookup[token] = words[index] or token

    return re.sub(
        r'\b(\w+)\b', lambda m: lookup[m.group(0)],
        obfuscated_code)
e154c651 5856
5857
1ced2221
S
def caesar(s, alphabet, shift):
    """Apply a Caesar cipher to s

    Each character of s found in alphabet is replaced by the character
    `shift` positions further along (wrapping around); any other character
    is kept as is. s is returned untouched when shift is 0.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))
5865
5866
def rot47(s):
    """Rotate every character of s that is in the alphabet below by 47 positions"""
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
5869
5870
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list (KEY=value,KEY="quoted value",...) into a dict

    The surrounding double quotes of quoted values are removed. When a key
    occurs more than once, the last occurrence wins.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        name: (raw[1:-1] if raw.startswith('"') else raw)
        for name, raw in pairs
    }
1143535d
YCH
5878
5879
def urshift(val, n):
    """Shift val right by n bits, treating a negative val as an unsigned 32-bit number"""
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5882
5883
5884# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5885# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """Decode PNG data into rows of raw byte values

    Only the width and the height are read from the IHDR chunk. Every row is
    taken to be `width * 3` bytes long and the "left" neighbour of a byte is
    the byte 3 positions before it. Bit depth, colour type and interlacing
    are not inspected.
    NOTE(review): presumably only 8-bit RGB, non-interlaced images are
    supported - confirm.

    @param png_data  The bytes of the complete PNG file
    @returns         A tuple (width, height, pixels) where pixels is a list
                     with one entry per row, each a list of width * 3 ints
    @raises IOError  if the signature or the IHDR marker is missing, or if
                     the file has no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # The file must start with the 8-byte signature, and "IHDR" must follow
    # the 4-byte length field of the first chunk
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer formats, keyed by width in bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the remaining data into chunks; each chunk is consumed as
    # length (4 bytes), type (4 bytes), data (length bytes), CRC (4 bytes)
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk was verified above to be IHDR
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The image data is the concatenation of all the IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of data bytes in a row (3 bytes per pixel)
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Look up an already decoded byte by its flat index in the image
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each row of the decompressed data starts with one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before being filled, so that _get_pixel can already read
        # the bytes of the current row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbours outside of the image count as 0
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Filter type 0 (and any unknown type) leaves the byte unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use the neighbour closest to p; ties are resolved in the
                # order a, b, c
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5989
5990
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file `path` to `value`

    The Python `xattr` module is used when it can be imported. Otherwise,
    an NTFS Alternate Data Stream is written on Windows, and the `setfattr`
    or `xattr` command-line tool is run elsewhere.

    @param path   Path of the file
    @param key    Name of the attribute
    @param value  Value of the attribute; it is decoded as UTF-8 before
                  being passed to a command-line tool, so it must be bytes
    @raises XAttrUnavailableError  if no usable implementation is found, or
                                   if the installed pyxattr is too old
    @raises XAttrMetadataError     if writing the attribute fails
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Two different modules can be imported under the name "xattr";
        # they are told apart by the function they provide
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE: despite the wording of the message, no fallback
                # takes place; the error is raised to the caller
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python module is available
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                value = value.decode('utf-8')
                # setfattr is preferred when both tools are available
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                # A failure of the tool is reported with its exit code and
                # its error output
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
0c265486
YCH
6073
6074
def random_birthday(year_field, month_field, day_field):
    """Generate a random date of birth between 1950-01-01 and 1995-12-31 (inclusive)

    Returns a dict that maps the three given field names to the year, the
    month and the day of the date as (non zero-padded) strings.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    picked = earliest + datetime.timedelta(random.randint(0, span_days))

    field_names = (year_field, month_field, day_field)
    date_parts = (picked.year, picked.month, picked.day)
    return {name: str(part) for name, part in zip(field_names, date_parts)}
732044af 6085
c76eb41b 6086
# Templates for internet shortcut files, which are plain text files.
# Each literal starts on the line after the opening quotes for readability;
# the resulting leading newline is removed with .lstrip().
# The %(...)s placeholders are presumably filled in with the `%` operator and
# a dict by the code that writes the shortcut - verify against the caller.

# "[InternetShortcut]" format; needs the key `url`
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# Apple property list (plist) format; needs the key `url`
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# "[Desktop Entry]" format; needs the keys `filename` and `url`
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
6112
6113
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept in the result, except for port 80 on an `http` IRI, where it is the default port and therefore redundant.

    Raises ValueError for IRIs with an IPv6 host (brackets in the network location).
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None for IRIs without a network location (e.g. `mailto:` or relative references); treat it as empty instead of failing.
    hostname = iri_parts.hostname or ''
    net_location += hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
    # The 'idna' encoding produces ASCII text.

    # Only drop the port when it is the default of the scheme. Dropping port 80 from, e.g., an `https` IRI would make the URI point to port 443 instead.
    port = iri_parts.port
    is_default_http_port = port == 80 and iri_parts.scheme.lower() == 'http'
    if port is not None and not is_default_http_port:
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
6156
6157
def to_high_limit_path(path):
    """Return the absolute path with the `\\\\?\\` prefix on Windows/Cygwin, and path itself elsewhere"""
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    # (A raw string cannot end with a backslash, hence the space that is stripped again)
    long_path_prefix = r'\\?\ '.rstrip()
    return long_path_prefix + os.path.abspath(path)
76d321f6 6164
c76eb41b 6165
def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
    """Format a value with template, or return default for ignored values

    @param obj       The value itself (when field is None), or an object
                     whose .get(field, default) provides the value
    @param field     Key to look up in obj
    @param template  %-style template applied to the value
    @param ignore    Values for which default is returned instead
    @param default   Fallback, both for a missing value and as the result
                     for an ignored value
    @param func      Optional function applied to a non-ignored value
                     before it is formatted
    """
    if field is not None:
        value = obj.get(field, default)
    elif obj is not None:
        value = obj
    else:
        value = default

    if func and value not in ignore:
        value = func(value)

    # The result of func may itself be a value that has to be ignored
    if value in ignore:
        return default
    return template % value
00dd0cd5 6174
6175
def clean_podcast_url(url):
    """Remove the known tracking/analytics redirect prefixes from a podcast URL"""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    # Every occurrence is removed, so chained prefixes are handled as well
    return re.sub(tracking_prefix_re, '', url)
ffcb8191
THD
6191
6192
6193_HEX_TABLE = '0123456789abcdef'
6194
6195
6196def random_uuidv4():
6197 return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a 6198
6199
def make_dir(path, to_screen=None):
    """Create the parent directory of path if it does not exist yet

    @param path       Path of a file; only its directory part is created
    @param to_screen  Optional callable used to report a failure; it is
                      called with a single message string
    @returns          True on success (including when there was nothing to
                      create), False if the directory could not be created
    """
    try:
        dn = os.path.dirname(path)
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
        return True
    except (OSError, IOError) as err:
        # Only report when a reporter was actually given; calling the
        # default (None) would raise a TypeError instead of returning False
        if callable(to_screen):
            to_screen('unable to create directory ' + error_to_compat_str(err))
        return False
f74980cb 6210
6211
def get_executable_path():
    """Return the absolute path of the directory the program is run from

    This is the directory of the executable for a frozen build, and
    otherwise a parent directory of this file: two levels up when the
    module was loaded from a ZIP archive, one level up in any other case.
    """
    from zipimport import zipimporter

    if hasattr(sys, 'frozen'):  # Running from PyInstaller
        return os.path.abspath(os.path.dirname(sys.executable))

    if isinstance(globals().get('__loader__'), zipimporter):  # Running from ZIP
        levels_up = '../..'
    else:
        levels_up = '..'
    return os.path.abspath(os.path.join(os.path.dirname(__file__), levels_up))
6221
6222
def load_plugins(name, suffix, namespace):
    """Load the classes of a plugin module into namespace

    The module `name` is searched for in the "ytdlp_plugins" directory
    below get_executable_path(). Each of its attributes whose name ends
    with suffix, and is not yet present in namespace, is added to namespace.

    @returns  The list of the objects that were added; it is empty when the
              module cannot be imported
    """
    found = [None]
    loaded = []
    try:
        search_dir = os.path.join(get_executable_path(), 'ytdlp_plugins')
        found = imp.find_module(name, [search_dir])
        module = imp.load_module(name, *found)
        for attr_name in dir(module):
            if attr_name in namespace or not attr_name.endswith(suffix):
                continue
            member = getattr(module, attr_name)
            loaded.append(member)
            namespace[attr_name] = member
    except ImportError:
        pass
    finally:
        # The first item of the find_module result is the open file, if any
        module_file = found[0]
        if module_file is not None:
            module_file.close()
    return loaded
06167fbb 6244
6245
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a tuple of strings or "...". When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The value found by the first path that yields a result
                            (a list of values when the path branches and get_all
                            is True), or default if no path yields one
    # TODO: Write tests
    '''
    if not casesense:
        # Lower-case the string keys of every path; the keys of the
        # dictionaries are lower-cased during the lookup below
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` belongs to the enclosing function. It records the deepest
        # level of branching (tuple keys or "...") reached for the current
        # path, which is how many levels of nested lists the result has
        nonlocal depth
        if obj is None:
            return None
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if isinstance(key, (list, tuple)):
                # Branch into every alternative key, then handle the list
                # of results like "..." does
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                # The rest of the path is applied to each of the branches
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # An exact match is preferred even when case-insensitive
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # E.g. "3" becomes 3 and "1:5" becomes slice(1, 5)
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ":" selects everything, the same as "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # type_test returns the value when it is acceptable and None otherwise;
    # a callable expected_type is used as the test itself
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # The path branched: flatten all but one level of the nested
                # lists, dropping the branches that yielded nothing
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
324ad820 6328
6329
def traverse_dict(dictn, keys, casesense=True):
    ''' For backward compatibility. Do not use '''
    # Wrapper around traverse_obj with user-input style keys and traversal
    # into strings always enabled
    legacy_options = {
        'casesense': casesense,
        'is_user_input': True,
        'traverse_string': True,
    }
    return traverse_obj(dictn, keys, **legacy_options)
6606817a 6334
6335
def variadic(x, allowed_types=(str, bytes)):
    """Return x itself if it is an iterable, and the 1-tuple (x,) otherwise

    Instances of allowed_types (by default str and bytes) are treated as
    single values rather than as iterables, so they get wrapped as well.
    """
    if isinstance(x, allowed_types) or not isinstance(x, collections.abc.Iterable):
        return (x,)
    return x