]> jfr.im git - yt-dlp.git/blame - youtube_dlc/utils.py
Modified function `cli_configuration_args`
[yt-dlp.git] / youtube_dlc / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
03f9daab 19import io
79a2e94e 20import itertools
f4bfd65f 21import json
d77c3dfd 22import locale
02dbf93f 23import math
347de493 24import operator
d77c3dfd 25import os
c496ca96 26import platform
773f291d 27import random
d77c3dfd 28import re
c496ca96 29import socket
79a2e94e 30import ssl
1c088fa8 31import subprocess
d77c3dfd 32import sys
181c8655 33import tempfile
c380cc28 34import time
01951dda 35import traceback
bcf89ce6 36import xml.etree.ElementTree
d77c3dfd 37import zlib
d77c3dfd 38
8c25f81b 39from .compat import (
b4a3d461 40 compat_HTMLParseError,
8bb56eee 41 compat_HTMLParser,
8f9312c3 42 compat_basestring,
8c25f81b 43 compat_chr,
1bab3437 44 compat_cookiejar,
d7cd9a9e 45 compat_ctypes_WINFUNCTYPE,
36e6f62c 46 compat_etree_fromstring,
51098426 47 compat_expanduser,
8c25f81b 48 compat_html_entities,
55b2f099 49 compat_html_entities_html5,
be4a824d 50 compat_http_client,
42db58ec 51 compat_integer_types,
c86b6142 52 compat_kwargs,
efa97bdc 53 compat_os_name,
8c25f81b 54 compat_parse_qs,
702ccf2d 55 compat_shlex_quote,
8c25f81b 56 compat_str,
edaa23f8 57 compat_struct_pack,
d3f8e038 58 compat_struct_unpack,
8c25f81b
PH
59 compat_urllib_error,
60 compat_urllib_parse,
15707c7e 61 compat_urllib_parse_urlencode,
8c25f81b 62 compat_urllib_parse_urlparse,
732044af 63 compat_urllib_parse_urlunparse,
64 compat_urllib_parse_quote,
65 compat_urllib_parse_quote_plus,
7581bfc9 66 compat_urllib_parse_unquote_plus,
8c25f81b
PH
67 compat_urllib_request,
68 compat_urlparse,
810c10ba 69 compat_xpath,
8c25f81b 70)
4644ac55 71
71aff188
YCH
72from .socks import (
73 ProxyType,
74 sockssocket,
75)
76
4644ac55 77
51fb4995
YCH
def register_socks_protocols():
    """"Register" the SOCKS URL schemes with the URL parser.

    In Python < 2.6.5, urlsplit() suffers from the bug described at
    https://bugs.python.org/issue7904: URLs whose protocol is not listed in
    urlparse.uses_netloc are not handled correctly. Each SOCKS scheme is
    therefore added to that list unless it is already present, so calling
    this function more than once does not create duplicate entries.
    """
    netloc_schemes = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in netloc_schemes:
            # Already registered; nothing to do for this scheme
            continue
        netloc_schemes.append(socks_scheme)
85
86
468e2e92
FV
# Type of compiled regular expression objects. It is taken from an actual
# compiled (empty) pattern because it is not clearly defined otherwise.
compiled_regex_type = re.compile('').__class__
89
f7a147e3
S
90
91def random_user_agent():
92 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
93 _CHROME_VERSIONS = (
94 '74.0.3729.129',
95 '76.0.3780.3',
96 '76.0.3780.2',
97 '74.0.3729.128',
98 '76.0.3780.1',
99 '76.0.3780.0',
100 '75.0.3770.15',
101 '74.0.3729.127',
102 '74.0.3729.126',
103 '76.0.3779.1',
104 '76.0.3779.0',
105 '75.0.3770.14',
106 '74.0.3729.125',
107 '76.0.3778.1',
108 '76.0.3778.0',
109 '75.0.3770.13',
110 '74.0.3729.124',
111 '74.0.3729.123',
112 '73.0.3683.121',
113 '76.0.3777.1',
114 '76.0.3777.0',
115 '75.0.3770.12',
116 '74.0.3729.122',
117 '76.0.3776.4',
118 '75.0.3770.11',
119 '74.0.3729.121',
120 '76.0.3776.3',
121 '76.0.3776.2',
122 '73.0.3683.120',
123 '74.0.3729.120',
124 '74.0.3729.119',
125 '74.0.3729.118',
126 '76.0.3776.1',
127 '76.0.3776.0',
128 '76.0.3775.5',
129 '75.0.3770.10',
130 '74.0.3729.117',
131 '76.0.3775.4',
132 '76.0.3775.3',
133 '74.0.3729.116',
134 '75.0.3770.9',
135 '76.0.3775.2',
136 '76.0.3775.1',
137 '76.0.3775.0',
138 '75.0.3770.8',
139 '74.0.3729.115',
140 '74.0.3729.114',
141 '76.0.3774.1',
142 '76.0.3774.0',
143 '75.0.3770.7',
144 '74.0.3729.113',
145 '74.0.3729.112',
146 '74.0.3729.111',
147 '76.0.3773.1',
148 '76.0.3773.0',
149 '75.0.3770.6',
150 '74.0.3729.110',
151 '74.0.3729.109',
152 '76.0.3772.1',
153 '76.0.3772.0',
154 '75.0.3770.5',
155 '74.0.3729.108',
156 '74.0.3729.107',
157 '76.0.3771.1',
158 '76.0.3771.0',
159 '75.0.3770.4',
160 '74.0.3729.106',
161 '74.0.3729.105',
162 '75.0.3770.3',
163 '74.0.3729.104',
164 '74.0.3729.103',
165 '74.0.3729.102',
166 '75.0.3770.2',
167 '74.0.3729.101',
168 '75.0.3770.1',
169 '75.0.3770.0',
170 '74.0.3729.100',
171 '75.0.3769.5',
172 '75.0.3769.4',
173 '74.0.3729.99',
174 '75.0.3769.3',
175 '75.0.3769.2',
176 '75.0.3768.6',
177 '74.0.3729.98',
178 '75.0.3769.1',
179 '75.0.3769.0',
180 '74.0.3729.97',
181 '73.0.3683.119',
182 '73.0.3683.118',
183 '74.0.3729.96',
184 '75.0.3768.5',
185 '75.0.3768.4',
186 '75.0.3768.3',
187 '75.0.3768.2',
188 '74.0.3729.95',
189 '74.0.3729.94',
190 '75.0.3768.1',
191 '75.0.3768.0',
192 '74.0.3729.93',
193 '74.0.3729.92',
194 '73.0.3683.117',
195 '74.0.3729.91',
196 '75.0.3766.3',
197 '74.0.3729.90',
198 '75.0.3767.2',
199 '75.0.3767.1',
200 '75.0.3767.0',
201 '74.0.3729.89',
202 '73.0.3683.116',
203 '75.0.3766.2',
204 '74.0.3729.88',
205 '75.0.3766.1',
206 '75.0.3766.0',
207 '74.0.3729.87',
208 '73.0.3683.115',
209 '74.0.3729.86',
210 '75.0.3765.1',
211 '75.0.3765.0',
212 '74.0.3729.85',
213 '73.0.3683.114',
214 '74.0.3729.84',
215 '75.0.3764.1',
216 '75.0.3764.0',
217 '74.0.3729.83',
218 '73.0.3683.113',
219 '75.0.3763.2',
220 '75.0.3761.4',
221 '74.0.3729.82',
222 '75.0.3763.1',
223 '75.0.3763.0',
224 '74.0.3729.81',
225 '73.0.3683.112',
226 '75.0.3762.1',
227 '75.0.3762.0',
228 '74.0.3729.80',
229 '75.0.3761.3',
230 '74.0.3729.79',
231 '73.0.3683.111',
232 '75.0.3761.2',
233 '74.0.3729.78',
234 '74.0.3729.77',
235 '75.0.3761.1',
236 '75.0.3761.0',
237 '73.0.3683.110',
238 '74.0.3729.76',
239 '74.0.3729.75',
240 '75.0.3760.0',
241 '74.0.3729.74',
242 '75.0.3759.8',
243 '75.0.3759.7',
244 '75.0.3759.6',
245 '74.0.3729.73',
246 '75.0.3759.5',
247 '74.0.3729.72',
248 '73.0.3683.109',
249 '75.0.3759.4',
250 '75.0.3759.3',
251 '74.0.3729.71',
252 '75.0.3759.2',
253 '74.0.3729.70',
254 '73.0.3683.108',
255 '74.0.3729.69',
256 '75.0.3759.1',
257 '75.0.3759.0',
258 '74.0.3729.68',
259 '73.0.3683.107',
260 '74.0.3729.67',
261 '75.0.3758.1',
262 '75.0.3758.0',
263 '74.0.3729.66',
264 '73.0.3683.106',
265 '74.0.3729.65',
266 '75.0.3757.1',
267 '75.0.3757.0',
268 '74.0.3729.64',
269 '73.0.3683.105',
270 '74.0.3729.63',
271 '75.0.3756.1',
272 '75.0.3756.0',
273 '74.0.3729.62',
274 '73.0.3683.104',
275 '75.0.3755.3',
276 '75.0.3755.2',
277 '73.0.3683.103',
278 '75.0.3755.1',
279 '75.0.3755.0',
280 '74.0.3729.61',
281 '73.0.3683.102',
282 '74.0.3729.60',
283 '75.0.3754.2',
284 '74.0.3729.59',
285 '75.0.3753.4',
286 '74.0.3729.58',
287 '75.0.3754.1',
288 '75.0.3754.0',
289 '74.0.3729.57',
290 '73.0.3683.101',
291 '75.0.3753.3',
292 '75.0.3752.2',
293 '75.0.3753.2',
294 '74.0.3729.56',
295 '75.0.3753.1',
296 '75.0.3753.0',
297 '74.0.3729.55',
298 '73.0.3683.100',
299 '74.0.3729.54',
300 '75.0.3752.1',
301 '75.0.3752.0',
302 '74.0.3729.53',
303 '73.0.3683.99',
304 '74.0.3729.52',
305 '75.0.3751.1',
306 '75.0.3751.0',
307 '74.0.3729.51',
308 '73.0.3683.98',
309 '74.0.3729.50',
310 '75.0.3750.0',
311 '74.0.3729.49',
312 '74.0.3729.48',
313 '74.0.3729.47',
314 '75.0.3749.3',
315 '74.0.3729.46',
316 '73.0.3683.97',
317 '75.0.3749.2',
318 '74.0.3729.45',
319 '75.0.3749.1',
320 '75.0.3749.0',
321 '74.0.3729.44',
322 '73.0.3683.96',
323 '74.0.3729.43',
324 '74.0.3729.42',
325 '75.0.3748.1',
326 '75.0.3748.0',
327 '74.0.3729.41',
328 '75.0.3747.1',
329 '73.0.3683.95',
330 '75.0.3746.4',
331 '74.0.3729.40',
332 '74.0.3729.39',
333 '75.0.3747.0',
334 '75.0.3746.3',
335 '75.0.3746.2',
336 '74.0.3729.38',
337 '75.0.3746.1',
338 '75.0.3746.0',
339 '74.0.3729.37',
340 '73.0.3683.94',
341 '75.0.3745.5',
342 '75.0.3745.4',
343 '75.0.3745.3',
344 '75.0.3745.2',
345 '74.0.3729.36',
346 '75.0.3745.1',
347 '75.0.3745.0',
348 '75.0.3744.2',
349 '74.0.3729.35',
350 '73.0.3683.93',
351 '74.0.3729.34',
352 '75.0.3744.1',
353 '75.0.3744.0',
354 '74.0.3729.33',
355 '73.0.3683.92',
356 '74.0.3729.32',
357 '74.0.3729.31',
358 '73.0.3683.91',
359 '75.0.3741.2',
360 '75.0.3740.5',
361 '74.0.3729.30',
362 '75.0.3741.1',
363 '75.0.3741.0',
364 '74.0.3729.29',
365 '75.0.3740.4',
366 '73.0.3683.90',
367 '74.0.3729.28',
368 '75.0.3740.3',
369 '73.0.3683.89',
370 '75.0.3740.2',
371 '74.0.3729.27',
372 '75.0.3740.1',
373 '75.0.3740.0',
374 '74.0.3729.26',
375 '73.0.3683.88',
376 '73.0.3683.87',
377 '74.0.3729.25',
378 '75.0.3739.1',
379 '75.0.3739.0',
380 '73.0.3683.86',
381 '74.0.3729.24',
382 '73.0.3683.85',
383 '75.0.3738.4',
384 '75.0.3738.3',
385 '75.0.3738.2',
386 '75.0.3738.1',
387 '75.0.3738.0',
388 '74.0.3729.23',
389 '73.0.3683.84',
390 '74.0.3729.22',
391 '74.0.3729.21',
392 '75.0.3737.1',
393 '75.0.3737.0',
394 '74.0.3729.20',
395 '73.0.3683.83',
396 '74.0.3729.19',
397 '75.0.3736.1',
398 '75.0.3736.0',
399 '74.0.3729.18',
400 '73.0.3683.82',
401 '74.0.3729.17',
402 '75.0.3735.1',
403 '75.0.3735.0',
404 '74.0.3729.16',
405 '73.0.3683.81',
406 '75.0.3734.1',
407 '75.0.3734.0',
408 '74.0.3729.15',
409 '73.0.3683.80',
410 '74.0.3729.14',
411 '75.0.3733.1',
412 '75.0.3733.0',
413 '75.0.3732.1',
414 '74.0.3729.13',
415 '74.0.3729.12',
416 '73.0.3683.79',
417 '74.0.3729.11',
418 '75.0.3732.0',
419 '74.0.3729.10',
420 '73.0.3683.78',
421 '74.0.3729.9',
422 '74.0.3729.8',
423 '74.0.3729.7',
424 '75.0.3731.3',
425 '75.0.3731.2',
426 '75.0.3731.0',
427 '74.0.3729.6',
428 '73.0.3683.77',
429 '73.0.3683.76',
430 '75.0.3730.5',
431 '75.0.3730.4',
432 '73.0.3683.75',
433 '74.0.3729.5',
434 '73.0.3683.74',
435 '75.0.3730.3',
436 '75.0.3730.2',
437 '74.0.3729.4',
438 '73.0.3683.73',
439 '73.0.3683.72',
440 '75.0.3730.1',
441 '75.0.3730.0',
442 '74.0.3729.3',
443 '73.0.3683.71',
444 '74.0.3729.2',
445 '73.0.3683.70',
446 '74.0.3729.1',
447 '74.0.3729.0',
448 '74.0.3726.4',
449 '73.0.3683.69',
450 '74.0.3726.3',
451 '74.0.3728.0',
452 '74.0.3726.2',
453 '73.0.3683.68',
454 '74.0.3726.1',
455 '74.0.3726.0',
456 '74.0.3725.4',
457 '73.0.3683.67',
458 '73.0.3683.66',
459 '74.0.3725.3',
460 '74.0.3725.2',
461 '74.0.3725.1',
462 '74.0.3724.8',
463 '74.0.3725.0',
464 '73.0.3683.65',
465 '74.0.3724.7',
466 '74.0.3724.6',
467 '74.0.3724.5',
468 '74.0.3724.4',
469 '74.0.3724.3',
470 '74.0.3724.2',
471 '74.0.3724.1',
472 '74.0.3724.0',
473 '73.0.3683.64',
474 '74.0.3723.1',
475 '74.0.3723.0',
476 '73.0.3683.63',
477 '74.0.3722.1',
478 '74.0.3722.0',
479 '73.0.3683.62',
480 '74.0.3718.9',
481 '74.0.3702.3',
482 '74.0.3721.3',
483 '74.0.3721.2',
484 '74.0.3721.1',
485 '74.0.3721.0',
486 '74.0.3720.6',
487 '73.0.3683.61',
488 '72.0.3626.122',
489 '73.0.3683.60',
490 '74.0.3720.5',
491 '72.0.3626.121',
492 '74.0.3718.8',
493 '74.0.3720.4',
494 '74.0.3720.3',
495 '74.0.3718.7',
496 '74.0.3720.2',
497 '74.0.3720.1',
498 '74.0.3720.0',
499 '74.0.3718.6',
500 '74.0.3719.5',
501 '73.0.3683.59',
502 '74.0.3718.5',
503 '74.0.3718.4',
504 '74.0.3719.4',
505 '74.0.3719.3',
506 '74.0.3719.2',
507 '74.0.3719.1',
508 '73.0.3683.58',
509 '74.0.3719.0',
510 '73.0.3683.57',
511 '73.0.3683.56',
512 '74.0.3718.3',
513 '73.0.3683.55',
514 '74.0.3718.2',
515 '74.0.3718.1',
516 '74.0.3718.0',
517 '73.0.3683.54',
518 '74.0.3717.2',
519 '73.0.3683.53',
520 '74.0.3717.1',
521 '74.0.3717.0',
522 '73.0.3683.52',
523 '74.0.3716.1',
524 '74.0.3716.0',
525 '73.0.3683.51',
526 '74.0.3715.1',
527 '74.0.3715.0',
528 '73.0.3683.50',
529 '74.0.3711.2',
530 '74.0.3714.2',
531 '74.0.3713.3',
532 '74.0.3714.1',
533 '74.0.3714.0',
534 '73.0.3683.49',
535 '74.0.3713.1',
536 '74.0.3713.0',
537 '72.0.3626.120',
538 '73.0.3683.48',
539 '74.0.3712.2',
540 '74.0.3712.1',
541 '74.0.3712.0',
542 '73.0.3683.47',
543 '72.0.3626.119',
544 '73.0.3683.46',
545 '74.0.3710.2',
546 '72.0.3626.118',
547 '74.0.3711.1',
548 '74.0.3711.0',
549 '73.0.3683.45',
550 '72.0.3626.117',
551 '74.0.3710.1',
552 '74.0.3710.0',
553 '73.0.3683.44',
554 '72.0.3626.116',
555 '74.0.3709.1',
556 '74.0.3709.0',
557 '74.0.3704.9',
558 '73.0.3683.43',
559 '72.0.3626.115',
560 '74.0.3704.8',
561 '74.0.3704.7',
562 '74.0.3708.0',
563 '74.0.3706.7',
564 '74.0.3704.6',
565 '73.0.3683.42',
566 '72.0.3626.114',
567 '74.0.3706.6',
568 '72.0.3626.113',
569 '74.0.3704.5',
570 '74.0.3706.5',
571 '74.0.3706.4',
572 '74.0.3706.3',
573 '74.0.3706.2',
574 '74.0.3706.1',
575 '74.0.3706.0',
576 '73.0.3683.41',
577 '72.0.3626.112',
578 '74.0.3705.1',
579 '74.0.3705.0',
580 '73.0.3683.40',
581 '72.0.3626.111',
582 '73.0.3683.39',
583 '74.0.3704.4',
584 '73.0.3683.38',
585 '74.0.3704.3',
586 '74.0.3704.2',
587 '74.0.3704.1',
588 '74.0.3704.0',
589 '73.0.3683.37',
590 '72.0.3626.110',
591 '72.0.3626.109',
592 '74.0.3703.3',
593 '74.0.3703.2',
594 '73.0.3683.36',
595 '74.0.3703.1',
596 '74.0.3703.0',
597 '73.0.3683.35',
598 '72.0.3626.108',
599 '74.0.3702.2',
600 '74.0.3699.3',
601 '74.0.3702.1',
602 '74.0.3702.0',
603 '73.0.3683.34',
604 '72.0.3626.107',
605 '73.0.3683.33',
606 '74.0.3701.1',
607 '74.0.3701.0',
608 '73.0.3683.32',
609 '73.0.3683.31',
610 '72.0.3626.105',
611 '74.0.3700.1',
612 '74.0.3700.0',
613 '73.0.3683.29',
614 '72.0.3626.103',
615 '74.0.3699.2',
616 '74.0.3699.1',
617 '74.0.3699.0',
618 '73.0.3683.28',
619 '72.0.3626.102',
620 '73.0.3683.27',
621 '73.0.3683.26',
622 '74.0.3698.0',
623 '74.0.3696.2',
624 '72.0.3626.101',
625 '73.0.3683.25',
626 '74.0.3696.1',
627 '74.0.3696.0',
628 '74.0.3694.8',
629 '72.0.3626.100',
630 '74.0.3694.7',
631 '74.0.3694.6',
632 '74.0.3694.5',
633 '74.0.3694.4',
634 '72.0.3626.99',
635 '72.0.3626.98',
636 '74.0.3694.3',
637 '73.0.3683.24',
638 '72.0.3626.97',
639 '72.0.3626.96',
640 '72.0.3626.95',
641 '73.0.3683.23',
642 '72.0.3626.94',
643 '73.0.3683.22',
644 '73.0.3683.21',
645 '72.0.3626.93',
646 '74.0.3694.2',
647 '72.0.3626.92',
648 '74.0.3694.1',
649 '74.0.3694.0',
650 '74.0.3693.6',
651 '73.0.3683.20',
652 '72.0.3626.91',
653 '74.0.3693.5',
654 '74.0.3693.4',
655 '74.0.3693.3',
656 '74.0.3693.2',
657 '73.0.3683.19',
658 '74.0.3693.1',
659 '74.0.3693.0',
660 '73.0.3683.18',
661 '72.0.3626.90',
662 '74.0.3692.1',
663 '74.0.3692.0',
664 '73.0.3683.17',
665 '72.0.3626.89',
666 '74.0.3687.3',
667 '74.0.3691.1',
668 '74.0.3691.0',
669 '73.0.3683.16',
670 '72.0.3626.88',
671 '72.0.3626.87',
672 '73.0.3683.15',
673 '74.0.3690.1',
674 '74.0.3690.0',
675 '73.0.3683.14',
676 '72.0.3626.86',
677 '73.0.3683.13',
678 '73.0.3683.12',
679 '74.0.3689.1',
680 '74.0.3689.0',
681 '73.0.3683.11',
682 '72.0.3626.85',
683 '73.0.3683.10',
684 '72.0.3626.84',
685 '73.0.3683.9',
686 '74.0.3688.1',
687 '74.0.3688.0',
688 '73.0.3683.8',
689 '72.0.3626.83',
690 '74.0.3687.2',
691 '74.0.3687.1',
692 '74.0.3687.0',
693 '73.0.3683.7',
694 '72.0.3626.82',
695 '74.0.3686.4',
696 '72.0.3626.81',
697 '74.0.3686.3',
698 '74.0.3686.2',
699 '74.0.3686.1',
700 '74.0.3686.0',
701 '73.0.3683.6',
702 '72.0.3626.80',
703 '74.0.3685.1',
704 '74.0.3685.0',
705 '73.0.3683.5',
706 '72.0.3626.79',
707 '74.0.3684.1',
708 '74.0.3684.0',
709 '73.0.3683.4',
710 '72.0.3626.78',
711 '72.0.3626.77',
712 '73.0.3683.3',
713 '73.0.3683.2',
714 '72.0.3626.76',
715 '73.0.3683.1',
716 '73.0.3683.0',
717 '72.0.3626.75',
718 '71.0.3578.141',
719 '73.0.3682.1',
720 '73.0.3682.0',
721 '72.0.3626.74',
722 '71.0.3578.140',
723 '73.0.3681.4',
724 '73.0.3681.3',
725 '73.0.3681.2',
726 '73.0.3681.1',
727 '73.0.3681.0',
728 '72.0.3626.73',
729 '71.0.3578.139',
730 '72.0.3626.72',
731 '72.0.3626.71',
732 '73.0.3680.1',
733 '73.0.3680.0',
734 '72.0.3626.70',
735 '71.0.3578.138',
736 '73.0.3678.2',
737 '73.0.3679.1',
738 '73.0.3679.0',
739 '72.0.3626.69',
740 '71.0.3578.137',
741 '73.0.3678.1',
742 '73.0.3678.0',
743 '71.0.3578.136',
744 '73.0.3677.1',
745 '73.0.3677.0',
746 '72.0.3626.68',
747 '72.0.3626.67',
748 '71.0.3578.135',
749 '73.0.3676.1',
750 '73.0.3676.0',
751 '73.0.3674.2',
752 '72.0.3626.66',
753 '71.0.3578.134',
754 '73.0.3674.1',
755 '73.0.3674.0',
756 '72.0.3626.65',
757 '71.0.3578.133',
758 '73.0.3673.2',
759 '73.0.3673.1',
760 '73.0.3673.0',
761 '72.0.3626.64',
762 '71.0.3578.132',
763 '72.0.3626.63',
764 '72.0.3626.62',
765 '72.0.3626.61',
766 '72.0.3626.60',
767 '73.0.3672.1',
768 '73.0.3672.0',
769 '72.0.3626.59',
770 '71.0.3578.131',
771 '73.0.3671.3',
772 '73.0.3671.2',
773 '73.0.3671.1',
774 '73.0.3671.0',
775 '72.0.3626.58',
776 '71.0.3578.130',
777 '73.0.3670.1',
778 '73.0.3670.0',
779 '72.0.3626.57',
780 '71.0.3578.129',
781 '73.0.3669.1',
782 '73.0.3669.0',
783 '72.0.3626.56',
784 '71.0.3578.128',
785 '73.0.3668.2',
786 '73.0.3668.1',
787 '73.0.3668.0',
788 '72.0.3626.55',
789 '71.0.3578.127',
790 '73.0.3667.2',
791 '73.0.3667.1',
792 '73.0.3667.0',
793 '72.0.3626.54',
794 '71.0.3578.126',
795 '73.0.3666.1',
796 '73.0.3666.0',
797 '72.0.3626.53',
798 '71.0.3578.125',
799 '73.0.3665.4',
800 '73.0.3665.3',
801 '72.0.3626.52',
802 '73.0.3665.2',
803 '73.0.3664.4',
804 '73.0.3665.1',
805 '73.0.3665.0',
806 '72.0.3626.51',
807 '71.0.3578.124',
808 '72.0.3626.50',
809 '73.0.3664.3',
810 '73.0.3664.2',
811 '73.0.3664.1',
812 '73.0.3664.0',
813 '73.0.3663.2',
814 '72.0.3626.49',
815 '71.0.3578.123',
816 '73.0.3663.1',
817 '73.0.3663.0',
818 '72.0.3626.48',
819 '71.0.3578.122',
820 '73.0.3662.1',
821 '73.0.3662.0',
822 '72.0.3626.47',
823 '71.0.3578.121',
824 '73.0.3661.1',
825 '72.0.3626.46',
826 '73.0.3661.0',
827 '72.0.3626.45',
828 '71.0.3578.120',
829 '73.0.3660.2',
830 '73.0.3660.1',
831 '73.0.3660.0',
832 '72.0.3626.44',
833 '71.0.3578.119',
834 '73.0.3659.1',
835 '73.0.3659.0',
836 '72.0.3626.43',
837 '71.0.3578.118',
838 '73.0.3658.1',
839 '73.0.3658.0',
840 '72.0.3626.42',
841 '71.0.3578.117',
842 '73.0.3657.1',
843 '73.0.3657.0',
844 '72.0.3626.41',
845 '71.0.3578.116',
846 '73.0.3656.1',
847 '73.0.3656.0',
848 '72.0.3626.40',
849 '71.0.3578.115',
850 '73.0.3655.1',
851 '73.0.3655.0',
852 '72.0.3626.39',
853 '71.0.3578.114',
854 '73.0.3654.1',
855 '73.0.3654.0',
856 '72.0.3626.38',
857 '71.0.3578.113',
858 '73.0.3653.1',
859 '73.0.3653.0',
860 '72.0.3626.37',
861 '71.0.3578.112',
862 '73.0.3652.1',
863 '73.0.3652.0',
864 '72.0.3626.36',
865 '71.0.3578.111',
866 '73.0.3651.1',
867 '73.0.3651.0',
868 '72.0.3626.35',
869 '71.0.3578.110',
870 '73.0.3650.1',
871 '73.0.3650.0',
872 '72.0.3626.34',
873 '71.0.3578.109',
874 '73.0.3649.1',
875 '73.0.3649.0',
876 '72.0.3626.33',
877 '71.0.3578.108',
878 '73.0.3648.2',
879 '73.0.3648.1',
880 '73.0.3648.0',
881 '72.0.3626.32',
882 '71.0.3578.107',
883 '73.0.3647.2',
884 '73.0.3647.1',
885 '73.0.3647.0',
886 '72.0.3626.31',
887 '71.0.3578.106',
888 '73.0.3635.3',
889 '73.0.3646.2',
890 '73.0.3646.1',
891 '73.0.3646.0',
892 '72.0.3626.30',
893 '71.0.3578.105',
894 '72.0.3626.29',
895 '73.0.3645.2',
896 '73.0.3645.1',
897 '73.0.3645.0',
898 '72.0.3626.28',
899 '71.0.3578.104',
900 '72.0.3626.27',
901 '72.0.3626.26',
902 '72.0.3626.25',
903 '72.0.3626.24',
904 '73.0.3644.0',
905 '73.0.3643.2',
906 '72.0.3626.23',
907 '71.0.3578.103',
908 '73.0.3643.1',
909 '73.0.3643.0',
910 '72.0.3626.22',
911 '71.0.3578.102',
912 '73.0.3642.1',
913 '73.0.3642.0',
914 '72.0.3626.21',
915 '71.0.3578.101',
916 '73.0.3641.1',
917 '73.0.3641.0',
918 '72.0.3626.20',
919 '71.0.3578.100',
920 '72.0.3626.19',
921 '73.0.3640.1',
922 '73.0.3640.0',
923 '72.0.3626.18',
924 '73.0.3639.1',
925 '71.0.3578.99',
926 '73.0.3639.0',
927 '72.0.3626.17',
928 '73.0.3638.2',
929 '72.0.3626.16',
930 '73.0.3638.1',
931 '73.0.3638.0',
932 '72.0.3626.15',
933 '71.0.3578.98',
934 '73.0.3635.2',
935 '71.0.3578.97',
936 '73.0.3637.1',
937 '73.0.3637.0',
938 '72.0.3626.14',
939 '71.0.3578.96',
940 '71.0.3578.95',
941 '72.0.3626.13',
942 '71.0.3578.94',
943 '73.0.3636.2',
944 '71.0.3578.93',
945 '73.0.3636.1',
946 '73.0.3636.0',
947 '72.0.3626.12',
948 '71.0.3578.92',
949 '73.0.3635.1',
950 '73.0.3635.0',
951 '72.0.3626.11',
952 '71.0.3578.91',
953 '73.0.3634.2',
954 '73.0.3634.1',
955 '73.0.3634.0',
956 '72.0.3626.10',
957 '71.0.3578.90',
958 '71.0.3578.89',
959 '73.0.3633.2',
960 '73.0.3633.1',
961 '73.0.3633.0',
962 '72.0.3610.4',
963 '72.0.3626.9',
964 '71.0.3578.88',
965 '73.0.3632.5',
966 '73.0.3632.4',
967 '73.0.3632.3',
968 '73.0.3632.2',
969 '73.0.3632.1',
970 '73.0.3632.0',
971 '72.0.3626.8',
972 '71.0.3578.87',
973 '73.0.3631.2',
974 '73.0.3631.1',
975 '73.0.3631.0',
976 '72.0.3626.7',
977 '71.0.3578.86',
978 '72.0.3626.6',
979 '73.0.3630.1',
980 '73.0.3630.0',
981 '72.0.3626.5',
982 '71.0.3578.85',
983 '72.0.3626.4',
984 '73.0.3628.3',
985 '73.0.3628.2',
986 '73.0.3629.1',
987 '73.0.3629.0',
988 '72.0.3626.3',
989 '71.0.3578.84',
990 '73.0.3628.1',
991 '73.0.3628.0',
992 '71.0.3578.83',
993 '73.0.3627.1',
994 '73.0.3627.0',
995 '72.0.3626.2',
996 '71.0.3578.82',
997 '71.0.3578.81',
998 '71.0.3578.80',
999 '72.0.3626.1',
1000 '72.0.3626.0',
1001 '71.0.3578.79',
1002 '70.0.3538.124',
1003 '71.0.3578.78',
1004 '72.0.3623.4',
1005 '72.0.3625.2',
1006 '72.0.3625.1',
1007 '72.0.3625.0',
1008 '71.0.3578.77',
1009 '70.0.3538.123',
1010 '72.0.3624.4',
1011 '72.0.3624.3',
1012 '72.0.3624.2',
1013 '71.0.3578.76',
1014 '72.0.3624.1',
1015 '72.0.3624.0',
1016 '72.0.3623.3',
1017 '71.0.3578.75',
1018 '70.0.3538.122',
1019 '71.0.3578.74',
1020 '72.0.3623.2',
1021 '72.0.3610.3',
1022 '72.0.3623.1',
1023 '72.0.3623.0',
1024 '72.0.3622.3',
1025 '72.0.3622.2',
1026 '71.0.3578.73',
1027 '70.0.3538.121',
1028 '72.0.3622.1',
1029 '72.0.3622.0',
1030 '71.0.3578.72',
1031 '70.0.3538.120',
1032 '72.0.3621.1',
1033 '72.0.3621.0',
1034 '71.0.3578.71',
1035 '70.0.3538.119',
1036 '72.0.3620.1',
1037 '72.0.3620.0',
1038 '71.0.3578.70',
1039 '70.0.3538.118',
1040 '71.0.3578.69',
1041 '72.0.3619.1',
1042 '72.0.3619.0',
1043 '71.0.3578.68',
1044 '70.0.3538.117',
1045 '71.0.3578.67',
1046 '72.0.3618.1',
1047 '72.0.3618.0',
1048 '71.0.3578.66',
1049 '70.0.3538.116',
1050 '72.0.3617.1',
1051 '72.0.3617.0',
1052 '71.0.3578.65',
1053 '70.0.3538.115',
1054 '72.0.3602.3',
1055 '71.0.3578.64',
1056 '72.0.3616.1',
1057 '72.0.3616.0',
1058 '71.0.3578.63',
1059 '70.0.3538.114',
1060 '71.0.3578.62',
1061 '72.0.3615.1',
1062 '72.0.3615.0',
1063 '71.0.3578.61',
1064 '70.0.3538.113',
1065 '72.0.3614.1',
1066 '72.0.3614.0',
1067 '71.0.3578.60',
1068 '70.0.3538.112',
1069 '72.0.3613.1',
1070 '72.0.3613.0',
1071 '71.0.3578.59',
1072 '70.0.3538.111',
1073 '72.0.3612.2',
1074 '72.0.3612.1',
1075 '72.0.3612.0',
1076 '70.0.3538.110',
1077 '71.0.3578.58',
1078 '70.0.3538.109',
1079 '72.0.3611.2',
1080 '72.0.3611.1',
1081 '72.0.3611.0',
1082 '71.0.3578.57',
1083 '70.0.3538.108',
1084 '72.0.3610.2',
1085 '71.0.3578.56',
1086 '71.0.3578.55',
1087 '72.0.3610.1',
1088 '72.0.3610.0',
1089 '71.0.3578.54',
1090 '70.0.3538.107',
1091 '71.0.3578.53',
1092 '72.0.3609.3',
1093 '71.0.3578.52',
1094 '72.0.3609.2',
1095 '71.0.3578.51',
1096 '72.0.3608.5',
1097 '72.0.3609.1',
1098 '72.0.3609.0',
1099 '71.0.3578.50',
1100 '70.0.3538.106',
1101 '72.0.3608.4',
1102 '72.0.3608.3',
1103 '72.0.3608.2',
1104 '71.0.3578.49',
1105 '72.0.3608.1',
1106 '72.0.3608.0',
1107 '70.0.3538.105',
1108 '71.0.3578.48',
1109 '72.0.3607.1',
1110 '72.0.3607.0',
1111 '71.0.3578.47',
1112 '70.0.3538.104',
1113 '72.0.3606.2',
1114 '72.0.3606.1',
1115 '72.0.3606.0',
1116 '71.0.3578.46',
1117 '70.0.3538.103',
1118 '70.0.3538.102',
1119 '72.0.3605.3',
1120 '72.0.3605.2',
1121 '72.0.3605.1',
1122 '72.0.3605.0',
1123 '71.0.3578.45',
1124 '70.0.3538.101',
1125 '71.0.3578.44',
1126 '71.0.3578.43',
1127 '70.0.3538.100',
1128 '70.0.3538.99',
1129 '71.0.3578.42',
1130 '72.0.3604.1',
1131 '72.0.3604.0',
1132 '71.0.3578.41',
1133 '70.0.3538.98',
1134 '71.0.3578.40',
1135 '72.0.3603.2',
1136 '72.0.3603.1',
1137 '72.0.3603.0',
1138 '71.0.3578.39',
1139 '70.0.3538.97',
1140 '72.0.3602.2',
1141 '71.0.3578.38',
1142 '71.0.3578.37',
1143 '72.0.3602.1',
1144 '72.0.3602.0',
1145 '71.0.3578.36',
1146 '70.0.3538.96',
1147 '72.0.3601.1',
1148 '72.0.3601.0',
1149 '71.0.3578.35',
1150 '70.0.3538.95',
1151 '72.0.3600.1',
1152 '72.0.3600.0',
1153 '71.0.3578.34',
1154 '70.0.3538.94',
1155 '72.0.3599.3',
1156 '72.0.3599.2',
1157 '72.0.3599.1',
1158 '72.0.3599.0',
1159 '71.0.3578.33',
1160 '70.0.3538.93',
1161 '72.0.3598.1',
1162 '72.0.3598.0',
1163 '71.0.3578.32',
1164 '70.0.3538.87',
1165 '72.0.3597.1',
1166 '72.0.3597.0',
1167 '72.0.3596.2',
1168 '71.0.3578.31',
1169 '70.0.3538.86',
1170 '71.0.3578.30',
1171 '71.0.3578.29',
1172 '72.0.3596.1',
1173 '72.0.3596.0',
1174 '71.0.3578.28',
1175 '70.0.3538.85',
1176 '72.0.3595.2',
1177 '72.0.3591.3',
1178 '72.0.3595.1',
1179 '72.0.3595.0',
1180 '71.0.3578.27',
1181 '70.0.3538.84',
1182 '72.0.3594.1',
1183 '72.0.3594.0',
1184 '71.0.3578.26',
1185 '70.0.3538.83',
1186 '72.0.3593.2',
1187 '72.0.3593.1',
1188 '72.0.3593.0',
1189 '71.0.3578.25',
1190 '70.0.3538.82',
1191 '72.0.3589.3',
1192 '72.0.3592.2',
1193 '72.0.3592.1',
1194 '72.0.3592.0',
1195 '71.0.3578.24',
1196 '72.0.3589.2',
1197 '70.0.3538.81',
1198 '70.0.3538.80',
1199 '72.0.3591.2',
1200 '72.0.3591.1',
1201 '72.0.3591.0',
1202 '71.0.3578.23',
1203 '70.0.3538.79',
1204 '71.0.3578.22',
1205 '72.0.3590.1',
1206 '72.0.3590.0',
1207 '71.0.3578.21',
1208 '70.0.3538.78',
1209 '70.0.3538.77',
1210 '72.0.3589.1',
1211 '72.0.3589.0',
1212 '71.0.3578.20',
1213 '70.0.3538.76',
1214 '71.0.3578.19',
1215 '70.0.3538.75',
1216 '72.0.3588.1',
1217 '72.0.3588.0',
1218 '71.0.3578.18',
1219 '70.0.3538.74',
1220 '72.0.3586.2',
1221 '72.0.3587.0',
1222 '71.0.3578.17',
1223 '70.0.3538.73',
1224 '72.0.3586.1',
1225 '72.0.3586.0',
1226 '71.0.3578.16',
1227 '70.0.3538.72',
1228 '72.0.3585.1',
1229 '72.0.3585.0',
1230 '71.0.3578.15',
1231 '70.0.3538.71',
1232 '71.0.3578.14',
1233 '72.0.3584.1',
1234 '72.0.3584.0',
1235 '71.0.3578.13',
1236 '70.0.3538.70',
1237 '72.0.3583.2',
1238 '71.0.3578.12',
1239 '72.0.3583.1',
1240 '72.0.3583.0',
1241 '71.0.3578.11',
1242 '70.0.3538.69',
1243 '71.0.3578.10',
1244 '72.0.3582.0',
1245 '72.0.3581.4',
1246 '71.0.3578.9',
1247 '70.0.3538.67',
1248 '72.0.3581.3',
1249 '72.0.3581.2',
1250 '72.0.3581.1',
1251 '72.0.3581.0',
1252 '71.0.3578.8',
1253 '70.0.3538.66',
1254 '72.0.3580.1',
1255 '72.0.3580.0',
1256 '71.0.3578.7',
1257 '70.0.3538.65',
1258 '71.0.3578.6',
1259 '72.0.3579.1',
1260 '72.0.3579.0',
1261 '71.0.3578.5',
1262 '70.0.3538.64',
1263 '71.0.3578.4',
1264 '71.0.3578.3',
1265 '71.0.3578.2',
1266 '71.0.3578.1',
1267 '71.0.3578.0',
1268 '70.0.3538.63',
1269 '69.0.3497.128',
1270 '70.0.3538.62',
1271 '70.0.3538.61',
1272 '70.0.3538.60',
1273 '70.0.3538.59',
1274 '71.0.3577.1',
1275 '71.0.3577.0',
1276 '70.0.3538.58',
1277 '69.0.3497.127',
1278 '71.0.3576.2',
1279 '71.0.3576.1',
1280 '71.0.3576.0',
1281 '70.0.3538.57',
1282 '70.0.3538.56',
1283 '71.0.3575.2',
1284 '70.0.3538.55',
1285 '69.0.3497.126',
1286 '70.0.3538.54',
1287 '71.0.3575.1',
1288 '71.0.3575.0',
1289 '71.0.3574.1',
1290 '71.0.3574.0',
1291 '70.0.3538.53',
1292 '69.0.3497.125',
1293 '70.0.3538.52',
1294 '71.0.3573.1',
1295 '71.0.3573.0',
1296 '70.0.3538.51',
1297 '69.0.3497.124',
1298 '71.0.3572.1',
1299 '71.0.3572.0',
1300 '70.0.3538.50',
1301 '69.0.3497.123',
1302 '71.0.3571.2',
1303 '70.0.3538.49',
1304 '69.0.3497.122',
1305 '71.0.3571.1',
1306 '71.0.3571.0',
1307 '70.0.3538.48',
1308 '69.0.3497.121',
1309 '71.0.3570.1',
1310 '71.0.3570.0',
1311 '70.0.3538.47',
1312 '69.0.3497.120',
1313 '71.0.3568.2',
1314 '71.0.3569.1',
1315 '71.0.3569.0',
1316 '70.0.3538.46',
1317 '69.0.3497.119',
1318 '70.0.3538.45',
1319 '71.0.3568.1',
1320 '71.0.3568.0',
1321 '70.0.3538.44',
1322 '69.0.3497.118',
1323 '70.0.3538.43',
1324 '70.0.3538.42',
1325 '71.0.3567.1',
1326 '71.0.3567.0',
1327 '70.0.3538.41',
1328 '69.0.3497.117',
1329 '71.0.3566.1',
1330 '71.0.3566.0',
1331 '70.0.3538.40',
1332 '69.0.3497.116',
1333 '71.0.3565.1',
1334 '71.0.3565.0',
1335 '70.0.3538.39',
1336 '69.0.3497.115',
1337 '71.0.3564.1',
1338 '71.0.3564.0',
1339 '70.0.3538.38',
1340 '69.0.3497.114',
1341 '71.0.3563.0',
1342 '71.0.3562.2',
1343 '70.0.3538.37',
1344 '69.0.3497.113',
1345 '70.0.3538.36',
1346 '70.0.3538.35',
1347 '71.0.3562.1',
1348 '71.0.3562.0',
1349 '70.0.3538.34',
1350 '69.0.3497.112',
1351 '70.0.3538.33',
1352 '71.0.3561.1',
1353 '71.0.3561.0',
1354 '70.0.3538.32',
1355 '69.0.3497.111',
1356 '71.0.3559.6',
1357 '71.0.3560.1',
1358 '71.0.3560.0',
1359 '71.0.3559.5',
1360 '71.0.3559.4',
1361 '70.0.3538.31',
1362 '69.0.3497.110',
1363 '71.0.3559.3',
1364 '70.0.3538.30',
1365 '69.0.3497.109',
1366 '71.0.3559.2',
1367 '71.0.3559.1',
1368 '71.0.3559.0',
1369 '70.0.3538.29',
1370 '69.0.3497.108',
1371 '71.0.3558.2',
1372 '71.0.3558.1',
1373 '71.0.3558.0',
1374 '70.0.3538.28',
1375 '69.0.3497.107',
1376 '71.0.3557.2',
1377 '71.0.3557.1',
1378 '71.0.3557.0',
1379 '70.0.3538.27',
1380 '69.0.3497.106',
1381 '71.0.3554.4',
1382 '70.0.3538.26',
1383 '71.0.3556.1',
1384 '71.0.3556.0',
1385 '70.0.3538.25',
1386 '71.0.3554.3',
1387 '69.0.3497.105',
1388 '71.0.3554.2',
1389 '70.0.3538.24',
1390 '69.0.3497.104',
1391 '71.0.3555.2',
1392 '70.0.3538.23',
1393 '71.0.3555.1',
1394 '71.0.3555.0',
1395 '70.0.3538.22',
1396 '69.0.3497.103',
1397 '71.0.3554.1',
1398 '71.0.3554.0',
1399 '70.0.3538.21',
1400 '69.0.3497.102',
1401 '71.0.3553.3',
1402 '70.0.3538.20',
1403 '69.0.3497.101',
1404 '71.0.3553.2',
1405 '69.0.3497.100',
1406 '71.0.3553.1',
1407 '71.0.3553.0',
1408 '70.0.3538.19',
1409 '69.0.3497.99',
1410 '69.0.3497.98',
1411 '69.0.3497.97',
1412 '71.0.3552.6',
1413 '71.0.3552.5',
1414 '71.0.3552.4',
1415 '71.0.3552.3',
1416 '71.0.3552.2',
1417 '71.0.3552.1',
1418 '71.0.3552.0',
1419 '70.0.3538.18',
1420 '69.0.3497.96',
1421 '71.0.3551.3',
1422 '71.0.3551.2',
1423 '71.0.3551.1',
1424 '71.0.3551.0',
1425 '70.0.3538.17',
1426 '69.0.3497.95',
1427 '71.0.3550.3',
1428 '71.0.3550.2',
1429 '71.0.3550.1',
1430 '71.0.3550.0',
1431 '70.0.3538.16',
1432 '69.0.3497.94',
1433 '71.0.3549.1',
1434 '71.0.3549.0',
1435 '70.0.3538.15',
1436 '69.0.3497.93',
1437 '69.0.3497.92',
1438 '71.0.3548.1',
1439 '71.0.3548.0',
1440 '70.0.3538.14',
1441 '69.0.3497.91',
1442 '71.0.3547.1',
1443 '71.0.3547.0',
1444 '70.0.3538.13',
1445 '69.0.3497.90',
1446 '71.0.3546.2',
1447 '69.0.3497.89',
1448 '71.0.3546.1',
1449 '71.0.3546.0',
1450 '70.0.3538.12',
1451 '69.0.3497.88',
1452 '71.0.3545.4',
1453 '71.0.3545.3',
1454 '71.0.3545.2',
1455 '71.0.3545.1',
1456 '71.0.3545.0',
1457 '70.0.3538.11',
1458 '69.0.3497.87',
1459 '71.0.3544.5',
1460 '71.0.3544.4',
1461 '71.0.3544.3',
1462 '71.0.3544.2',
1463 '71.0.3544.1',
1464 '71.0.3544.0',
1465 '69.0.3497.86',
1466 '70.0.3538.10',
1467 '69.0.3497.85',
1468 '70.0.3538.9',
1469 '69.0.3497.84',
1470 '71.0.3543.4',
1471 '70.0.3538.8',
1472 '71.0.3543.3',
1473 '71.0.3543.2',
1474 '71.0.3543.1',
1475 '71.0.3543.0',
1476 '70.0.3538.7',
1477 '69.0.3497.83',
1478 '71.0.3542.2',
1479 '71.0.3542.1',
1480 '71.0.3542.0',
1481 '70.0.3538.6',
1482 '69.0.3497.82',
1483 '69.0.3497.81',
1484 '71.0.3541.1',
1485 '71.0.3541.0',
1486 '70.0.3538.5',
1487 '69.0.3497.80',
1488 '71.0.3540.1',
1489 '71.0.3540.0',
1490 '70.0.3538.4',
1491 '69.0.3497.79',
1492 '70.0.3538.3',
1493 '71.0.3539.1',
1494 '71.0.3539.0',
1495 '69.0.3497.78',
1496 '68.0.3440.134',
1497 '69.0.3497.77',
1498 '70.0.3538.2',
1499 '70.0.3538.1',
1500 '70.0.3538.0',
1501 '69.0.3497.76',
1502 '68.0.3440.133',
1503 '69.0.3497.75',
1504 '70.0.3537.2',
1505 '70.0.3537.1',
1506 '70.0.3537.0',
1507 '69.0.3497.74',
1508 '68.0.3440.132',
1509 '70.0.3536.0',
1510 '70.0.3535.5',
1511 '70.0.3535.4',
1512 '70.0.3535.3',
1513 '69.0.3497.73',
1514 '68.0.3440.131',
1515 '70.0.3532.8',
1516 '70.0.3532.7',
1517 '69.0.3497.72',
1518 '69.0.3497.71',
1519 '70.0.3535.2',
1520 '70.0.3535.1',
1521 '70.0.3535.0',
1522 '69.0.3497.70',
1523 '68.0.3440.130',
1524 '69.0.3497.69',
1525 '68.0.3440.129',
1526 '70.0.3534.4',
1527 '70.0.3534.3',
1528 '70.0.3534.2',
1529 '70.0.3534.1',
1530 '70.0.3534.0',
1531 '69.0.3497.68',
1532 '68.0.3440.128',
1533 '70.0.3533.2',
1534 '70.0.3533.1',
1535 '70.0.3533.0',
1536 '69.0.3497.67',
1537 '68.0.3440.127',
1538 '70.0.3532.6',
1539 '70.0.3532.5',
1540 '70.0.3532.4',
1541 '69.0.3497.66',
1542 '68.0.3440.126',
1543 '70.0.3532.3',
1544 '70.0.3532.2',
1545 '70.0.3532.1',
1546 '69.0.3497.60',
1547 '69.0.3497.65',
1548 '69.0.3497.64',
1549 '70.0.3532.0',
1550 '70.0.3531.0',
1551 '70.0.3530.4',
1552 '70.0.3530.3',
1553 '70.0.3530.2',
1554 '69.0.3497.58',
1555 '68.0.3440.125',
1556 '69.0.3497.57',
1557 '69.0.3497.56',
1558 '69.0.3497.55',
1559 '69.0.3497.54',
1560 '70.0.3530.1',
1561 '70.0.3530.0',
1562 '69.0.3497.53',
1563 '68.0.3440.124',
1564 '69.0.3497.52',
1565 '70.0.3529.3',
1566 '70.0.3529.2',
1567 '70.0.3529.1',
1568 '70.0.3529.0',
1569 '69.0.3497.51',
1570 '70.0.3528.4',
1571 '68.0.3440.123',
1572 '70.0.3528.3',
1573 '70.0.3528.2',
1574 '70.0.3528.1',
1575 '70.0.3528.0',
1576 '69.0.3497.50',
1577 '68.0.3440.122',
1578 '70.0.3527.1',
1579 '70.0.3527.0',
1580 '69.0.3497.49',
1581 '68.0.3440.121',
1582 '70.0.3526.1',
1583 '70.0.3526.0',
1584 '68.0.3440.120',
1585 '69.0.3497.48',
1586 '69.0.3497.47',
1587 '68.0.3440.119',
1588 '68.0.3440.118',
1589 '70.0.3525.5',
1590 '70.0.3525.4',
1591 '70.0.3525.3',
1592 '68.0.3440.117',
1593 '69.0.3497.46',
1594 '70.0.3525.2',
1595 '70.0.3525.1',
1596 '70.0.3525.0',
1597 '69.0.3497.45',
1598 '68.0.3440.116',
1599 '70.0.3524.4',
1600 '70.0.3524.3',
1601 '69.0.3497.44',
1602 '70.0.3524.2',
1603 '70.0.3524.1',
1604 '70.0.3524.0',
1605 '70.0.3523.2',
1606 '69.0.3497.43',
1607 '68.0.3440.115',
1608 '70.0.3505.9',
1609 '69.0.3497.42',
1610 '70.0.3505.8',
1611 '70.0.3523.1',
1612 '70.0.3523.0',
1613 '69.0.3497.41',
1614 '68.0.3440.114',
1615 '70.0.3505.7',
1616 '69.0.3497.40',
1617 '70.0.3522.1',
1618 '70.0.3522.0',
1619 '70.0.3521.2',
1620 '69.0.3497.39',
1621 '68.0.3440.113',
1622 '70.0.3505.6',
1623 '70.0.3521.1',
1624 '70.0.3521.0',
1625 '69.0.3497.38',
1626 '68.0.3440.112',
1627 '70.0.3520.1',
1628 '70.0.3520.0',
1629 '69.0.3497.37',
1630 '68.0.3440.111',
1631 '70.0.3519.3',
1632 '70.0.3519.2',
1633 '70.0.3519.1',
1634 '70.0.3519.0',
1635 '69.0.3497.36',
1636 '68.0.3440.110',
1637 '70.0.3518.1',
1638 '70.0.3518.0',
1639 '69.0.3497.35',
1640 '69.0.3497.34',
1641 '68.0.3440.109',
1642 '70.0.3517.1',
1643 '70.0.3517.0',
1644 '69.0.3497.33',
1645 '68.0.3440.108',
1646 '69.0.3497.32',
1647 '70.0.3516.3',
1648 '70.0.3516.2',
1649 '70.0.3516.1',
1650 '70.0.3516.0',
1651 '69.0.3497.31',
1652 '68.0.3440.107',
1653 '70.0.3515.4',
1654 '68.0.3440.106',
1655 '70.0.3515.3',
1656 '70.0.3515.2',
1657 '70.0.3515.1',
1658 '70.0.3515.0',
1659 '69.0.3497.30',
1660 '68.0.3440.105',
1661 '68.0.3440.104',
1662 '70.0.3514.2',
1663 '70.0.3514.1',
1664 '70.0.3514.0',
1665 '69.0.3497.29',
1666 '68.0.3440.103',
1667 '70.0.3513.1',
1668 '70.0.3513.0',
1669 '69.0.3497.28',
1670 )
1671 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672
1673
# Default HTTP request headers. The User-Agent value is whatever
# random_user_agent() returns when this module is imported, so it is
# chosen once per process rather than once per request.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Fixed User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel used as the default of `default=` parameters (e.g. in
# xpath_element), so that None can be passed as a genuine default value.
NO_DEFAULT = object()
1689
7105440c
YCH
# English month names in calendar order (index 0 is January).
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names in calendar order, keyed by language code.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Known media / manifest file extensions, without the leading dot.
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. The two sequences
# are zipped positionally; multi-character replacements ('AE', 'OE', 'TH',
# 'ss', ...) are wrapped in lists so they count as a single item.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1720
46f59e89
S
# strftime/strptime-style date format strings that are unambiguous about
# day/month order. Presumably tried in turn when parsing dates; the
# consumer is outside this part of the file.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# DATE_FORMATS plus the numeric forms that read as day-then-month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# DATE_FORMATS plus the numeric forms that read as month-then-day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
1778
# Captures four groups from text of the form  }('...',N,N,'a|b|c'.split('|')
# NOTE(review): looks like the argument list of packed JavaScript - confirm at the use site.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script> element whose type is application/ld+json (optionally
# quoted) and captures its body in the named group `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1781
7105440c 1782
def preferredencoding():
    """Return the encoding to use for this system.

    Starts from locale.getpreferredencoding() and falls back to 'UTF-8'
    when the locale cannot be queried or names a codec that cannot encode
    a plain ASCII probe string.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or broken encoding name raises here.
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
d77c3dfd 1796
f4bfd65f 1797
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile fails on non-ASCII names unless it is given
        # unicode, so decode both pieces.
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename, path_dirname = os.path.basename, os.path.dirname

    # The temp file is created next to the target so the final rename stays
    # on the same filesystem.
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args['mode'] = 'w'
        args['encoding'] = 'utf-8'

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Read the current umask (setting it is the only way to read it)
            # and give the file the permissions a normal open() would.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temp file, then propagate the error.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1856
1857
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            # Only require that the attribute is present.
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches by hand """
        for candidate in node.findall(compat_xpath(xpath)):
            if key in candidate.attrib and (val is None or candidate.attrib.get(key) == val):
                return candidate
        return None
1872
d7e66d39
JMF
1873# On python2.6 the xml.etree.ElementTree.Element methods don't support
1874# the namespace parameter
5f6a1245
JW
1875
1876
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand `prefix:tag` steps of path into `{namespace-uri}tag` form.

    ns_map maps each prefix used in path to its namespace URI. Steps
    without a prefix are kept unchanged.
    """
    steps = [step.split(':') for step in path.split('/')]

    def expand(parts):
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(parts) for parts in steps)
1887
d77c3dfd 1888
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element under node matching xpath.

    xpath is either a single XPath string or an iterable of XPath strings
    that are tried in order until one matches.

    When nothing matches: return default if one was given, otherwise raise
    ExtractorError if fatal is set, otherwise return None. name (falling
    back to xpath) is used in the error message.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable is treated as "not found"
        # instead of leaving n unbound.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: xpath may itself be a tuple, which the %
            # operator would otherwise unpack as several format arguments.
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
1910
1911
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element under node matching xpath.

    Lookup and not-found handling are delegated to xpath_element. When the
    element exists but has no text: return default if one was given,
    otherwise raise ExtractorError if fatal is set, otherwise return None.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # xpath_element hands back None or the caller's default when nothing
    # matched; pass that straight through.
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: xpath may itself be a tuple, which the %
            # operator would otherwise unpack as several format arguments.
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
a41fb80c
S
1925
1926
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists: return default if one was given, otherwise
    raise ExtractorError if fatal is set, otherwise return None.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
bf0ff932
PH
1938
1939
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper: an ID lookup is an attribute lookup on 'id'.
    return get_element_by_attribute('id', id, html)
43e8fafd 1943
12ea2f30 1944
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1949
1950
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute value in the passed HTML document, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1954
1955
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute may list several names, so match class_name as a
    # whole word anywhere inside the attribute value. The pattern is already
    # a regex, hence escape_value=False.
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)
1961
1962
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case
    it is inserted into the pattern as a regular expression. Each returned
    item is the HTML-unescaped text between the opening tag and the nearest
    closing tag of the same name.
    """

    value = re.escape(value) if escape_value else value

    retlist = []
    # Verbose-mode pattern: tag name, any attributes, the wanted attribute,
    # any further attributes, then the content up to the closing tag.
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        # Content that starts with a quote character loses its first and
        # last character.
        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
a921f407 1986
c5229f39 1987
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        # Attributes of the most recently seen start tag; empty until one is parsed.
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # Overwrites rather than merges, so after feeding input with several
        # start tags only the last tag's attributes remain.
        self.attrs = dict(attrs)
1997
c5229f39 1998
8bb56eee
BF
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        # Deliberately ignored: whatever was collected before the error is returned.
        pass
    return parser.attrs
9e6dd238 2023
c5229f39 2024
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    # Convenience for sanitizing descriptions etc.: None passes through.
    if html is None:
        return html

    # Source newlines carry no meaning; <br> and </p><p> become newlines.
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Drop every remaining tag, then decode the entities.
    text = re.sub('<.*?>', '', text)
    return unescapeHTML(text).strip()
9e6dd238
FV
2040
2041
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means standard output.
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode on Windows.
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Use the underlying binary buffer when sys.stdout has one.
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error is not retried under a different name.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            # Sanitizing changed nothing, so there is nothing else to try.
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
2072
2073
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2081
5f6a1245 2082
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        code = ord(char)
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        # '?' and control characters are dropped outright.
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        # Restricted mode additionally replaces shell-special characters,
        # whitespace and anything outside ASCII.
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps: 0:12:34 becomes 0_12_34 instead of going through
    # the generic ':' replacement.
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        # IDs are kept as close to the original as possible.
        return result

    # Collapse runs of underscores and trim them from both ends.
    result = re.sub(r'_+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    # Avoid a leading '-', which looks like a command-line option.
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 2122
5f6a1245 2123
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform the path is returned unchanged.
    """
    if sys.platform != 'win32':
        return s

    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)

    sanitized_path = []
    for part in parts:
        # '.' and '..' are kept; elsewhere forbidden characters and a
        # trailing dot or whitespace character become '#'.
        if part not in ['.', '..']:
            part = re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)
        sanitized_path.append(part)

    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2140
2141
def sanitize_url(url):
    """Repair a few common URL defects; other URLs are returned unchanged."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url

    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in typo_fixes:
        if re.match(pattern, url) is None:
            continue
        # Only the first matching fix is applied.
        return re.sub(pattern, replacement, url)
    return url
17bcc626
S
2158
2159
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for url after passing it through sanitize_url.

    All other positional and keyword arguments are forwarded unchanged.
    """
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
67dda517
S
2162
2163
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, then environment variables.
    return os.path.expandvars(compat_expanduser(s))
2167
2168
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of each element, in
    input order. Elements are compared with ==, so they need not be
    hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2176
912b38b4 2177
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    The argument is the entity without the leading '&' but including the
    trailing ';'. Unrecognised entities are returned in their literal
    '&name;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    named = compat_html_entities.name2codepoint
    if entity in named:
        return compat_chr(named[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        digits = numeric.group(1)
        if digits.startswith('x'):
            # Turn 'x41' into '0x41' so int() accepts it with base 16.
            radix, digits = 16, '0%s' % digits
        else:
            radix = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(digits, radix))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2207
2208
def unescapeHTML(s):
    """Replace the HTML entities in s with their characters; None passes through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        # Group 1 is the entity without '&' but with the trailing ';'.
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2216
8bf48f23 2217
def process_communicate_or_kill(p, *args, **kwargs):
    """Call p.communicate(*args, **kwargs) and return its result.

    If communicate() is interrupted by any exception, the process is killed
    and waited for before the exception is re-raised.
    """
    try:
        output = p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
    return output
2225
2226
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding used for file names and subprocess arguments."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    fs_encoding = sys.getfilesystemencoding()
    return 'utf-8' if fs_encoding is None else fs_encoding
2237
2238
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    unicode_windows_api = (
        not for_subprocess
        and sys.platform == 'win32'
        and sys.getwindowsversion()[0] >= 5)

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if unicode_windows_api or sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2261
2262
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string file name on Python 2; return anything else unchanged."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2272
f07b74fc
PH
2273
def encodeArgument(s):
    """Encode a command-line argument the way encodeFilename does for subprocess use."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings.
    # TODO: once all post processors are fixed, fail here instead, reporting
    # that s should be of type compat_str.
    return encodeFilename(s.decode('ascii'), True)
2281
2282
aa49acd1
S
def decodeArgument(b):
    """Decode a command-line argument: decodeFilename with for_subprocess=True."""
    return decodeFilename(b, True)
2285
2286
8271226a
PH
def decodeOption(optval):
    """Return an option value as text, decoding bytes with the preferred encoding.

    None is passed through unchanged.
    """
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
1c256f70 2295
5f6a1245 2296
def formatSeconds(secs, delim=':'):
    """Format a number of seconds as 'S', 'M:SS' or 'H:MM:SS'.

    secs is the duration in seconds; fractions are truncated by the %d
    conversion. delim is the separator placed between the fields.
    The largest unit is not zero-padded, the following ones are padded
    to two digits.
    """
    # Use >= so that exactly one hour / one minute roll over to the next
    # unit ('1:00:00', '1:00') instead of printing '60:00' / '60'.
    if secs >= 3600:
        return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
    elif secs >= 60:
        return '%d%s%02d' % (secs // 60, delim, secs % 60)
    else:
        return '%d' % secs
2304
a0ddb8a2 2305
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler suited to the running Python's ssl module.

    params is the options dict; its 'nocheckcertificate' entry turns
    certificate verification off. Extra keyword arguments are passed on to
    YoutubeDLHTTPSHandler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname has to be switched off before verify_mode can
            # be set to CERT_NONE.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    # Fallbacks for interpreters without a usable default context.
    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2329
732ea2f0 2330
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to unexpected errors, asking the user to report the issue."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dlc -U to update'
    else:
        update_cmd = 'see https://github.com/pukkandan/yt-dlp on how to update'
    return ''.join((
        '; please report this issue on https://github.com/pukkandan/yt-dlp .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2340
2341
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2345
2346
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        cause, if given, is appended to the message; video_id, if given, is prepended to it.
        """

        # When constructed while a network error, socket timeout or
        # UnavailableVideoError is being handled, the error counts as expected.
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        # Unexpected errors get the "please report this issue" text appended.
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as a string, or None if there is none."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2374
1c256f70 2375
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2381
2382
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2386
2387
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        # Always an expected error, so no bug-report text is appended.
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # Stored as given. NOTE(review): presumably the countries where the
        # video is available - confirm against the callers.
        self.countries = countries
2399
2400
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Kept as passed in; None when no underlying exception was supplied.
        self.exc_info = exc_info
d77c3dfd
FV
2413
2414
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2422
2423
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # The message is also exposed as an attribute.
        self.msg = msg
d77c3dfd 2434
5f6a1245 2435
class ExistingVideoReached(YoutubeDLError):
    """ An already existing video has been reached.

    NOTE(review): presumably raised to stop processing for --break-on-existing;
    confirm against the raise site. The previous text was a copy of the
    MaxDownloadsReached docstring.
    """
    pass
2439
2440
class RejectedVideoReached(YoutubeDLError):
    """ A rejected video has been reached.

    NOTE(review): presumably raised to stop processing for --break-on-reject;
    confirm against the raise site. The previous text was a copy of the
    MaxDownloadsReached docstring.
    """
    pass
2444
2445
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2449
2450
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2458
2459
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both sizes are in bytes.
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2475
5f6a1245 2476
class XAttrMetadataError(YoutubeDLError):
    """Failure to write extended-attribute metadata.

    Besides code and msg, the instance carries a coarse reason:
    'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Classify by errno first, then by well-known message fragments.
        if (code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in msg or 'Disk quota exceeded' in msg):
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2491
2492
class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when no way of writing extended
    # attributes is available - confirm against the raise site.
    pass
2495
2496
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate ``http_class`` and optionally pin it to a source address.

    ``ydl_handler._params`` is read for a 'source_address' entry. When one
    is set, the new connection object is patched so that outgoing sockets
    are bound to that address and only remote addresses of the matching IP
    family are tried. ``is_https`` is only consulted on the Python 2.6 code
    path, to decide whether the raw socket has to be wrapped with SSL.

    Returns the (possibly patched) connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            # NOTE: this ``source_address`` parameter shadows the outer variable;
            # it is indexed below, so it is expected to be a (host, port) tuple.
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source host selects IPv4, anything else IPv6
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try every candidate address; return the first socket that connects
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and release the half-open socket
                    err = _
                    if sock is not None:
                        sock.close()
            # No candidate connected: re-raise the last error, if there was one
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() with a version
            # that builds the bound socket itself
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2560
2561
def handle_youtubedl_headers(headers):
    """Strip the internal no-compression marker from a header mapping.

    When 'Youtubedl-no-compression' is present, a new dict is returned that
    drops both that marker and any 'Accept-Encoding' header (matched
    case-insensitively). Otherwise ``headers`` itself is returned untouched.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    cleaned = {}
    for name, value in headers.items():
        if name.lower() == 'accept-encoding':
            continue
        cleaned[name] = value
    del cleaned[marker]
    return cleaned
87f0e62d
YCH
2570
2571
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """Store ``params``; the remaining arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open ``req``, routing through a SOCKS proxy when requested.

        The internal 'Ytdl-socks-proxy' header selects the proxy and is
        removed from the request before it is opened.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying raw deflate before zlib-wrapped."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL and add the standard headers.

        Returns the request to send, which is a new object when the URL had
        to be escaped.
        """
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decode compressed bodies and percent-encode redirect locations.

        Returns ``resp`` itself, or a replacement response object wrapping
        the decompressed body when the content was gzip or deflate encoded.
        """
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with up to 1023 trailing bytes cut off, one more each time
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: surface the first error
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 2693
5de90176 2694
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Build a connection class that tunnels through a SOCKS proxy.

    ``base_class`` must be HTTPConnection or HTTPSConnection (or a subclass
    of one of them). ``socks_proxy`` is a proxy URL whose scheme selects the
    protocol: 'socks5', 'socks4a', or 'socks'/'socks4'. The port defaults to
    1080, and username/password are percent-decoded when present.

    Returns a subclass of ``base_class`` whose connect() goes via the proxy.

    Raises ValueError when the URL scheme is not one of the supported SOCKS
    schemes.
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on socks_type; fail here with a clear message
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Leave None/'' untouched so missing credentials stay missing
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            # Only a numeric timeout is applied to the socket
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2736
2737
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._params = params
        # Fall back to the stock HTTPS connection class when none is given
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection

    def https_open(self, req):
        """Open ``req``, going through a SOCKS proxy when one is requested."""
        connection_cls = self._https_conn_class

        # Forward whichever SSL settings this handler instance exposes:
        # '_context' exists on python > 2.6, '_check_hostname' on python 3.x
        open_kwargs = {}
        for attr, kwarg in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                open_kwargs[kwarg] = getattr(self, attr)

        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_cls = make_socks_conn_class(connection_cls, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, connection_cls, True)
        return self.do_open(connection_factory, req, **open_kwargs)
be4a824d
PH
2761
2762
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix stripped from cookie file lines before they are parsed
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in one cookie line
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        ``filename`` falls back to ``self.filename``; ValueError is raised
        when neither is set. Cookies marked as discard or already expired
        are skipped unless the matching ``ignore_*`` flag is true.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        # NOTE: this rewrites the cookies held in the jar, not only the output
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                # One tab-separated line per cookie
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        ``filename`` falls back to ``self.filename``; ValueError is raised
        when neither is set. Malformed entries are skipped with a warning
        written to stderr instead of aborting the whole load.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Returns the line to parse, or raises LoadError when it is malformed
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        # Collect the validated lines in memory, then parse that buffer
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need to force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2879
2880
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that applies the same handling to HTTPS traffic."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to HTTPCookieProcessor.http_response unchanged."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround described above is currently disabled (commented out)
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests and responses reuse the HTTP handlers
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2903
2904
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler that coerces the redirect URL to text on Python 2."""

    # The override is only defined on Python 2; Python 3 keeps the stock method
    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
2912
2913
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing timezone designator off ``date_str``.

    Recognizes a final 'Z' or a final '+HH:MM' / '-HHMM' style offset
    (optionally preceded by one space) after at least 8 other characters.

    Returns a ``(timedelta, remaining_string)`` tuple; the timedelta is
    zero when no offset is found or when the designator is 'Z'.
    """
    tz_match = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not tz_match:
        return datetime.timedelta(), date_str

    # Drop the matched designator from the end of the string
    remainder = date_str[:-len(tz_match.group('tz'))]
    sign_char = tz_match.group('sign')
    if not sign_char:
        # 'Z' means UTC, i.e. no offset
        return datetime.timedelta(), remainder

    direction = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(tz_match.group('hours')),
        minutes=direction * int(tz_match.group('minutes')))
    return offset, remainder
2930
2931
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    ``delimiter`` separates the date and time parts. ``timezone`` is a
    timedelta offset to subtract; when it is None the offset is taken from
    the end of ``date_str``. Fractional seconds are discarded.
    Returns None for a None input or when the string does not parse.
    """
    if date_str is None:
        return None

    # Drop fractional seconds (and any other '.digits' run)
    trimmed = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, trimmed = extract_timezone(trimmed)

    layout = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        utc_moment = datetime.datetime.strptime(trimmed, layout) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
912b38b4
PH
2949
2950
46f59e89
S
def date_formats(day_first=True):
    """Return the day-first format list, or the month-first one if not ``day_first``."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2953
2954
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when ``date_str`` is None or cannot be parsed.
    ``day_first`` selects which list of date formats is tried.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; the last one that parses determines the result
    for fmt in date_formats(day_first):
        try:
            result = datetime.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to RFC 2822 style parsing
        parsed = email.utils.parsedate_tz(cleaned)
        if parsed:
            try:
                result = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if result is None:
        return None
    return compat_str(result)
bf50b038 2981
5f6a1245 2982
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    Returns None when ``date_str`` is None or cannot be parsed.
    ``day_first`` selects which list of date formats is tried.
    """
    if date_str is None:
        return None

    text = re.sub(r'[,|]', '', date_str)

    # Any 'PM' in the string shifts the result by 12 hours
    pm_hours = 12 if re.search(r'(?i)PM', text) else 0
    timezone, text = extract_timezone(text)

    # Remove AM/PM + timezone
    text = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', text)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_tail = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', text)
    if tz_tail:
        text = text[:-len(tz_tail.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nano = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', text)
    if nano:
        text = nano.group(1)

    pm_shift = datetime.timedelta(hours=pm_hours)
    # The first format that parses wins
    for fmt in date_formats(day_first):
        try:
            moment = datetime.datetime.strptime(text, fmt) - timezone + pm_shift
            return calendar.timegm(moment.timetuple())
        except ValueError:
            continue

    # Fall back to RFC 2822 style parsing
    parsed = email.utils.parsedate_tz(text)
    if parsed:
        return calendar.timegm(parsed) + pm_hours * 3600
46f59e89
S
3014
3015
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from ``url``.

    The text after the last '.' of the part before any '?' is returned when
    it is purely alphanumeric. Otherwise it is accepted only if, with
    trailing slashes removed, it is a known extension. In every other case
    (including a None url or one without a dot) ``default_ext`` is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    candidate = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3027
5f6a1245 3028
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return ``filename`` with its extension replaced by '<sub_lang>.<sub_format>'."""
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e 3031
5f6a1245 3032
def date_from_str(date_str):
    """
    Return a datetime.date from a string in the format YYYYMMDD,
    'now', 'today', 'yesterday' or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    Months and years are approximated as 30 and 365 days respectively.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    relative = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if relative is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount

    unit = relative.group('unit')
    # A bad approximation? timedelta has no month/year units
    approx_days = {'month': 30, 'year': 365}
    if unit in approx_days:
        amount *= approx_days[unit]
        unit = 'day'

    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
3060
3061
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
3070
5f6a1245 3071
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str

        A missing bound defaults to the earliest / latest representable date.
        Raises ValueError when start is after end.
        """
        self.start = datetime.datetime.min.date() if start is None else date_from_str(start)
        self.end = datetime.datetime.max.date() if end is None else date_from_str(end)
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        # Anything that is not already a date is parsed as a date string
        candidate = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __str__(self):
        bounds = (self.start.isoformat(), self.end.isoformat())
        return '%s - %s' % bounds
c496ca96
PH
3101
3102
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Decode a byte-string result with the preferred encoding
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3111
3112
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    Writes ``s`` with the Windows WriteConsoleW API when ``out`` is file
    descriptor 1 or 2 and is attached to a real console. Raises OSError when
    WriteConsoleW reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors 1 and 2 to the identifiers passed to GetStdHandle
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the count written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is invalid, is not a character device, or
        # GetConsoleMode fails on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    # Write in chunks of at most 1024 characters, stopping each chunk before
    # a non-BMP character; such a character is then written on its own
    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3186
3187
def write_string(s, out=None, encoding=None):
    """Write the text ``s`` to ``out`` (sys.stderr by default) and flush it.

    ``encoding`` overrides the encoding used when the text has to be
    written as bytes. On Windows, when no encoding is forced, the console
    API is tried first.
    """
    stream = sys.stderr if out is None else out
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(stream, 'fileno'))
    if use_console_api and _windows_write_string(s, stream):
        return

    # Python 2 lies about mode of sys.stderr, so always write bytes there
    wants_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        stream.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(stream, 'buffer'):
        # Text stream with a binary layer underneath: encode and bypass it
        chosen = encoding or getattr(stream, 'encoding', None) or preferredencoding()
        stream.buffer.write(s.encode(chosen, 'ignore'))
    else:
        stream.write(s)
    stream.flush()
3208
3209
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the elements of ``bs`` as a list of integers.

    An empty or None input gives []. Elements that are already ints
    (Python 3 bytes) are copied as-is; otherwise each is passed to ord().
    """
    if not bs:
        return []
    already_ints = isinstance(bs[0], int)  # Python 3
    return list(bs) if already_ints else list(map(ord, bs))
3217
c257baff 3218
def intlist_to_bytes(xs):
    """Pack a list of integers into a byte string, one unsigned byte each."""
    if not xs:
        return b''
    pack_format = '%dB' % len(xs)
    return compat_struct_pack(pack_format, *xs)
c38b1e77
PH
3223
3224
c1c9a79c
PH
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) using whichever
# mechanism is available: LockFileEx on Windows, fcntl.flock elsewhere, or
# stubs that raise IOError when fcntl cannot be imported.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low and high parts of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock ``f`` via LockFileEx; raise OSError if the call fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 for an exclusive lock and 0x0 otherwise
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file; raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            """Take an exclusive or shared flock() lock on ``f``."""
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            """Release the flock() lock on ``f``."""
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            """Always raise IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raise IOError: locking is unavailable here."""
            raise IOError(UNSUPPORTED_MSG)
c1c9a79c
PH
3298
3299
class locked_file(object):
    """Context manager around a file that is locked while the block runs.

    The file is opened on construction. Entering the context takes a shared
    lock for mode 'r' and an exclusive lock for 'a' and 'w'; leaving it
    unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.mode = mode
        self.f = io.open(filename, mode, encoding=encoding)

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the open file when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        # Close the file even if unlocking fails
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3329
3330
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
3334
3335
def shell_quote(args):
    """Return ``args`` quoted for a shell and joined with single spaces.

    Byte-string arguments are decoded with the filesystem encoding first.
    """
    encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join(compat_shlex_quote(_as_text(arg)) for arg in args)
9d4660ca
PH
3345
3346
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    ``data`` is JSON-encoded and appended to ``url`` as a
    '#__youtubedl_smuggle=...' fragment. If ``url`` already carries smuggled
    data, both are merged and the existing values win on key conflicts.
    ``data`` itself is left unmodified.
    """
    url, idata = unsmuggle_url(url, {})
    # Merge into a copy so the caller's dict is not mutated as a side effect
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3355
3356
def unsmuggle_url(smug_url, default=None):
    """Split smuggled data off a URL.

    Returns ``(url, data)``, where ``data`` is the JSON-decoded payload from
    the '#__youtubedl_smuggle' fragment. A URL without such a fragment is
    returned unchanged together with ``default``.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    # Everything after the last '#' is the encoded payload
    pieces = smug_url.rpartition('#')
    plain_url, encoded = pieces[0], pieces[2]
    payload = compat_parse_qs(encoded)['__youtubedl_smuggle'][0]
    return plain_url, json.loads(payload)
02dbf93f
PH
3364
3365
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count with a binary (1024-based) suffix, e.g. '1.50KiB'.

    bytes: a number, a numeric string, or None.

    Returns 'N/A' for None. Negative values are not supported (math.log
    raises ValueError for them).
    """
    SUFFIXES = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']

    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the exponent inside the suffix table: tiny fractions would
        # otherwise wrap around to 'YiB' (index -1) and huge values would
        # raise IndexError
        exponent = max(0, min(exponent, len(SUFFIXES) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, SUFFIXES[exponent])
f53c966a 3378
1c088fa8 3379
fb47597b
S
def lookup_unit_table(unit_table, s):
    """ Parse '<number><unit>' at the start of s and scale it by unit_table[unit].

    The number may use ',' or '.' as decimal separator. Returns the product
    truncated to int, or None if s does not start with a number followed by
    one of the units.
    """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    found = re.match(pattern, s)
    if found is None:
        return None
    number = float(found.group('num').replace(',', '.'))
    return int(number * unit_table[found.group('unit')])
3389
3390
be64b5b0
PH
def parse_filesize(s):
    """ Parse a human readable file size such as '1.5 MiB' into a number of bytes.

    Returns None for None or when lookup_unit_table cannot parse s.
    """
    if s is None:
        return None

    # (prefix letter, decimal unit name, binary unit name), ordered by power
    PREFIXES = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    unit_table = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    for power, (letter, decimal_name, binary_name) in enumerate(PREFIXES, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        unit_table[letter + 'iB'] = binary
        unit_table[letter + 'B'] = decimal
        unit_table[lower + 'B'] = binary
        unit_table[letter + 'b'] = decimal
        unit_table[lower + 'b'] = decimal
        unit_table[decimal_name + 'bytes'] = decimal
        unit_table[binary_name + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
3460
3461
def parse_count(s):
    """ Parse a count such as '1,234' or '1.5M' into an int.

    Returns None for None or when the value cannot be parsed.
    """
    if s is None:
        return None

    s = s.strip()

    # Plain digits with separators need no unit handling
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {}
    for suffixes, factor in (
            (('k', 'K'), 1000),
            (('m', 'M', 'kk', 'KK'), 1000 ** 2)):
        for suffix in suffixes:
            multipliers[suffix] = factor

    return lookup_unit_table(multipliers, s)
be64b5b0 3481
2f7ae819 3482
b871d7e9
S
def parse_resolution(s):
    """ Extract 'width'/'height' from a string like '1920x1080', '720p' or '4k'.

    Returns a dict with the dimensions that were found; {} if s is None or
    nothing matches.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = dimensions.group('w', 'h')
        return {
            'width': int(width),
            'height': int(height),
        }

    line_count = re.search(r'\b(\d+)[pPiI]\b', s)
    if line_count:
        return {'height': int(line_count.group(1))}

    # '4k' -> 2160 lines, '8k' -> 4320 lines
    k_class = re.search(r'\b([48])[kK]\b', s)
    if k_class:
        return {'height': int(k_class.group(1)) * 540}

    return {}
3503
3504
0dc41787
S
def parse_bitrate(s):
    """ Return the integer in front of 'kbps' in s, or None if s is not a string or has none """
    if isinstance(s, compat_str):
        kbps = re.search(r'\b(\d+)\s*kbps', s)
        if kbps:
            return int(kbps.group(1))
    return None
3511
3512
def month_by_name(name, lang='en'):
    """ Return the number of a month by its full name in the given language
    (English names are used for languages missing from MONTH_NAMES);
    None if the name is unknown """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    if name in names:
        return names.index(name) + 1
    return None
3522
3523
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations, i.e. the first three letters of the English name;
        None if the abbreviation is unknown """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3532
3533
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;'

    Ampersands that already start one of the predefined entities
    (amp, lt, gt, apos, quot) or a numeric character reference are left
    untouched. Numeric references may have up to 6 hexadecimal or 7 decimal
    digits, which is enough for the highest code point U+10FFFF.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,6};|#[0-9]{,7};)',
        '&amp;',
        xml_str)
e3946f98
PH
3540
3541
def setproctitle(title):
    """ Set the process name through libc's prctl, on a best-effort basis.

    title must be a text string. The function silently does nothing on
    Jython, when 'libc.so.6' cannot be loaded, or when the loaded libc has
    no prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return

    PR_SET_NAME = 15
    # Initializing the buffer from the bytes makes it one byte longer than
    # the title and NUL-terminated. A buffer sized exactly to the title has
    # no terminator, so prctl could read past its end.
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(PR_SET_NAME, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3566
3567
def remove_start(s, start):
    """ Return s without the leading `start`; s is returned as is (None included) if it does not start with it """
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3570
3571
def remove_end(s, end):
    """ Return s without the trailing `end`; s is returned as is (None included) if it does not end with it """
    # An empty `end` must be special-cased: s[:-0] is s[:0], i.e. ''
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
2b9faf55
PH
3574
3575
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes surrounding s, if present """
    if s is None or len(s) < 2:
        return s
    opening, closing = s[0], s[-1]
    if opening == closing and opening in ('"', "'"):
        return s[1:-1]
    return s
3583
3584
b6e0c7d2
U
def get_domain(url):
    """ Return the host part of url without scheme and leading 'www.', or None if no dotted host is found """
    found = re.match(
        r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if not found:
        return None
    return found.group('domain')
3588
3589
def url_basename(url):
    """ Return the last segment of the URL's path (ignoring leading/trailing slashes) """
    url_path = compat_urlparse.urlparse(url).path
    _, _, last_segment = url_path.strip('/').rpartition('/')
    return last_segment
aa94a6d3
PH
3593
3594
02dc0a36
S
def base_url(url):
    """ Return the http(s) URL up to and including the last '/' before any '?', '#' or '&'.

    Raises AttributeError if url does not match.
    """
    found = re.match(r'https?://[^?#&]+/', url)
    return found.group()
3597
3598
def urljoin(base, path):
    """ Join base and path into a URL, tolerating bytes input.

    Returns path unchanged if it already has a scheme or is
    protocol-relative. Returns None if path is empty or not a string, or if
    base is not an http(s) or protocol-relative URL string.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not (isinstance(path, compat_str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if isinstance(base, compat_str) and re.match(r'^(?:https?:)?//', base):
        return compat_urlparse.urljoin(base, path)
    return None
3612
3613
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
3617
3618
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always PUT """

    def get_method(self):
        return 'PUT'
3622
3623
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to int, multiplied by invscale and floor-divided by scale.

    If get_attr is given, that attribute of v is converted instead.
    Returns default when the value is None, '' or not convertible.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
9732d77e 3636
9572013d 3637
40a90862
JMF
def str_or_none(v, default=None):
    """ Return v converted to a text string, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
3640
9732d77e
PH
3641
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and plus signs before converting
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
608d11f5
PH
3649
3650
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to float, multiplied by invscale and divided by scale.

    Returns default when v is None or not convertible.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3658
3659
c7e327c4
S
def bool_or_none(v, default=None):
    """ Return v if it is a bool, default otherwise """
    if isinstance(v, bool):
        return v
    return default
3662
3663
53cd37ba
S
def strip_or_none(v, default=None):
    """ Return v stripped of surrounding whitespace if it is a text string, default otherwise """
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3666
3667
af03000a
S
def url_or_none(url):
    """ Return the stripped url if it is a text string that is protocol-relative
    or uses one of the accepted schemes; None otherwise """
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
af03000a
S
3673
3674
def parse_duration(s):
    """ Parse a duration string into a number of seconds.

    Three notations are tried in order: colon separated ('1:02:03.5'),
    unit suffixed ('PT1H2M3S', '1h 2min 3s') and a lone, possibly fractional
    number of hours or minutes ('1.5 hours'). Years, months and weeks are
    accepted by the second notation but not counted.

    Returns None if s is not a string or matches none of the notations.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    PATTERNS = (
        r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$',
        r'''(?ix)(?:P?
            (?:
                [0-9]+\s*y(?:ears?)?\s*
            )?
            (?:
                [0-9]+\s*m(?:onths?)?\s*
            )?
            (?:
                [0-9]+\s*w(?:eeks?)?\s*
            )?
            (?:
                (?P<days>[0-9]+)\s*d(?:ays?)?\s*
            )?
            T)?
            (?:
                (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
            )?
            (?:
                (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
            )?
            (?:
                (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
            )?Z?$''',
        r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$',
    )

    for pattern in PATTERNS:
        m = re.match(pattern, s)
        if m:
            break
    else:
        return None

    # Not every pattern defines every group; missing ones count as absent
    parts = m.groupdict()

    # Factors are applied one after another to keep the float arithmetic
    # identical to multiplying step by step
    UNITS = (
        ('secs', ()),
        ('mins', (60,)),
        ('hours', (60, 60)),
        ('days', (24, 60, 60)),
        ('ms', ()),
    )
    duration = 0
    for name, factors in UNITS:
        value = parts.get(name)
        if not value:
            continue
        amount = float(value)
        for factor in factors:
            amount = amount * factor
        duration += amount
    return duration
91d7d0b3
JMF
3731
3732
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the filename's extension ('a.mp4' -> 'a.ext.mp4').

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3739
3740
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the filename's extension with ext ('a.mp4' -> 'a.ext').

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        stem = filename
    else:
        stem = name
    return '{0}.{1}'.format(stem, ext)
3746
3747
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if starting the binary raises OSError. """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
b7ab0590
PH
3757
3758
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process_communicate_or_kill(proc)
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3776
3777
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Return the first group of version_re found in output, or `unrecognized`.

    version_re defaults to a pattern matching 'version <token>'.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
3787
3788
class PagedList(object):
    """ Base class for paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3793
9c44d242
PH
3794
class OnDemandPagedList(PagedList):
    """ Paged list that calls pagefunc(pagenum) only for the pages a slice needs.

    Fetched pages are kept in a per-instance cache unless use_cache is false.
    """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        collected = []
        for pagenum in itertools.count(start // self._pagesize):
            first_index = pagenum * self._pagesize
            next_first_index = first_index + self._pagesize
            if start >= next_first_index:
                continue

            page = self._cache.get(pagenum) if self._use_cache else None
            if page is None:
                page = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page

            # Offsets of the requested range inside this page
            if first_index <= start < next_first_index:
                lower = start % self._pagesize
            else:
                lower = 0

            if end is not None and first_index <= end <= next_first_index:
                upper = ((end - 1) % self._pagesize) + 1
            else:
                upper = None

            if lower != 0 or upper is not None:
                page = page[lower:upper]
            collected.extend(page)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page) + lower < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == next_first_index:
                break
        return collected
81c2f20b
PH
3845
3846
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list whose number of pages is known up front """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        collected = []
        first_page = start // self._pagesize
        if end is None:
            stop_page = self._pagecount
            remaining = None
        else:
            stop_page = end // self._pagesize + 1
            remaining = end - start
        # Entries to drop from the first fetched page
        to_skip = start - first_page * self._pagesize
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) < remaining:
                    remaining -= len(page)
                else:
                    # This page completes the requested range
                    collected.extend(page[:remaining])
                    break
            collected.extend(page)
        return collected
3874
3875
def uppercase_escape(s):
    """ Replace every literal '\\UXXXXXXXX' escape in s by the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', expand, s)
0fe2ff78
YCH
3882
3883
def lowercase_escape(s):
    """ Replace every literal '\\uXXXX' escape in s by the character it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def expand(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', expand, s)
b53466e1 3890
d05cfe06
S
3891
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # Text strings are encoded to UTF-8 before quoting on Python 2
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
3897
3898
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA-encoded, all other components are percent-escaped
    ascii_netloc = parts.netloc.encode('idna').decode('ascii')
    escaped = parts._replace(
        netloc=ascii_netloc,
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment))
    return escaped.geturl()
3909
62e609ab
PH
3910
def read_batch_urls(batch_fd):
    """ Read the URLs from a batch file object and close it.

    Blank lines and lines starting with '#', ';' or ']' are skipped;
    a trailing ' #...' comment is removed from each URL.
    """
    BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')

    def fixup(line):
        if not isinstance(line, compat_str):
            line = line.decode('utf-8', 'replace')
        for bom in BOM_UTF8:
            if line.startswith(bom):
                line = line[len(bom):]
        line = line.lstrip()
        if not line or line.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', line, 1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        cleaned = map(fixup, fd)
        return [url for url in cleaned if url]
b74fa8cd
JMF
3928
3929
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments like compat_urllib_parse_urlencode and return ASCII bytes """
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3932
3933
def update_url_query(url, query):
    """ Return url with the parameters in `query` added to (or replacing those in) its query string """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    merged = compat_parse_qs(parts.query)
    merged.update(query)
    new_query = compat_urllib_parse_urlencode(merged, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3942
8e60dc75 3943
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """ Build a new request from req with url, data, headers and query overridden.

    HEAD and PUT requests keep their method; a timeout attribute on req is
    carried over.
    """
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    new_data = data or req.data
    new_url = update_url_query(url or req.get_full_url(), query)

    request_types = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }
    request_type = request_types.get(
        req.get_method(), compat_urllib_request.Request)

    new_req = request_type(
        new_url, data=new_data, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3962
3963
def _multipart_encode_impl(data, boundary):
    """ Encode data as multipart/form-data using the given boundary.

    Returns (body_bytes, content_type). Raises ValueError if the boundary
    occurs inside one of the encoded fields.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    chunks = []
    for k, v in data.items():
        chunks.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
3984
3985
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a (body, content_type) tuple. With an explicit boundary,
    ValueError is raised if the boundary occurs in the data.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary is used as is; a clash is reported
        return _multipart_encode_impl(data, boundary)

    # Keep drawing random boundaries until one does not clash with the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
4014
4015
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up one key, or the first usable key of a list/tuple of keys, in d.

    With several keys, missing keys and None values are skipped, as are
    falsy values unless skip_false_values is false. A single key is a plain
    d.get(key, default).
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
4024
4025
def try_get(src, getter, expected_type=None):
    """ Apply getter (a callable, or a list/tuple of callables) to src.

    Returns the first result obtained without AttributeError, KeyError,
    TypeError or IndexError that is an instance of expected_type (any
    result if expected_type is None); None if there is no such result.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            result = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
    return None
329ca3be
S
4037
4038
6cc62232
S
def merge_dicts(*dicts):
    """ Merge dicts, giving precedence to earlier ones.

    None values are ignored. A later value replaces an earlier one only
    when the earlier one is an empty string and the later one a non-empty
    string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_blank = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_blank:
                    continue
            merged[k] = v
    return merged
4051
4052
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as a text string, decoding it with encoding/errors if it is not one already """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4055
16392824 4056
a1a530b0
PH
# Age limit associated with each US film rating
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4064
4065
# Age limit associated with each TV Parental Guidelines rating
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4074
4075
def parse_age_limit(s):
    """ Convert an age rating to an age limit in years.

    Accepts an int between 0 and 21, a string like '18' or '18+', a key of
    US_RATINGS, or a TV Parental Guidelines rating written 'TV-14', 'TV_14'
    or 'TV14'. Returns None for anything else.
    """
    if type(s) is int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None

    plain_age = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if plain_age:
        return int(plain_age.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    # Match on the part after the 'TV-' prefix so the separator may vary
    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4090
4091
def strip_jsonp(code):
    """ Return the argument text of a JSONP call such as 'cb({...});'.

    An optional 'window.' prefix, a 'name && name(...)' guard and trailing
    '//' comments are tolerated. Input that does not match is returned as is.
    """
    JSONP_RE = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(JSONP_RE, r'\g<callback_data>', code)
478c2c61
PH
4100
4101
def js_to_json(code, vars={}):
    """ Rewrite a JavaScript object/array literal so that it can be parsed as JSON.

    Strings are re-quoted with double quotes, bare identifiers are quoted,
    hexadecimal and octal integers are converted to decimal, and comments,
    trailing commas and '!' are dropped.

    vars is a dict of var, val pairs to substitute
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )
    # Rewrites applied to escapes and quotes inside a string literal
    STRING_REWRITES = {
        '"': '\\"',
        "\\'": "'",
        '\\\n': '',
        '\\x': '\\u00',
    }

    def rewrite_escape(m):
        found = m.group(0)
        return STRING_REWRITES.get(found, found)

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v == ',' or v.startswith(('/*', '//', '!')):
            return ""

        if v[0] in ("'", '"'):
            v = re.sub(r'(?s)\\.|"', rewrite_escape, v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        if v in vars:
            return vars[v]

        return '"%s"' % v

    TOKEN_RE = r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE)
    return re.sub(TOKEN_RE, fix_kv, code)
e05f6939
PH
4146
4147
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its position in quality_ids,
    or to -1 if the id is not in the list.
    """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
4156
acd69589 4157
# Default output filename template: title, id in square brackets, extension
DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s'
0a871f68 4159
a020a0dc
PH
4160
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Leave room for the ellipses so the result is `length` characters long
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4169
4170
def version_tuple(v):
    """ Split a version string on '.' and '-' into a tuple of ints (ValueError on non-numeric parts) """
    components = re.split(r'[-.]', v)
    return tuple(int(component) for component in components)
48844745
PH
4173
4174
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is lower than limit.

    A missing or unparsable version counts as outdated only if assume_new
    is false.
    """
    if version:
        try:
            return version_tuple(version) < version_tuple(limit)
        except ValueError:
            pass
    return not assume_new
732ea2f0
PH
4182
4183
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U """
    # Self-update is switched off, so this is always False. The previous
    # check (running from a zip via zipimporter, or from a frozen
    # executable) sat behind this return and could never run.
    return False
7d4111ed
PH
4191
4192
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
2ccd1b10
PH
4196
4197
def error_to_compat_str(err):
    """ Return str(err) as a text string on both Python 2 and 3 """
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4205
4206
def mimetype2ext(mt):
    """ Map a MIME type (parameters allowed) to a file extension.

    The type is matched case-insensitively after removing any parameters.
    A few full types have a dedicated extension; otherwise the subtype is
    looked up, and returned as is if it is not in the table.

    Returns None if mt is None.
    """
    if mt is None:
        return None

    # Drop parameters first so that neither they nor a '/' inside them
    # interfere with the lookups below
    mime = mt.partition(';')[0].strip().lower()

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mime)
    if ext is not None:
        return ext

    _, _, res = mime.rpartition('/')

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4244
4245
def parse_codecs(codecs_str):
    """ Split a comma separated codecs string into 'vcodec' and 'acodec'.

    The first recognized video and audio codecs are reported, 'none' being
    used for a missing kind. If no codec is recognized but exactly two are
    listed, they are taken as video and audio in that order. Otherwise {}
    is returned. A warning is written to stderr for each unknown codec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_CODECS = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_CODECS:
            vcodec = vcodec or full_codec
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4275
4276
def urlhandle_detect_ext(url_handle):
    """ Guess a file extension from the response headers of url_handle.

    The filename of an attachment Content-Disposition is tried first, then
    the Content-Type.
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        attachment = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if attachment:
            ext = determine_ext(attachment.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4289
4290
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Return a base64 'data:' URI carrying the bytes `data` with the given MIME type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4293
4294
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4303
4304
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Returns a match object (truthy) if the decoded text starts with
    optional whitespace followed by '<', None otherwise.
    """

    # The UTF-32 marks must be tested before the UTF-16 ones, as the
    # UTF-32-LE mark starts with the UTF-16-LE mark
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, offset = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, offset = enc, len(bom)
            break
    text = first_bytes[offset:].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4323
4324
def determine_protocol(info_dict):
    """ Return the download protocol for a format dict.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the start of the URL (rtmp, mms, rtsp), then from its extension
    (m3u8, f4m), and finally from the URL scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4345
4346
def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
    """ Render a list of rows, each as a list of values

    Every column except the last is left-aligned and padded to its widest
    cell plus extraGap. With delim, a row of dashes separates the header
    from the data. With hideEmpty, columns whose data cells are all empty
    are dropped.
    """

    def get_max_lens(table):
        return [max(len(compat_str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        return [col for (take, col) in zip(filterArray, row) if take]

    if hideEmpty:
        # A column width of 0 is falsy, which marks the column as empty
        data_widths = get_max_lens(data)
        header_row = filter_using_list(header_row, data_widths)
        data = [filter_using_list(row, data_widths) for row in data]

    max_lens = get_max_lens([header_row] + data)

    rows = [header_row]
    if delim:
        rows.append(['-' * ml for ml in max_lens])
    rows = rows + data

    padded_columns = ['%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]]
    format_str = ' '.join(padded_columns) + ' %s'
    return '\n'.join(format_str % tuple(row) for row in rows)
347de493
PH
4367
4368
def _match_one(filter_part, dct):
    """Evaluate a single filter clause against the dictionary dct.

    Two clause shapes are recognised, tried in this order:

    * a comparison, "<key> <op>[?] <value>", where <op> is one of the keys
      of COMPARISON_OPERATORS and <value> is a number (optionally with a
      size suffix), a quoted string or a bare alphanumeric string;
    * a presence test, "<key>" or "!<key>".

    Returns the result of applying the operator. For a comparison whose key
    is missing (or None) in dct, the text matched by the optional "?" marker
    is returned instead, i.e. a truthy string when the marker is present and
    None otherwise.

    Raises ValueError when the clause matches neither shape, when an
    ordering operator is used with a string value, or when the numeric value
    cannot be parsed.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and the matching comparison
            # value is a number, we should respect the type of the original
            # field and process the comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison: only equality and inequality make sense.
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Un-escape the quote character inside a quoted value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: plain integer first, then hand the text to
            # parse_filesize as written, then once more with a 'B' appended.
            # NOTE(review): presumably parse_filesize returns None for text
            # it cannot parse - confirm against its definition.
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: the clause passes only if "?" followed the operator.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        # Booleans are tested for their value, everything else for presence.
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4437
4438
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax.

    filter_str is split on '&' and every resulting clause must hold for dct.
    Returns True (=passes filter) or False; evaluation stops at the first
    clause that does not hold.
    """
    for clause in filter_str.split('&'):
        if not _match_one(clause, dct):
            return False
    return True
4444
4445
def match_filter_func(filter_str):
    """Build a callable that checks an info dictionary against filter_str.

    The returned function yields None when the dictionary passes the filter
    and otherwise a message naming the video (its 'title', else its 'id',
    else 'video') and the filter it failed.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
4454
4455
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to a number of seconds.

    Two forms are understood: an offset in seconds such as '12', '12.5' or
    '12.5s', and a clock time 'H:MM:SS' whose seconds may carry a fraction
    introduced by '.' or ':'. Returns None for an empty value or for any
    expression in another form.
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match is not None:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match is None:
        return None

    hours, minutes, seconds = clock_match.groups()
    # A ':' before the fraction is treated like a decimal point.
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4467
4468
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode, 'HH:MM:SS,mmm'.

    Each field is truncated (not rounded) by the '%d' conversion.
    """
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    whole_seconds = seconds % 60
    milliseconds = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, whole_seconds, milliseconds)
bf6427d2
YCH
4471
4472
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT, carrying over the
    supported styling as <font>, <b>, <i> and <u> tags.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Older namespace URIs are rewritten to the current ones before parsing
    # so that a single set of XPath expressions covers all of them.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # Style id -> resolved properties, and the style inherited from body/div.
    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        """XMLParser target that renders one paragraph as SRT markup."""

        def __init__(self):
            # Per-instance state: every paragraph gets its own parser, and
            # styles applied in one paragraph must not suppress the tags of
            # the next one.
            self._out = ''
            self._unclosed_elements = []
            self._applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
                return

            unclosed_elements = []
            style = {}
            element_style_id = attrib.get('style')
            if default_style:
                style.update(default_style)
            if element_style_id:
                style.update(styles.get(element_style_id, {}))
            for prop in SUPPORTED_STYLING:
                prop_val = attrib.get(_x('tts:' + prop))
                if prop_val:
                    style[prop] = prop_val

            # Style already in effect from the enclosing elements.
            inherited = self._applied_styles[-1] if self._applied_styles else {}
            font = ''
            for k, v in sorted(style.items()):
                if inherited.get(k) == v:
                    # Already in effect; do not emit the tag again.
                    continue
                if k == 'color':
                    font += ' color="%s"' % v
                elif k == 'fontSize':
                    font += ' size="%s"' % v
                elif k == 'fontFamily':
                    font += ' face="%s"' % v
                elif k == 'fontWeight' and v == 'bold':
                    self._out += '<b>'
                    unclosed_elements.append('b')
                elif k == 'fontStyle' and v == 'italic':
                    self._out += '<i>'
                    unclosed_elements.append('i')
                elif k == 'textDecoration' and v == 'underline':
                    self._out += '<u>'
                    unclosed_elements.append('u')
            if font:
                self._out += '<font' + font + '>'
                unclosed_elements.append('font')

            # Push exactly one entry per element on both stacks so that
            # end() can always pop one from each, whether or not any tag
            # was emitted for this element.
            applied_style = dict(inherited)
            applied_style.update(style)
            self._applied_styles.append(applied_style)
            self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                for element in reversed(self._unclosed_elements.pop()):
                    self._out += '</%s>' % element
                self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    def apply_own_properties(style_id, style):
        for prop in SUPPORTED_STYLING:
            prop_val = style.get(_x('tts:' + prop))
            if prop_val:
                styles.setdefault(style_id, {})[prop] = prop_val

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style may reference a parent that is
    # declared later in the document, so passes are repeated until a pass
    # defers nothing. If a pass defers exactly the same styles as the pass
    # before it, no further progress is possible (the parent is missing or
    # the references are cyclic) and the loop stops instead of spinning
    # forever.
    style_elements = dfxp.findall(_x('.//ttml:style'))
    previously_deferred = None
    while True:
        deferred = []
        for position, style in enumerate(style_elements):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    deferred.append(position)
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            apply_own_properties(style_id, style)
        if not deferred:
            break
        if deferred == previously_deferred:
            # Unresolvable parents: keep at least the styles' own properties.
            for position in deferred:
                style = style_elements[position]
                apply_own_properties(
                    style.get('id') or style.get(_x('xml:id')), style)
            break
        previously_deferred = deferred

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The cue number is the paragraph's position in the document, so skipped
    # paragraphs leave gaps in the numbering.
    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4635
4636
66e289ba
S
def cli_option(params, command_option, param):
    """Build the argv fragment for an option that takes a value.

    Looks up params[param]; returns [] when it is missing or None, and
    otherwise [command_option, <value as text>]. The value is always
    converted to text, including falsy values such as 0, so the result can
    be handed to a subprocess argument list as is.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, compat_str(value)]
4642
4643
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the argv fragment for an option that takes a boolean value.

    Returns [] when params[param] is missing or None. Otherwise the flag is
    rendered as true_value or false_value and returned either as a separate
    argument or, when separator is given, joined to command_option with it.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)

    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4652
4653
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4657
4658
def cli_configuration_args(params, arg_name, key, default=[], exe=None):  # returns arg, for_compat
    """Select the extra command-line arguments configured for a component.

    params[arg_name] is either a plain list/tuple (legacy form) or a dict
    mapping lower-case names to argument lists. For a dict the lookup order
    is: '<key>+<exe>', then the arguments for key followed by those for exe,
    then the 'default' entry, then the default parameter.

    Returns a pair (args, for_compat); for_compat is True only when the
    legacy list/tuple form was found, in which case that object is returned
    unchanged. In every other case args is a new list, so callers may
    modify it without affecting the configuration or the default.
    """
    argdict = params.get(arg_name, {})
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        return argdict, True

    if argdict is None:
        # Hand out a copy: the default may be the shared list from the
        # function signature.
        return (list(default) if isinstance(default, list) else default), False
    assert isinstance(argdict, dict)

    assert isinstance(key, compat_str)
    key = key.lower()

    args = exe_args = None
    if exe is not None:
        assert isinstance(exe, compat_str)
        exe = exe.lower()
        args = argdict.get('%s+%s' % (key, exe))
        if args is None:
            exe_args = argdict.get(exe)

    if args is None:
        # When key and exe are the same name its entry was already read as
        # exe_args; do not count it twice.
        args = argdict.get(key) if key != exe else None
        if args is None and exe_args is None:
            args = argdict.get('default', default)

    args, exe_args = args or [], exe_args or []
    assert isinstance(args, (list, tuple))
    assert isinstance(exe_args, (list, tuple))
    # Convert both sides: a tuple cannot be concatenated with a list.
    return list(args) + list(exe_args), False
66e289ba
S
4688
4689
39672624
YCH
4690class ISO639Utils(object):
4691 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
4692 _lang_map = {
4693 'aa': 'aar',
4694 'ab': 'abk',
4695 'ae': 'ave',
4696 'af': 'afr',
4697 'ak': 'aka',
4698 'am': 'amh',
4699 'an': 'arg',
4700 'ar': 'ara',
4701 'as': 'asm',
4702 'av': 'ava',
4703 'ay': 'aym',
4704 'az': 'aze',
4705 'ba': 'bak',
4706 'be': 'bel',
4707 'bg': 'bul',
4708 'bh': 'bih',
4709 'bi': 'bis',
4710 'bm': 'bam',
4711 'bn': 'ben',
4712 'bo': 'bod',
4713 'br': 'bre',
4714 'bs': 'bos',
4715 'ca': 'cat',
4716 'ce': 'che',
4717 'ch': 'cha',
4718 'co': 'cos',
4719 'cr': 'cre',
4720 'cs': 'ces',
4721 'cu': 'chu',
4722 'cv': 'chv',
4723 'cy': 'cym',
4724 'da': 'dan',
4725 'de': 'deu',
4726 'dv': 'div',
4727 'dz': 'dzo',
4728 'ee': 'ewe',
4729 'el': 'ell',
4730 'en': 'eng',
4731 'eo': 'epo',
4732 'es': 'spa',
4733 'et': 'est',
4734 'eu': 'eus',
4735 'fa': 'fas',
4736 'ff': 'ful',
4737 'fi': 'fin',
4738 'fj': 'fij',
4739 'fo': 'fao',
4740 'fr': 'fra',
4741 'fy': 'fry',
4742 'ga': 'gle',
4743 'gd': 'gla',
4744 'gl': 'glg',
4745 'gn': 'grn',
4746 'gu': 'guj',
4747 'gv': 'glv',
4748 'ha': 'hau',
4749 'he': 'heb',
b7acc835 4750 'iw': 'heb', # Replaced by he in 1989 revision
39672624
YCH
4751 'hi': 'hin',
4752 'ho': 'hmo',
4753 'hr': 'hrv',
4754 'ht': 'hat',
4755 'hu': 'hun',
4756 'hy': 'hye',
4757 'hz': 'her',
4758 'ia': 'ina',
4759 'id': 'ind',
b7acc835 4760 'in': 'ind', # Replaced by id in 1989 revision
39672624
YCH
4761 'ie': 'ile',
4762 'ig': 'ibo',
4763 'ii': 'iii',
4764 'ik': 'ipk',
4765 'io': 'ido',
4766 'is': 'isl',
4767 'it': 'ita',
4768 'iu': 'iku',
4769 'ja': 'jpn',
4770 'jv': 'jav',
4771 'ka': 'kat',
4772 'kg': 'kon',
4773 'ki': 'kik',
4774 'kj': 'kua',
4775 'kk': 'kaz',
4776 'kl': 'kal',
4777 'km': 'khm',
4778 'kn': 'kan',
4779 'ko': 'kor',
4780 'kr': 'kau',
4781 'ks': 'kas',
4782 'ku': 'kur',
4783 'kv': 'kom',
4784 'kw': 'cor',
4785 'ky': 'kir',
4786 'la': 'lat',
4787 'lb': 'ltz',
4788 'lg': 'lug',
4789 'li': 'lim',
4790 'ln': 'lin',
4791 'lo': 'lao',
4792 'lt': 'lit',
4793 'lu': 'lub',
4794 'lv': 'lav',
4795 'mg': 'mlg',
4796 'mh': 'mah',
4797 'mi': 'mri',
4798 'mk': 'mkd',
4799 'ml': 'mal',
4800 'mn': 'mon',
4801 'mr': 'mar',
4802 'ms': 'msa',
4803 'mt': 'mlt',
4804 'my': 'mya',
4805 'na': 'nau',
4806 'nb': 'nob',
4807 'nd': 'nde',
4808 'ne': 'nep',
4809 'ng': 'ndo',
4810 'nl': 'nld',
4811 'nn': 'nno',
4812 'no': 'nor',
4813 'nr': 'nbl',
4814 'nv': 'nav',
4815 'ny': 'nya',
4816 'oc': 'oci',
4817 'oj': 'oji',
4818 'om': 'orm',
4819 'or': 'ori',
4820 'os': 'oss',
4821 'pa': 'pan',
4822 'pi': 'pli',
4823 'pl': 'pol',
4824 'ps': 'pus',
4825 'pt': 'por',
4826 'qu': 'que',
4827 'rm': 'roh',
4828 'rn': 'run',
4829 'ro': 'ron',
4830 'ru': 'rus',
4831 'rw': 'kin',
4832 'sa': 'san',
4833 'sc': 'srd',
4834 'sd': 'snd',
4835 'se': 'sme',
4836 'sg': 'sag',
4837 'si': 'sin',
4838 'sk': 'slk',
4839 'sl': 'slv',
4840 'sm': 'smo',
4841 'sn': 'sna',
4842 'so': 'som',
4843 'sq': 'sqi',
4844 'sr': 'srp',
4845 'ss': 'ssw',
4846 'st': 'sot',
4847 'su': 'sun',
4848 'sv': 'swe',
4849 'sw': 'swa',
4850 'ta': 'tam',
4851 'te': 'tel',
4852 'tg': 'tgk',
4853 'th': 'tha',
4854 'ti': 'tir',
4855 'tk': 'tuk',
4856 'tl': 'tgl',
4857 'tn': 'tsn',
4858 'to': 'ton',
4859 'tr': 'tur',
4860 'ts': 'tso',
4861 'tt': 'tat',
4862 'tw': 'twi',
4863 'ty': 'tah',
4864 'ug': 'uig',
4865 'uk': 'ukr',
4866 'ur': 'urd',
4867 'uz': 'uzb',
4868 've': 'ven',
4869 'vi': 'vie',
4870 'vo': 'vol',
4871 'wa': 'wln',
4872 'wo': 'wol',
4873 'xh': 'xho',
4874 'yi': 'yid',
e9a50fba 4875 'ji': 'yid', # Replaced by yi in 1989 revision
39672624
YCH
4876 'yo': 'yor',
4877 'za': 'zha',
4878 'zh': 'zho',
4879 'zu': 'zul',
4880 }
4881
4882 @classmethod
4883 def short2long(cls, code):
4884 """Convert language code from ISO 639-1 to ISO 639-2/T"""
4885 return cls._lang_map.get(code[:2])
4886
4887 @classmethod
4888 def long2short(cls, code):
4889 """Convert language code from ISO 639-2/T to ISO 639-1"""
4890 for short_name, long_name in cls._lang_map.items():
4891 if long_name == code:
4892 return short_name
4893
4894
4eb10f66
YCH
4895class ISO3166Utils(object):
4896 # From http://data.okfn.org/data/core/country-list
4897 _country_map = {
4898 'AF': 'Afghanistan',
4899 'AX': 'Åland Islands',
4900 'AL': 'Albania',
4901 'DZ': 'Algeria',
4902 'AS': 'American Samoa',
4903 'AD': 'Andorra',
4904 'AO': 'Angola',
4905 'AI': 'Anguilla',
4906 'AQ': 'Antarctica',
4907 'AG': 'Antigua and Barbuda',
4908 'AR': 'Argentina',
4909 'AM': 'Armenia',
4910 'AW': 'Aruba',
4911 'AU': 'Australia',
4912 'AT': 'Austria',
4913 'AZ': 'Azerbaijan',
4914 'BS': 'Bahamas',
4915 'BH': 'Bahrain',
4916 'BD': 'Bangladesh',
4917 'BB': 'Barbados',
4918 'BY': 'Belarus',
4919 'BE': 'Belgium',
4920 'BZ': 'Belize',
4921 'BJ': 'Benin',
4922 'BM': 'Bermuda',
4923 'BT': 'Bhutan',
4924 'BO': 'Bolivia, Plurinational State of',
4925 'BQ': 'Bonaire, Sint Eustatius and Saba',
4926 'BA': 'Bosnia and Herzegovina',
4927 'BW': 'Botswana',
4928 'BV': 'Bouvet Island',
4929 'BR': 'Brazil',
4930 'IO': 'British Indian Ocean Territory',
4931 'BN': 'Brunei Darussalam',
4932 'BG': 'Bulgaria',
4933 'BF': 'Burkina Faso',
4934 'BI': 'Burundi',
4935 'KH': 'Cambodia',
4936 'CM': 'Cameroon',
4937 'CA': 'Canada',
4938 'CV': 'Cape Verde',
4939 'KY': 'Cayman Islands',
4940 'CF': 'Central African Republic',
4941 'TD': 'Chad',
4942 'CL': 'Chile',
4943 'CN': 'China',
4944 'CX': 'Christmas Island',
4945 'CC': 'Cocos (Keeling) Islands',
4946 'CO': 'Colombia',
4947 'KM': 'Comoros',
4948 'CG': 'Congo',
4949 'CD': 'Congo, the Democratic Republic of the',
4950 'CK': 'Cook Islands',
4951 'CR': 'Costa Rica',
4952 'CI': 'Côte d\'Ivoire',
4953 'HR': 'Croatia',
4954 'CU': 'Cuba',
4955 'CW': 'Curaçao',
4956 'CY': 'Cyprus',
4957 'CZ': 'Czech Republic',
4958 'DK': 'Denmark',
4959 'DJ': 'Djibouti',
4960 'DM': 'Dominica',
4961 'DO': 'Dominican Republic',
4962 'EC': 'Ecuador',
4963 'EG': 'Egypt',
4964 'SV': 'El Salvador',
4965 'GQ': 'Equatorial Guinea',
4966 'ER': 'Eritrea',
4967 'EE': 'Estonia',
4968 'ET': 'Ethiopia',
4969 'FK': 'Falkland Islands (Malvinas)',
4970 'FO': 'Faroe Islands',
4971 'FJ': 'Fiji',
4972 'FI': 'Finland',
4973 'FR': 'France',
4974 'GF': 'French Guiana',
4975 'PF': 'French Polynesia',
4976 'TF': 'French Southern Territories',
4977 'GA': 'Gabon',
4978 'GM': 'Gambia',
4979 'GE': 'Georgia',
4980 'DE': 'Germany',
4981 'GH': 'Ghana',
4982 'GI': 'Gibraltar',
4983 'GR': 'Greece',
4984 'GL': 'Greenland',
4985 'GD': 'Grenada',
4986 'GP': 'Guadeloupe',
4987 'GU': 'Guam',
4988 'GT': 'Guatemala',
4989 'GG': 'Guernsey',
4990 'GN': 'Guinea',
4991 'GW': 'Guinea-Bissau',
4992 'GY': 'Guyana',
4993 'HT': 'Haiti',
4994 'HM': 'Heard Island and McDonald Islands',
4995 'VA': 'Holy See (Vatican City State)',
4996 'HN': 'Honduras',
4997 'HK': 'Hong Kong',
4998 'HU': 'Hungary',
4999 'IS': 'Iceland',
5000 'IN': 'India',
5001 'ID': 'Indonesia',
5002 'IR': 'Iran, Islamic Republic of',
5003 'IQ': 'Iraq',
5004 'IE': 'Ireland',
5005 'IM': 'Isle of Man',
5006 'IL': 'Israel',
5007 'IT': 'Italy',
5008 'JM': 'Jamaica',
5009 'JP': 'Japan',
5010 'JE': 'Jersey',
5011 'JO': 'Jordan',
5012 'KZ': 'Kazakhstan',
5013 'KE': 'Kenya',
5014 'KI': 'Kiribati',
5015 'KP': 'Korea, Democratic People\'s Republic of',
5016 'KR': 'Korea, Republic of',
5017 'KW': 'Kuwait',
5018 'KG': 'Kyrgyzstan',
5019 'LA': 'Lao People\'s Democratic Republic',
5020 'LV': 'Latvia',
5021 'LB': 'Lebanon',
5022 'LS': 'Lesotho',
5023 'LR': 'Liberia',
5024 'LY': 'Libya',
5025 'LI': 'Liechtenstein',
5026 'LT': 'Lithuania',
5027 'LU': 'Luxembourg',
5028 'MO': 'Macao',
5029 'MK': 'Macedonia, the Former Yugoslav Republic of',
5030 'MG': 'Madagascar',
5031 'MW': 'Malawi',
5032 'MY': 'Malaysia',
5033 'MV': 'Maldives',
5034 'ML': 'Mali',
5035 'MT': 'Malta',
5036 'MH': 'Marshall Islands',
5037 'MQ': 'Martinique',
5038 'MR': 'Mauritania',
5039 'MU': 'Mauritius',
5040 'YT': 'Mayotte',
5041 'MX': 'Mexico',
5042 'FM': 'Micronesia, Federated States of',
5043 'MD': 'Moldova, Republic of',
5044 'MC': 'Monaco',
5045 'MN': 'Mongolia',
5046 'ME': 'Montenegro',
5047 'MS': 'Montserrat',
5048 'MA': 'Morocco',
5049 'MZ': 'Mozambique',
5050 'MM': 'Myanmar',
5051 'NA': 'Namibia',
5052 'NR': 'Nauru',
5053 'NP': 'Nepal',
5054 'NL': 'Netherlands',
5055 'NC': 'New Caledonia',
5056 'NZ': 'New Zealand',
5057 'NI': 'Nicaragua',
5058 'NE': 'Niger',
5059 'NG': 'Nigeria',
5060 'NU': 'Niue',
5061 'NF': 'Norfolk Island',
5062 'MP': 'Northern Mariana Islands',
5063 'NO': 'Norway',
5064 'OM': 'Oman',
5065 'PK': 'Pakistan',
5066 'PW': 'Palau',
5067 'PS': 'Palestine, State of',
5068 'PA': 'Panama',
5069 'PG': 'Papua New Guinea',
5070 'PY': 'Paraguay',
5071 'PE': 'Peru',
5072 'PH': 'Philippines',
5073 'PN': 'Pitcairn',
5074 'PL': 'Poland',
5075 'PT': 'Portugal',
5076 'PR': 'Puerto Rico',
5077 'QA': 'Qatar',
5078 'RE': 'Réunion',
5079 'RO': 'Romania',
5080 'RU': 'Russian Federation',
5081 'RW': 'Rwanda',
5082 'BL': 'Saint Barthélemy',
5083 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
5084 'KN': 'Saint Kitts and Nevis',
5085 'LC': 'Saint Lucia',
5086 'MF': 'Saint Martin (French part)',
5087 'PM': 'Saint Pierre and Miquelon',
5088 'VC': 'Saint Vincent and the Grenadines',
5089 'WS': 'Samoa',
5090 'SM': 'San Marino',
5091 'ST': 'Sao Tome and Principe',
5092 'SA': 'Saudi Arabia',
5093 'SN': 'Senegal',
5094 'RS': 'Serbia',
5095 'SC': 'Seychelles',
5096 'SL': 'Sierra Leone',
5097 'SG': 'Singapore',
5098 'SX': 'Sint Maarten (Dutch part)',
5099 'SK': 'Slovakia',
5100 'SI': 'Slovenia',
5101 'SB': 'Solomon Islands',
5102 'SO': 'Somalia',
5103 'ZA': 'South Africa',
5104 'GS': 'South Georgia and the South Sandwich Islands',
5105 'SS': 'South Sudan',
5106 'ES': 'Spain',
5107 'LK': 'Sri Lanka',
5108 'SD': 'Sudan',
5109 'SR': 'Suriname',
5110 'SJ': 'Svalbard and Jan Mayen',
5111 'SZ': 'Swaziland',
5112 'SE': 'Sweden',
5113 'CH': 'Switzerland',
5114 'SY': 'Syrian Arab Republic',
5115 'TW': 'Taiwan, Province of China',
5116 'TJ': 'Tajikistan',
5117 'TZ': 'Tanzania, United Republic of',
5118 'TH': 'Thailand',
5119 'TL': 'Timor-Leste',
5120 'TG': 'Togo',
5121 'TK': 'Tokelau',
5122 'TO': 'Tonga',
5123 'TT': 'Trinidad and Tobago',
5124 'TN': 'Tunisia',
5125 'TR': 'Turkey',
5126 'TM': 'Turkmenistan',
5127 'TC': 'Turks and Caicos Islands',
5128 'TV': 'Tuvalu',
5129 'UG': 'Uganda',
5130 'UA': 'Ukraine',
5131 'AE': 'United Arab Emirates',
5132 'GB': 'United Kingdom',
5133 'US': 'United States',
5134 'UM': 'United States Minor Outlying Islands',
5135 'UY': 'Uruguay',
5136 'UZ': 'Uzbekistan',
5137 'VU': 'Vanuatu',
5138 'VE': 'Venezuela, Bolivarian Republic of',
5139 'VN': 'Viet Nam',
5140 'VG': 'Virgin Islands, British',
5141 'VI': 'Virgin Islands, U.S.',
5142 'WF': 'Wallis and Futuna',
5143 'EH': 'Western Sahara',
5144 'YE': 'Yemen',
5145 'ZM': 'Zambia',
5146 'ZW': 'Zimbabwe',
5147 }
5148
5149 @classmethod
5150 def short2full(cls, code):
5151 """Convert an ISO 3166-2 country code to the corresponding full name"""
5152 return cls._country_map.get(code.upper())
5153
5154
773f291d
S
5155class GeoUtils(object):
5156 # Major IPv4 address blocks per country
5157 _country_ip_map = {
53896ca5 5158 'AD': '46.172.224.0/19',
773f291d
S
5159 'AE': '94.200.0.0/13',
5160 'AF': '149.54.0.0/17',
5161 'AG': '209.59.64.0/18',
5162 'AI': '204.14.248.0/21',
5163 'AL': '46.99.0.0/16',
5164 'AM': '46.70.0.0/15',
5165 'AO': '105.168.0.0/13',
53896ca5
S
5166 'AP': '182.50.184.0/21',
5167 'AQ': '23.154.160.0/24',
773f291d
S
5168 'AR': '181.0.0.0/12',
5169 'AS': '202.70.112.0/20',
53896ca5 5170 'AT': '77.116.0.0/14',
773f291d
S
5171 'AU': '1.128.0.0/11',
5172 'AW': '181.41.0.0/18',
53896ca5
S
5173 'AX': '185.217.4.0/22',
5174 'AZ': '5.197.0.0/16',
773f291d
S
5175 'BA': '31.176.128.0/17',
5176 'BB': '65.48.128.0/17',
5177 'BD': '114.130.0.0/16',
5178 'BE': '57.0.0.0/8',
53896ca5 5179 'BF': '102.178.0.0/15',
773f291d
S
5180 'BG': '95.42.0.0/15',
5181 'BH': '37.131.0.0/17',
5182 'BI': '154.117.192.0/18',
5183 'BJ': '137.255.0.0/16',
53896ca5 5184 'BL': '185.212.72.0/23',
773f291d
S
5185 'BM': '196.12.64.0/18',
5186 'BN': '156.31.0.0/16',
5187 'BO': '161.56.0.0/16',
5188 'BQ': '161.0.80.0/20',
53896ca5 5189 'BR': '191.128.0.0/12',
773f291d
S
5190 'BS': '24.51.64.0/18',
5191 'BT': '119.2.96.0/19',
5192 'BW': '168.167.0.0/16',
5193 'BY': '178.120.0.0/13',
5194 'BZ': '179.42.192.0/18',
5195 'CA': '99.224.0.0/11',
5196 'CD': '41.243.0.0/16',
53896ca5
S
5197 'CF': '197.242.176.0/21',
5198 'CG': '160.113.0.0/16',
773f291d 5199 'CH': '85.0.0.0/13',
53896ca5 5200 'CI': '102.136.0.0/14',
773f291d
S
5201 'CK': '202.65.32.0/19',
5202 'CL': '152.172.0.0/14',
53896ca5 5203 'CM': '102.244.0.0/14',
773f291d
S
5204 'CN': '36.128.0.0/10',
5205 'CO': '181.240.0.0/12',
5206 'CR': '201.192.0.0/12',
5207 'CU': '152.206.0.0/15',
5208 'CV': '165.90.96.0/19',
5209 'CW': '190.88.128.0/17',
53896ca5 5210 'CY': '31.153.0.0/16',
773f291d
S
5211 'CZ': '88.100.0.0/14',
5212 'DE': '53.0.0.0/8',
5213 'DJ': '197.241.0.0/17',
5214 'DK': '87.48.0.0/12',
5215 'DM': '192.243.48.0/20',
5216 'DO': '152.166.0.0/15',
5217 'DZ': '41.96.0.0/12',
5218 'EC': '186.68.0.0/15',
5219 'EE': '90.190.0.0/15',
5220 'EG': '156.160.0.0/11',
5221 'ER': '196.200.96.0/20',
5222 'ES': '88.0.0.0/11',
5223 'ET': '196.188.0.0/14',
5224 'EU': '2.16.0.0/13',
5225 'FI': '91.152.0.0/13',
5226 'FJ': '144.120.0.0/16',
53896ca5 5227 'FK': '80.73.208.0/21',
773f291d
S
5228 'FM': '119.252.112.0/20',
5229 'FO': '88.85.32.0/19',
5230 'FR': '90.0.0.0/9',
5231 'GA': '41.158.0.0/15',
5232 'GB': '25.0.0.0/8',
5233 'GD': '74.122.88.0/21',
5234 'GE': '31.146.0.0/16',
5235 'GF': '161.22.64.0/18',
5236 'GG': '62.68.160.0/19',
53896ca5
S
5237 'GH': '154.160.0.0/12',
5238 'GI': '95.164.0.0/16',
773f291d
S
5239 'GL': '88.83.0.0/19',
5240 'GM': '160.182.0.0/15',
5241 'GN': '197.149.192.0/18',
5242 'GP': '104.250.0.0/19',
5243 'GQ': '105.235.224.0/20',
5244 'GR': '94.64.0.0/13',
5245 'GT': '168.234.0.0/16',
5246 'GU': '168.123.0.0/16',
5247 'GW': '197.214.80.0/20',
5248 'GY': '181.41.64.0/18',
5249 'HK': '113.252.0.0/14',
5250 'HN': '181.210.0.0/16',
5251 'HR': '93.136.0.0/13',
5252 'HT': '148.102.128.0/17',
5253 'HU': '84.0.0.0/14',
5254 'ID': '39.192.0.0/10',
5255 'IE': '87.32.0.0/12',
5256 'IL': '79.176.0.0/13',
5257 'IM': '5.62.80.0/20',
5258 'IN': '117.192.0.0/10',
5259 'IO': '203.83.48.0/21',
5260 'IQ': '37.236.0.0/14',
5261 'IR': '2.176.0.0/12',
5262 'IS': '82.221.0.0/16',
5263 'IT': '79.0.0.0/10',
5264 'JE': '87.244.64.0/18',
5265 'JM': '72.27.0.0/17',
5266 'JO': '176.29.0.0/16',
53896ca5 5267 'JP': '133.0.0.0/8',
773f291d
S
5268 'KE': '105.48.0.0/12',
5269 'KG': '158.181.128.0/17',
5270 'KH': '36.37.128.0/17',
5271 'KI': '103.25.140.0/22',
5272 'KM': '197.255.224.0/20',
53896ca5 5273 'KN': '198.167.192.0/19',
773f291d
S
5274 'KP': '175.45.176.0/22',
5275 'KR': '175.192.0.0/10',
5276 'KW': '37.36.0.0/14',
5277 'KY': '64.96.0.0/15',
5278 'KZ': '2.72.0.0/13',
5279 'LA': '115.84.64.0/18',
5280 'LB': '178.135.0.0/16',
53896ca5 5281 'LC': '24.92.144.0/20',
773f291d
S
5282 'LI': '82.117.0.0/19',
5283 'LK': '112.134.0.0/15',
53896ca5 5284 'LR': '102.183.0.0/16',
773f291d
S
5285 'LS': '129.232.0.0/17',
5286 'LT': '78.56.0.0/13',
5287 'LU': '188.42.0.0/16',
5288 'LV': '46.109.0.0/16',
5289 'LY': '41.252.0.0/14',
5290 'MA': '105.128.0.0/11',
5291 'MC': '88.209.64.0/18',
5292 'MD': '37.246.0.0/16',
5293 'ME': '178.175.0.0/17',
5294 'MF': '74.112.232.0/21',
5295 'MG': '154.126.0.0/17',
5296 'MH': '117.103.88.0/21',
5297 'MK': '77.28.0.0/15',
5298 'ML': '154.118.128.0/18',
5299 'MM': '37.111.0.0/17',
5300 'MN': '49.0.128.0/17',
5301 'MO': '60.246.0.0/16',
5302 'MP': '202.88.64.0/20',
5303 'MQ': '109.203.224.0/19',
5304 'MR': '41.188.64.0/18',
5305 'MS': '208.90.112.0/22',
5306 'MT': '46.11.0.0/16',
5307 'MU': '105.16.0.0/12',
5308 'MV': '27.114.128.0/18',
53896ca5 5309 'MW': '102.70.0.0/15',
773f291d
S
5310 'MX': '187.192.0.0/11',
5311 'MY': '175.136.0.0/13',
5312 'MZ': '197.218.0.0/15',
5313 'NA': '41.182.0.0/16',
5314 'NC': '101.101.0.0/18',
5315 'NE': '197.214.0.0/18',
5316 'NF': '203.17.240.0/22',
5317 'NG': '105.112.0.0/12',
5318 'NI': '186.76.0.0/15',
5319 'NL': '145.96.0.0/11',
5320 'NO': '84.208.0.0/13',
5321 'NP': '36.252.0.0/15',
5322 'NR': '203.98.224.0/19',
5323 'NU': '49.156.48.0/22',
5324 'NZ': '49.224.0.0/14',
5325 'OM': '5.36.0.0/15',
5326 'PA': '186.72.0.0/15',
5327 'PE': '186.160.0.0/14',
5328 'PF': '123.50.64.0/18',
5329 'PG': '124.240.192.0/19',
5330 'PH': '49.144.0.0/13',
5331 'PK': '39.32.0.0/11',
5332 'PL': '83.0.0.0/11',
5333 'PM': '70.36.0.0/20',
5334 'PR': '66.50.0.0/16',
5335 'PS': '188.161.0.0/16',
5336 'PT': '85.240.0.0/13',
5337 'PW': '202.124.224.0/20',
5338 'PY': '181.120.0.0/14',
5339 'QA': '37.210.0.0/15',
53896ca5 5340 'RE': '102.35.0.0/16',
773f291d 5341 'RO': '79.112.0.0/13',
53896ca5 5342 'RS': '93.86.0.0/15',
773f291d 5343 'RU': '5.136.0.0/13',
53896ca5 5344 'RW': '41.186.0.0/16',
773f291d
S
5345 'SA': '188.48.0.0/13',
5346 'SB': '202.1.160.0/19',
5347 'SC': '154.192.0.0/11',
53896ca5 5348 'SD': '102.120.0.0/13',
773f291d 5349 'SE': '78.64.0.0/12',
53896ca5 5350 'SG': '8.128.0.0/10',
773f291d
S
5351 'SI': '188.196.0.0/14',
5352 'SK': '78.98.0.0/15',
53896ca5 5353 'SL': '102.143.0.0/17',
773f291d
S
5354 'SM': '89.186.32.0/19',
5355 'SN': '41.82.0.0/15',
53896ca5 5356 'SO': '154.115.192.0/18',
773f291d
S
5357 'SR': '186.179.128.0/17',
5358 'SS': '105.235.208.0/21',
5359 'ST': '197.159.160.0/19',
5360 'SV': '168.243.0.0/16',
5361 'SX': '190.102.0.0/20',
5362 'SY': '5.0.0.0/16',
5363 'SZ': '41.84.224.0/19',
5364 'TC': '65.255.48.0/20',
5365 'TD': '154.68.128.0/19',
5366 'TG': '196.168.0.0/14',
5367 'TH': '171.96.0.0/13',
5368 'TJ': '85.9.128.0/18',
5369 'TK': '27.96.24.0/21',
5370 'TL': '180.189.160.0/20',
5371 'TM': '95.85.96.0/19',
5372 'TN': '197.0.0.0/11',
5373 'TO': '175.176.144.0/21',
5374 'TR': '78.160.0.0/11',
5375 'TT': '186.44.0.0/15',
5376 'TV': '202.2.96.0/19',
5377 'TW': '120.96.0.0/11',
5378 'TZ': '156.156.0.0/14',
53896ca5
S
5379 'UA': '37.52.0.0/14',
5380 'UG': '102.80.0.0/13',
5381 'US': '6.0.0.0/8',
773f291d 5382 'UY': '167.56.0.0/13',
53896ca5 5383 'UZ': '84.54.64.0/18',
773f291d 5384 'VA': '212.77.0.0/19',
53896ca5 5385 'VC': '207.191.240.0/21',
773f291d 5386 'VE': '186.88.0.0/13',
53896ca5 5387 'VG': '66.81.192.0/20',
773f291d
S
5388 'VI': '146.226.0.0/16',
5389 'VN': '14.160.0.0/11',
5390 'VU': '202.80.32.0/20',
5391 'WF': '117.20.32.0/21',
5392 'WS': '202.4.32.0/19',
5393 'YE': '134.35.0.0/16',
5394 'YT': '41.242.116.0/22',
5395 'ZA': '41.0.0.0/11',
53896ca5
S
5396 'ZM': '102.144.0.0/13',
5397 'ZW': '102.177.192.0/18',
773f291d
S
5398 }
5399
5400 @classmethod
5f95927a
S
5401 def random_ipv4(cls, code_or_block):
5402 if len(code_or_block) == 2:
5403 block = cls._country_ip_map.get(code_or_block.upper())
5404 if not block:
5405 return None
5406 else:
5407 block = code_or_block
773f291d
S
5408 addr, preflen = block.split('/')
5409 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
5410 addr_max = addr_min | (0xffffffff >> int(preflen))
18a0defa 5411 return compat_str(socket.inet_ntoa(
4248dad9 5412 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
773f291d
S
5413
5414
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that lets each request choose its own proxy.

    A request may carry a 'Ytdl-request-proxy' header; its value replaces
    the proxy that would otherwise be used and the header is removed before
    the request goes out.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        # Install http_open/https_open callables that pass the '__noproxy__'
        # marker to proxy_open; 'type' and 'meth' are bound as lambda
        # defaults so each callable keeps the value of its own iteration.
        # NOTE(review): presumably the base __init__ below replaces these
        # for schemes that have a configured proxy - confirm.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy header overrides the handler's proxy.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers wrap the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5438
5439
0a5445dd
YCH
5440# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5441# released into Public Domain
5442# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5443
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    The result has no leading zero bytes; zero (and any value that is not
    positive) yields a single zero byte. If optional blocksize is given and
    greater than zero, the front of the byte string is padded with binary
    zeros so that the length is a multiple of blocksize.
    """
    value = int(n)
    # Peel off 32-bit words, least significant first.
    words = []
    while value > 0:
        words.append(compat_struct_pack('>I', value & 0xffffffff))
        value >>= 32
    packed = b''.join(reversed(words))

    # Drop the zero bytes introduced by packing in whole words, but keep at
    # least one byte.
    result = packed.lstrip(b'\000') or b'\000'

    remainder = len(result) % blocksize if blocksize > 0 else 0
    if remainder:
        result = (blocksize - remainder) * b'\000' + result
    return result
5472
5473
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Interpret a byte string as a big-endian unsigned integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes so the input splits into whole 32-bit words
    padding = -len(s) % 4
    if padding:
        s = b'\000' * padding + s

    value = 0
    for offset in range(0, len(s), 4):
        word, = compat_struct_unpack('>I', s[offset:offset + 4])
        value = (value << 32) + word
    return value
5489
5490
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Encrypt one block with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is the input read in reverse byte order, taken as one number
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return '%x' % ciphertext
81bdc8fd
YCH
5506
5507
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2] + padding + [0] + data, where the padding
    consists of random nonzero values.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves room for fewer than 8 padding values
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding values must all be nonzero: the first zero after the
    # [0, 2] marker is the separator in front of the data, so a zero inside
    # the padding would make the message unrecoverable
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5521
5522
def encode_base_n(num, n, table=None):
    """
    Encode the non-negative integer num as a string in base n.

    @param num    number to encode; must not be negative
    @param n      base; must not exceed the number of available digits
    @param table  digit characters to use; defaults to the first n characters
                  of 0-9, a-z, A-Z
    @returns      the encoded string (table[0] when num is 0)
    @raises ValueError  if n exceeds the table length, if num is negative,
                        or if n is below 2 while num is nonzero
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never brings a negative number to zero, and a base
    # below 2 never shrinks the number, so the loop below would not terminate
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small' % n)

    ret = ''
    while num:
        num, digit = divmod(num, n)
        ret = table[digit] + ret
    return ret
f52354a8
YCH
5539
5540
def decode_packed_codes(code):
    """
    Expand the packed code that PACKED_CODES_RE finds in `code`.

    The match supplies the obfuscated code, a base, a symbol count and a
    '|'-separated symbol list. Every word of the obfuscated code is replaced
    by the symbol whose base-n encoded index equals that word.
    """
    packed = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = packed.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each encoded index below count to its symbol; an empty symbol
    # means the encoded index stands for itself
    symbol_table = {}
    while count:
        count -= 1
        encoded_index = encode_base_n(count, base)
        symbol_table[encoded_index] = symbols[count] or encoded_index

    def _lookup(word):
        return symbol_table[word.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)
e154c651 5557
5558
1ced2221
S
def caesar(s, alphabet, shift):
    """
    Shift every character of s that occurs in alphabet by `shift` positions,
    wrapping around at the ends of the alphabet. Characters outside the
    alphabet are kept as they are; a shift of 0 returns s unchanged.
    """
    if shift == 0:
        return s

    size = len(alphabet)

    def _shifted(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_shifted, s))
5566
5567
def rot47(s):
    """Rotate every character of s between '!' and '~' by 47 places."""
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
5570
5571
def parse_m3u8_attributes(attrib):
    """
    Parse a comma-separated KEY=value attribute list into a dict.

    Values may be double-quoted (and may then contain commas); the
    surrounding quotes are not part of the returned value.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    info = {}
    for mobj in re.finditer(attribute_re, attrib):
        val = mobj.group('val')
        info[mobj.group('key')] = val[1:-1] if val.startswith('"') else val
    return info
1143535d
YCH
5579
5580
def urshift(val, n):
    """
    Right-shift val by n bits, first adding 2**32 to negative values so they
    are shifted as unsigned 32-bit numbers.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5583
5584
5585# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5586# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """
    Decode PNG data into rows of unfiltered channel values.

    @param png_data  complete contents of a PNG file as a byte string
    @returns         tuple (width, height, pixels); pixels is a list of
                     `height` rows, each a flat list of `width * 3` integers
    @raises IOError  if the signature or leading IHDR chunk is missing, or if
                     there is no IDAT data

    NOTE: only width and height are read from IHDR. Bit depth, colour type
    and interlacing are never inspected, and every row is treated as
    `width * 3` bytes - presumably only 8-bit RGB images are expected.
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8-byte signature; consumed chunk by chunk below
    header = png_data[8:]

    # The signature must be followed by a chunk whose type field is IHDR
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned struct format for a 1-, 2- or 4-byte string
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: 4-byte length, 4-byte type, `length` data bytes, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk was verified to be IHDR above
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    # The image data is the concatenation of all IDAT chunks, zlib-compressed
    idat = b''

    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of data bytes per row (three per pixel)
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # idx is a flat index into the already decoded values
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every row in the decompressed stream starts with one filter type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so that _get_pixel() can already read
        # the values decoded earlier in this same row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            # Neighbouring values that fall outside the image count as 0
            left = 0
            up = 0

            # "left" is the same channel of the previous pixel (3 bytes back)
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Filter type 0 (and any unknown type) leaves the value unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the value up and to the left
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of a, b, c is closest to p; ties go to a, then b
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
efa97bdc
YCH
5690
5691
def write_xattr(path, key, value):
    """
    Set the extended attribute `key` of the file at `path` to `value`.

    Uses the first mechanism that is available: the python `xattr` module
    (pyxattr or xattr), NTFS Alternate Data Streams on Windows, or the
    `setfattr` / `xattr` command line tools.

    @param path   path of the file to tag
    @param key    attribute name
    @param value  attribute value; presumably a byte string, since it is
                  written in binary mode and UTF-8 decoded for the CLI tools
    @raises XAttrMetadataError     if setting the attribute fails
    @raises XAttrUnavailableError  if no usable mechanism is found, or the
                                   installed pyxattr is too old
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): only ImportError is caught below, so unless
                # XAttrUnavailableError derives from ImportError (not visible
                # here) this propagates to the caller and, despite the
                # message, no other implementation is tried
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No python xattr module is installed: use platform specific means
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # The stream is addressed as "<path>:<key>"
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed on the command line, so it has to be text
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are present
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported together with the tool's stderr
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
0c265486
YCH
5774
5775
def random_birthday(year_field, month_field, day_field):
    """
    Pick a random date between 1950-01-01 and 1995-12-31 (both inclusive).

    Returns a dict that maps the three given field names to the year, month
    and day of that date, each as a string without zero padding.
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }
732044af 5786
c76eb41b 5787
# Templates for internet shortcut files, which are plain text files.
# The %(...)s placeholders are meant to be filled in with a mapping.
# lstrip() removes the newline that follows the opening quotes, so every
# template starts directly with its first visible line.

# Placeholder: url
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# Placeholder: url
# NOTE(review): the URL is inserted into XML as is - presumably the caller
# is expected to escape it; confirm at the call site.
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# Placeholders: filename, url
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()
5813
5814
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    An explicit port is kept, except for port 80 on `http` URLs, where it is redundant. IRIs without a host (e.g. relative references) only have their remaining components escaped.

    Raises ValueError for IPv6 hosts, which are not supported.
    """

    iri_parts = compat_urllib_parse_urlparse(iri)

    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.

    net_location = ''
    if iri_parts.username:
        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no network location at all
    hostname = iri_parts.hostname
    if hostname:
        net_location += hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
        # The 'idna' encoding produces ASCII text.

    # Port 80 may only be omitted where it is the default, i.e. for http;
    # dropping it from e.g. an https URL would point to a different port
    port = iri_parts.port
    if port is not None and not (port == 80 and iri_parts.scheme.lower() == 'http'):
        net_location += ':' + str(port)

    return compat_urllib_parse_urlunparse(
        (iri_parts.scheme,
            net_location,

            compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))

    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
5857
5858
def to_high_limit_path(path):
    """
    On Windows (win32, cygwin), return the absolute form of path with a
    leading `\\\\?\\` marker; on every other platform return path unchanged.
    """
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    # A raw string cannot end in a backslash, hence the space that is stripped again
    long_path_prefix = r'\\?\ '.rstrip()
    return long_path_prefix + os.path.abspath(path)
76d321f6 5865
c76eb41b 5866
def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
    """
    Format obj[field] with template, or return default when the value is
    missing or is one of the `ignore` values.

    If func is given, it is applied to a non-ignored value first; its result
    is subject to the same `ignore` check before being formatted.
    """
    value = obj.get(field, default)
    if func and value not in ignore:
        value = func(value)
    if value in ignore:
        return default
    return template % value
00dd0cd5 5872
5873
def clean_podcast_url(url):
    """Remove every tracking/analytics redirect prefix matched below from url."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
ffcb8191
THD
5889
5890
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """
    Return a random version 4 UUID as a lowercase, hyphenated string.

    The version digit is fixed to 4 and the variant digit (the `y` of the
    template) is limited to 8, 9, a or b; all other digits are random.
    """
    def _random_digit(mobj):
        nibble = random.randint(0, 15)
        if mobj.group(0) == 'y':
            # Force the two most significant bits to 10 for the variant digit
            nibble = nibble & 0x3 | 0x8
        return _HEX_TABLE[nibble]

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')