]> jfr.im git - yt-dlp.git/blame - youtube_dlc/utils.py
Updated to release 2020.11.21.1
[yt-dlp.git] / youtube_dlc / utils.py
CommitLineData
d77c3dfd 1#!/usr/bin/env python
dcdb292f 2# coding: utf-8
d77c3dfd 3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
c380cc28 10import collections
62e609ab 11import contextlib
e3946f98 12import ctypes
c496ca96
PH
13import datetime
14import email.utils
0c265486 15import email.header
f45c185f 16import errno
be4a824d 17import functools
d77c3dfd 18import gzip
03f9daab 19import io
79a2e94e 20import itertools
f4bfd65f 21import json
d77c3dfd 22import locale
02dbf93f 23import math
347de493 24import operator
d77c3dfd 25import os
c496ca96 26import platform
773f291d 27import random
d77c3dfd 28import re
c496ca96 29import socket
79a2e94e 30import ssl
1c088fa8 31import subprocess
d77c3dfd 32import sys
181c8655 33import tempfile
c380cc28 34import time
01951dda 35import traceback
bcf89ce6 36import xml.etree.ElementTree
d77c3dfd 37import zlib
d77c3dfd 38
8c25f81b 39from .compat import (
b4a3d461 40 compat_HTMLParseError,
8bb56eee 41 compat_HTMLParser,
8f9312c3 42 compat_basestring,
8c25f81b 43 compat_chr,
1bab3437 44 compat_cookiejar,
d7cd9a9e 45 compat_ctypes_WINFUNCTYPE,
36e6f62c 46 compat_etree_fromstring,
51098426 47 compat_expanduser,
8c25f81b 48 compat_html_entities,
55b2f099 49 compat_html_entities_html5,
be4a824d 50 compat_http_client,
42db58ec 51 compat_integer_types,
c86b6142 52 compat_kwargs,
efa97bdc 53 compat_os_name,
8c25f81b 54 compat_parse_qs,
702ccf2d 55 compat_shlex_quote,
8c25f81b 56 compat_str,
edaa23f8 57 compat_struct_pack,
d3f8e038 58 compat_struct_unpack,
8c25f81b
PH
59 compat_urllib_error,
60 compat_urllib_parse,
15707c7e 61 compat_urllib_parse_urlencode,
8c25f81b 62 compat_urllib_parse_urlparse,
7581bfc9 63 compat_urllib_parse_unquote_plus,
8c25f81b
PH
64 compat_urllib_request,
65 compat_urlparse,
810c10ba 66 compat_xpath,
8c25f81b 67)
4644ac55 68
71aff188
YCH
69from .socks import (
70 ProxyType,
71 sockssocket,
72)
73
4644ac55 74
51fb4995
YCH
def register_socks_protocols():
    """Register the SOCKS URL schemes with the URL parser.

    Appends 'socks', 'socks4', 'socks4a' and 'socks5' to
    ``compat_urlparse.uses_netloc`` unless a scheme is already listed, so
    calling this function repeatedly never adds duplicates.
    """
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)
82
83
468e2e92
FV
# The class of compiled patterns is not clearly defined otherwise, so take
# it from an actual compiled pattern.
compiled_regex_type = type(re.compile(''))
86
f7a147e3
S
87
88def random_user_agent():
89 _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
90 _CHROME_VERSIONS = (
91 '74.0.3729.129',
92 '76.0.3780.3',
93 '76.0.3780.2',
94 '74.0.3729.128',
95 '76.0.3780.1',
96 '76.0.3780.0',
97 '75.0.3770.15',
98 '74.0.3729.127',
99 '74.0.3729.126',
100 '76.0.3779.1',
101 '76.0.3779.0',
102 '75.0.3770.14',
103 '74.0.3729.125',
104 '76.0.3778.1',
105 '76.0.3778.0',
106 '75.0.3770.13',
107 '74.0.3729.124',
108 '74.0.3729.123',
109 '73.0.3683.121',
110 '76.0.3777.1',
111 '76.0.3777.0',
112 '75.0.3770.12',
113 '74.0.3729.122',
114 '76.0.3776.4',
115 '75.0.3770.11',
116 '74.0.3729.121',
117 '76.0.3776.3',
118 '76.0.3776.2',
119 '73.0.3683.120',
120 '74.0.3729.120',
121 '74.0.3729.119',
122 '74.0.3729.118',
123 '76.0.3776.1',
124 '76.0.3776.0',
125 '76.0.3775.5',
126 '75.0.3770.10',
127 '74.0.3729.117',
128 '76.0.3775.4',
129 '76.0.3775.3',
130 '74.0.3729.116',
131 '75.0.3770.9',
132 '76.0.3775.2',
133 '76.0.3775.1',
134 '76.0.3775.0',
135 '75.0.3770.8',
136 '74.0.3729.115',
137 '74.0.3729.114',
138 '76.0.3774.1',
139 '76.0.3774.0',
140 '75.0.3770.7',
141 '74.0.3729.113',
142 '74.0.3729.112',
143 '74.0.3729.111',
144 '76.0.3773.1',
145 '76.0.3773.0',
146 '75.0.3770.6',
147 '74.0.3729.110',
148 '74.0.3729.109',
149 '76.0.3772.1',
150 '76.0.3772.0',
151 '75.0.3770.5',
152 '74.0.3729.108',
153 '74.0.3729.107',
154 '76.0.3771.1',
155 '76.0.3771.0',
156 '75.0.3770.4',
157 '74.0.3729.106',
158 '74.0.3729.105',
159 '75.0.3770.3',
160 '74.0.3729.104',
161 '74.0.3729.103',
162 '74.0.3729.102',
163 '75.0.3770.2',
164 '74.0.3729.101',
165 '75.0.3770.1',
166 '75.0.3770.0',
167 '74.0.3729.100',
168 '75.0.3769.5',
169 '75.0.3769.4',
170 '74.0.3729.99',
171 '75.0.3769.3',
172 '75.0.3769.2',
173 '75.0.3768.6',
174 '74.0.3729.98',
175 '75.0.3769.1',
176 '75.0.3769.0',
177 '74.0.3729.97',
178 '73.0.3683.119',
179 '73.0.3683.118',
180 '74.0.3729.96',
181 '75.0.3768.5',
182 '75.0.3768.4',
183 '75.0.3768.3',
184 '75.0.3768.2',
185 '74.0.3729.95',
186 '74.0.3729.94',
187 '75.0.3768.1',
188 '75.0.3768.0',
189 '74.0.3729.93',
190 '74.0.3729.92',
191 '73.0.3683.117',
192 '74.0.3729.91',
193 '75.0.3766.3',
194 '74.0.3729.90',
195 '75.0.3767.2',
196 '75.0.3767.1',
197 '75.0.3767.0',
198 '74.0.3729.89',
199 '73.0.3683.116',
200 '75.0.3766.2',
201 '74.0.3729.88',
202 '75.0.3766.1',
203 '75.0.3766.0',
204 '74.0.3729.87',
205 '73.0.3683.115',
206 '74.0.3729.86',
207 '75.0.3765.1',
208 '75.0.3765.0',
209 '74.0.3729.85',
210 '73.0.3683.114',
211 '74.0.3729.84',
212 '75.0.3764.1',
213 '75.0.3764.0',
214 '74.0.3729.83',
215 '73.0.3683.113',
216 '75.0.3763.2',
217 '75.0.3761.4',
218 '74.0.3729.82',
219 '75.0.3763.1',
220 '75.0.3763.0',
221 '74.0.3729.81',
222 '73.0.3683.112',
223 '75.0.3762.1',
224 '75.0.3762.0',
225 '74.0.3729.80',
226 '75.0.3761.3',
227 '74.0.3729.79',
228 '73.0.3683.111',
229 '75.0.3761.2',
230 '74.0.3729.78',
231 '74.0.3729.77',
232 '75.0.3761.1',
233 '75.0.3761.0',
234 '73.0.3683.110',
235 '74.0.3729.76',
236 '74.0.3729.75',
237 '75.0.3760.0',
238 '74.0.3729.74',
239 '75.0.3759.8',
240 '75.0.3759.7',
241 '75.0.3759.6',
242 '74.0.3729.73',
243 '75.0.3759.5',
244 '74.0.3729.72',
245 '73.0.3683.109',
246 '75.0.3759.4',
247 '75.0.3759.3',
248 '74.0.3729.71',
249 '75.0.3759.2',
250 '74.0.3729.70',
251 '73.0.3683.108',
252 '74.0.3729.69',
253 '75.0.3759.1',
254 '75.0.3759.0',
255 '74.0.3729.68',
256 '73.0.3683.107',
257 '74.0.3729.67',
258 '75.0.3758.1',
259 '75.0.3758.0',
260 '74.0.3729.66',
261 '73.0.3683.106',
262 '74.0.3729.65',
263 '75.0.3757.1',
264 '75.0.3757.0',
265 '74.0.3729.64',
266 '73.0.3683.105',
267 '74.0.3729.63',
268 '75.0.3756.1',
269 '75.0.3756.0',
270 '74.0.3729.62',
271 '73.0.3683.104',
272 '75.0.3755.3',
273 '75.0.3755.2',
274 '73.0.3683.103',
275 '75.0.3755.1',
276 '75.0.3755.0',
277 '74.0.3729.61',
278 '73.0.3683.102',
279 '74.0.3729.60',
280 '75.0.3754.2',
281 '74.0.3729.59',
282 '75.0.3753.4',
283 '74.0.3729.58',
284 '75.0.3754.1',
285 '75.0.3754.0',
286 '74.0.3729.57',
287 '73.0.3683.101',
288 '75.0.3753.3',
289 '75.0.3752.2',
290 '75.0.3753.2',
291 '74.0.3729.56',
292 '75.0.3753.1',
293 '75.0.3753.0',
294 '74.0.3729.55',
295 '73.0.3683.100',
296 '74.0.3729.54',
297 '75.0.3752.1',
298 '75.0.3752.0',
299 '74.0.3729.53',
300 '73.0.3683.99',
301 '74.0.3729.52',
302 '75.0.3751.1',
303 '75.0.3751.0',
304 '74.0.3729.51',
305 '73.0.3683.98',
306 '74.0.3729.50',
307 '75.0.3750.0',
308 '74.0.3729.49',
309 '74.0.3729.48',
310 '74.0.3729.47',
311 '75.0.3749.3',
312 '74.0.3729.46',
313 '73.0.3683.97',
314 '75.0.3749.2',
315 '74.0.3729.45',
316 '75.0.3749.1',
317 '75.0.3749.0',
318 '74.0.3729.44',
319 '73.0.3683.96',
320 '74.0.3729.43',
321 '74.0.3729.42',
322 '75.0.3748.1',
323 '75.0.3748.0',
324 '74.0.3729.41',
325 '75.0.3747.1',
326 '73.0.3683.95',
327 '75.0.3746.4',
328 '74.0.3729.40',
329 '74.0.3729.39',
330 '75.0.3747.0',
331 '75.0.3746.3',
332 '75.0.3746.2',
333 '74.0.3729.38',
334 '75.0.3746.1',
335 '75.0.3746.0',
336 '74.0.3729.37',
337 '73.0.3683.94',
338 '75.0.3745.5',
339 '75.0.3745.4',
340 '75.0.3745.3',
341 '75.0.3745.2',
342 '74.0.3729.36',
343 '75.0.3745.1',
344 '75.0.3745.0',
345 '75.0.3744.2',
346 '74.0.3729.35',
347 '73.0.3683.93',
348 '74.0.3729.34',
349 '75.0.3744.1',
350 '75.0.3744.0',
351 '74.0.3729.33',
352 '73.0.3683.92',
353 '74.0.3729.32',
354 '74.0.3729.31',
355 '73.0.3683.91',
356 '75.0.3741.2',
357 '75.0.3740.5',
358 '74.0.3729.30',
359 '75.0.3741.1',
360 '75.0.3741.0',
361 '74.0.3729.29',
362 '75.0.3740.4',
363 '73.0.3683.90',
364 '74.0.3729.28',
365 '75.0.3740.3',
366 '73.0.3683.89',
367 '75.0.3740.2',
368 '74.0.3729.27',
369 '75.0.3740.1',
370 '75.0.3740.0',
371 '74.0.3729.26',
372 '73.0.3683.88',
373 '73.0.3683.87',
374 '74.0.3729.25',
375 '75.0.3739.1',
376 '75.0.3739.0',
377 '73.0.3683.86',
378 '74.0.3729.24',
379 '73.0.3683.85',
380 '75.0.3738.4',
381 '75.0.3738.3',
382 '75.0.3738.2',
383 '75.0.3738.1',
384 '75.0.3738.0',
385 '74.0.3729.23',
386 '73.0.3683.84',
387 '74.0.3729.22',
388 '74.0.3729.21',
389 '75.0.3737.1',
390 '75.0.3737.0',
391 '74.0.3729.20',
392 '73.0.3683.83',
393 '74.0.3729.19',
394 '75.0.3736.1',
395 '75.0.3736.0',
396 '74.0.3729.18',
397 '73.0.3683.82',
398 '74.0.3729.17',
399 '75.0.3735.1',
400 '75.0.3735.0',
401 '74.0.3729.16',
402 '73.0.3683.81',
403 '75.0.3734.1',
404 '75.0.3734.0',
405 '74.0.3729.15',
406 '73.0.3683.80',
407 '74.0.3729.14',
408 '75.0.3733.1',
409 '75.0.3733.0',
410 '75.0.3732.1',
411 '74.0.3729.13',
412 '74.0.3729.12',
413 '73.0.3683.79',
414 '74.0.3729.11',
415 '75.0.3732.0',
416 '74.0.3729.10',
417 '73.0.3683.78',
418 '74.0.3729.9',
419 '74.0.3729.8',
420 '74.0.3729.7',
421 '75.0.3731.3',
422 '75.0.3731.2',
423 '75.0.3731.0',
424 '74.0.3729.6',
425 '73.0.3683.77',
426 '73.0.3683.76',
427 '75.0.3730.5',
428 '75.0.3730.4',
429 '73.0.3683.75',
430 '74.0.3729.5',
431 '73.0.3683.74',
432 '75.0.3730.3',
433 '75.0.3730.2',
434 '74.0.3729.4',
435 '73.0.3683.73',
436 '73.0.3683.72',
437 '75.0.3730.1',
438 '75.0.3730.0',
439 '74.0.3729.3',
440 '73.0.3683.71',
441 '74.0.3729.2',
442 '73.0.3683.70',
443 '74.0.3729.1',
444 '74.0.3729.0',
445 '74.0.3726.4',
446 '73.0.3683.69',
447 '74.0.3726.3',
448 '74.0.3728.0',
449 '74.0.3726.2',
450 '73.0.3683.68',
451 '74.0.3726.1',
452 '74.0.3726.0',
453 '74.0.3725.4',
454 '73.0.3683.67',
455 '73.0.3683.66',
456 '74.0.3725.3',
457 '74.0.3725.2',
458 '74.0.3725.1',
459 '74.0.3724.8',
460 '74.0.3725.0',
461 '73.0.3683.65',
462 '74.0.3724.7',
463 '74.0.3724.6',
464 '74.0.3724.5',
465 '74.0.3724.4',
466 '74.0.3724.3',
467 '74.0.3724.2',
468 '74.0.3724.1',
469 '74.0.3724.0',
470 '73.0.3683.64',
471 '74.0.3723.1',
472 '74.0.3723.0',
473 '73.0.3683.63',
474 '74.0.3722.1',
475 '74.0.3722.0',
476 '73.0.3683.62',
477 '74.0.3718.9',
478 '74.0.3702.3',
479 '74.0.3721.3',
480 '74.0.3721.2',
481 '74.0.3721.1',
482 '74.0.3721.0',
483 '74.0.3720.6',
484 '73.0.3683.61',
485 '72.0.3626.122',
486 '73.0.3683.60',
487 '74.0.3720.5',
488 '72.0.3626.121',
489 '74.0.3718.8',
490 '74.0.3720.4',
491 '74.0.3720.3',
492 '74.0.3718.7',
493 '74.0.3720.2',
494 '74.0.3720.1',
495 '74.0.3720.0',
496 '74.0.3718.6',
497 '74.0.3719.5',
498 '73.0.3683.59',
499 '74.0.3718.5',
500 '74.0.3718.4',
501 '74.0.3719.4',
502 '74.0.3719.3',
503 '74.0.3719.2',
504 '74.0.3719.1',
505 '73.0.3683.58',
506 '74.0.3719.0',
507 '73.0.3683.57',
508 '73.0.3683.56',
509 '74.0.3718.3',
510 '73.0.3683.55',
511 '74.0.3718.2',
512 '74.0.3718.1',
513 '74.0.3718.0',
514 '73.0.3683.54',
515 '74.0.3717.2',
516 '73.0.3683.53',
517 '74.0.3717.1',
518 '74.0.3717.0',
519 '73.0.3683.52',
520 '74.0.3716.1',
521 '74.0.3716.0',
522 '73.0.3683.51',
523 '74.0.3715.1',
524 '74.0.3715.0',
525 '73.0.3683.50',
526 '74.0.3711.2',
527 '74.0.3714.2',
528 '74.0.3713.3',
529 '74.0.3714.1',
530 '74.0.3714.0',
531 '73.0.3683.49',
532 '74.0.3713.1',
533 '74.0.3713.0',
534 '72.0.3626.120',
535 '73.0.3683.48',
536 '74.0.3712.2',
537 '74.0.3712.1',
538 '74.0.3712.0',
539 '73.0.3683.47',
540 '72.0.3626.119',
541 '73.0.3683.46',
542 '74.0.3710.2',
543 '72.0.3626.118',
544 '74.0.3711.1',
545 '74.0.3711.0',
546 '73.0.3683.45',
547 '72.0.3626.117',
548 '74.0.3710.1',
549 '74.0.3710.0',
550 '73.0.3683.44',
551 '72.0.3626.116',
552 '74.0.3709.1',
553 '74.0.3709.0',
554 '74.0.3704.9',
555 '73.0.3683.43',
556 '72.0.3626.115',
557 '74.0.3704.8',
558 '74.0.3704.7',
559 '74.0.3708.0',
560 '74.0.3706.7',
561 '74.0.3704.6',
562 '73.0.3683.42',
563 '72.0.3626.114',
564 '74.0.3706.6',
565 '72.0.3626.113',
566 '74.0.3704.5',
567 '74.0.3706.5',
568 '74.0.3706.4',
569 '74.0.3706.3',
570 '74.0.3706.2',
571 '74.0.3706.1',
572 '74.0.3706.0',
573 '73.0.3683.41',
574 '72.0.3626.112',
575 '74.0.3705.1',
576 '74.0.3705.0',
577 '73.0.3683.40',
578 '72.0.3626.111',
579 '73.0.3683.39',
580 '74.0.3704.4',
581 '73.0.3683.38',
582 '74.0.3704.3',
583 '74.0.3704.2',
584 '74.0.3704.1',
585 '74.0.3704.0',
586 '73.0.3683.37',
587 '72.0.3626.110',
588 '72.0.3626.109',
589 '74.0.3703.3',
590 '74.0.3703.2',
591 '73.0.3683.36',
592 '74.0.3703.1',
593 '74.0.3703.0',
594 '73.0.3683.35',
595 '72.0.3626.108',
596 '74.0.3702.2',
597 '74.0.3699.3',
598 '74.0.3702.1',
599 '74.0.3702.0',
600 '73.0.3683.34',
601 '72.0.3626.107',
602 '73.0.3683.33',
603 '74.0.3701.1',
604 '74.0.3701.0',
605 '73.0.3683.32',
606 '73.0.3683.31',
607 '72.0.3626.105',
608 '74.0.3700.1',
609 '74.0.3700.0',
610 '73.0.3683.29',
611 '72.0.3626.103',
612 '74.0.3699.2',
613 '74.0.3699.1',
614 '74.0.3699.0',
615 '73.0.3683.28',
616 '72.0.3626.102',
617 '73.0.3683.27',
618 '73.0.3683.26',
619 '74.0.3698.0',
620 '74.0.3696.2',
621 '72.0.3626.101',
622 '73.0.3683.25',
623 '74.0.3696.1',
624 '74.0.3696.0',
625 '74.0.3694.8',
626 '72.0.3626.100',
627 '74.0.3694.7',
628 '74.0.3694.6',
629 '74.0.3694.5',
630 '74.0.3694.4',
631 '72.0.3626.99',
632 '72.0.3626.98',
633 '74.0.3694.3',
634 '73.0.3683.24',
635 '72.0.3626.97',
636 '72.0.3626.96',
637 '72.0.3626.95',
638 '73.0.3683.23',
639 '72.0.3626.94',
640 '73.0.3683.22',
641 '73.0.3683.21',
642 '72.0.3626.93',
643 '74.0.3694.2',
644 '72.0.3626.92',
645 '74.0.3694.1',
646 '74.0.3694.0',
647 '74.0.3693.6',
648 '73.0.3683.20',
649 '72.0.3626.91',
650 '74.0.3693.5',
651 '74.0.3693.4',
652 '74.0.3693.3',
653 '74.0.3693.2',
654 '73.0.3683.19',
655 '74.0.3693.1',
656 '74.0.3693.0',
657 '73.0.3683.18',
658 '72.0.3626.90',
659 '74.0.3692.1',
660 '74.0.3692.0',
661 '73.0.3683.17',
662 '72.0.3626.89',
663 '74.0.3687.3',
664 '74.0.3691.1',
665 '74.0.3691.0',
666 '73.0.3683.16',
667 '72.0.3626.88',
668 '72.0.3626.87',
669 '73.0.3683.15',
670 '74.0.3690.1',
671 '74.0.3690.0',
672 '73.0.3683.14',
673 '72.0.3626.86',
674 '73.0.3683.13',
675 '73.0.3683.12',
676 '74.0.3689.1',
677 '74.0.3689.0',
678 '73.0.3683.11',
679 '72.0.3626.85',
680 '73.0.3683.10',
681 '72.0.3626.84',
682 '73.0.3683.9',
683 '74.0.3688.1',
684 '74.0.3688.0',
685 '73.0.3683.8',
686 '72.0.3626.83',
687 '74.0.3687.2',
688 '74.0.3687.1',
689 '74.0.3687.0',
690 '73.0.3683.7',
691 '72.0.3626.82',
692 '74.0.3686.4',
693 '72.0.3626.81',
694 '74.0.3686.3',
695 '74.0.3686.2',
696 '74.0.3686.1',
697 '74.0.3686.0',
698 '73.0.3683.6',
699 '72.0.3626.80',
700 '74.0.3685.1',
701 '74.0.3685.0',
702 '73.0.3683.5',
703 '72.0.3626.79',
704 '74.0.3684.1',
705 '74.0.3684.0',
706 '73.0.3683.4',
707 '72.0.3626.78',
708 '72.0.3626.77',
709 '73.0.3683.3',
710 '73.0.3683.2',
711 '72.0.3626.76',
712 '73.0.3683.1',
713 '73.0.3683.0',
714 '72.0.3626.75',
715 '71.0.3578.141',
716 '73.0.3682.1',
717 '73.0.3682.0',
718 '72.0.3626.74',
719 '71.0.3578.140',
720 '73.0.3681.4',
721 '73.0.3681.3',
722 '73.0.3681.2',
723 '73.0.3681.1',
724 '73.0.3681.0',
725 '72.0.3626.73',
726 '71.0.3578.139',
727 '72.0.3626.72',
728 '72.0.3626.71',
729 '73.0.3680.1',
730 '73.0.3680.0',
731 '72.0.3626.70',
732 '71.0.3578.138',
733 '73.0.3678.2',
734 '73.0.3679.1',
735 '73.0.3679.0',
736 '72.0.3626.69',
737 '71.0.3578.137',
738 '73.0.3678.1',
739 '73.0.3678.0',
740 '71.0.3578.136',
741 '73.0.3677.1',
742 '73.0.3677.0',
743 '72.0.3626.68',
744 '72.0.3626.67',
745 '71.0.3578.135',
746 '73.0.3676.1',
747 '73.0.3676.0',
748 '73.0.3674.2',
749 '72.0.3626.66',
750 '71.0.3578.134',
751 '73.0.3674.1',
752 '73.0.3674.0',
753 '72.0.3626.65',
754 '71.0.3578.133',
755 '73.0.3673.2',
756 '73.0.3673.1',
757 '73.0.3673.0',
758 '72.0.3626.64',
759 '71.0.3578.132',
760 '72.0.3626.63',
761 '72.0.3626.62',
762 '72.0.3626.61',
763 '72.0.3626.60',
764 '73.0.3672.1',
765 '73.0.3672.0',
766 '72.0.3626.59',
767 '71.0.3578.131',
768 '73.0.3671.3',
769 '73.0.3671.2',
770 '73.0.3671.1',
771 '73.0.3671.0',
772 '72.0.3626.58',
773 '71.0.3578.130',
774 '73.0.3670.1',
775 '73.0.3670.0',
776 '72.0.3626.57',
777 '71.0.3578.129',
778 '73.0.3669.1',
779 '73.0.3669.0',
780 '72.0.3626.56',
781 '71.0.3578.128',
782 '73.0.3668.2',
783 '73.0.3668.1',
784 '73.0.3668.0',
785 '72.0.3626.55',
786 '71.0.3578.127',
787 '73.0.3667.2',
788 '73.0.3667.1',
789 '73.0.3667.0',
790 '72.0.3626.54',
791 '71.0.3578.126',
792 '73.0.3666.1',
793 '73.0.3666.0',
794 '72.0.3626.53',
795 '71.0.3578.125',
796 '73.0.3665.4',
797 '73.0.3665.3',
798 '72.0.3626.52',
799 '73.0.3665.2',
800 '73.0.3664.4',
801 '73.0.3665.1',
802 '73.0.3665.0',
803 '72.0.3626.51',
804 '71.0.3578.124',
805 '72.0.3626.50',
806 '73.0.3664.3',
807 '73.0.3664.2',
808 '73.0.3664.1',
809 '73.0.3664.0',
810 '73.0.3663.2',
811 '72.0.3626.49',
812 '71.0.3578.123',
813 '73.0.3663.1',
814 '73.0.3663.0',
815 '72.0.3626.48',
816 '71.0.3578.122',
817 '73.0.3662.1',
818 '73.0.3662.0',
819 '72.0.3626.47',
820 '71.0.3578.121',
821 '73.0.3661.1',
822 '72.0.3626.46',
823 '73.0.3661.0',
824 '72.0.3626.45',
825 '71.0.3578.120',
826 '73.0.3660.2',
827 '73.0.3660.1',
828 '73.0.3660.0',
829 '72.0.3626.44',
830 '71.0.3578.119',
831 '73.0.3659.1',
832 '73.0.3659.0',
833 '72.0.3626.43',
834 '71.0.3578.118',
835 '73.0.3658.1',
836 '73.0.3658.0',
837 '72.0.3626.42',
838 '71.0.3578.117',
839 '73.0.3657.1',
840 '73.0.3657.0',
841 '72.0.3626.41',
842 '71.0.3578.116',
843 '73.0.3656.1',
844 '73.0.3656.0',
845 '72.0.3626.40',
846 '71.0.3578.115',
847 '73.0.3655.1',
848 '73.0.3655.0',
849 '72.0.3626.39',
850 '71.0.3578.114',
851 '73.0.3654.1',
852 '73.0.3654.0',
853 '72.0.3626.38',
854 '71.0.3578.113',
855 '73.0.3653.1',
856 '73.0.3653.0',
857 '72.0.3626.37',
858 '71.0.3578.112',
859 '73.0.3652.1',
860 '73.0.3652.0',
861 '72.0.3626.36',
862 '71.0.3578.111',
863 '73.0.3651.1',
864 '73.0.3651.0',
865 '72.0.3626.35',
866 '71.0.3578.110',
867 '73.0.3650.1',
868 '73.0.3650.0',
869 '72.0.3626.34',
870 '71.0.3578.109',
871 '73.0.3649.1',
872 '73.0.3649.0',
873 '72.0.3626.33',
874 '71.0.3578.108',
875 '73.0.3648.2',
876 '73.0.3648.1',
877 '73.0.3648.0',
878 '72.0.3626.32',
879 '71.0.3578.107',
880 '73.0.3647.2',
881 '73.0.3647.1',
882 '73.0.3647.0',
883 '72.0.3626.31',
884 '71.0.3578.106',
885 '73.0.3635.3',
886 '73.0.3646.2',
887 '73.0.3646.1',
888 '73.0.3646.0',
889 '72.0.3626.30',
890 '71.0.3578.105',
891 '72.0.3626.29',
892 '73.0.3645.2',
893 '73.0.3645.1',
894 '73.0.3645.0',
895 '72.0.3626.28',
896 '71.0.3578.104',
897 '72.0.3626.27',
898 '72.0.3626.26',
899 '72.0.3626.25',
900 '72.0.3626.24',
901 '73.0.3644.0',
902 '73.0.3643.2',
903 '72.0.3626.23',
904 '71.0.3578.103',
905 '73.0.3643.1',
906 '73.0.3643.0',
907 '72.0.3626.22',
908 '71.0.3578.102',
909 '73.0.3642.1',
910 '73.0.3642.0',
911 '72.0.3626.21',
912 '71.0.3578.101',
913 '73.0.3641.1',
914 '73.0.3641.0',
915 '72.0.3626.20',
916 '71.0.3578.100',
917 '72.0.3626.19',
918 '73.0.3640.1',
919 '73.0.3640.0',
920 '72.0.3626.18',
921 '73.0.3639.1',
922 '71.0.3578.99',
923 '73.0.3639.0',
924 '72.0.3626.17',
925 '73.0.3638.2',
926 '72.0.3626.16',
927 '73.0.3638.1',
928 '73.0.3638.0',
929 '72.0.3626.15',
930 '71.0.3578.98',
931 '73.0.3635.2',
932 '71.0.3578.97',
933 '73.0.3637.1',
934 '73.0.3637.0',
935 '72.0.3626.14',
936 '71.0.3578.96',
937 '71.0.3578.95',
938 '72.0.3626.13',
939 '71.0.3578.94',
940 '73.0.3636.2',
941 '71.0.3578.93',
942 '73.0.3636.1',
943 '73.0.3636.0',
944 '72.0.3626.12',
945 '71.0.3578.92',
946 '73.0.3635.1',
947 '73.0.3635.0',
948 '72.0.3626.11',
949 '71.0.3578.91',
950 '73.0.3634.2',
951 '73.0.3634.1',
952 '73.0.3634.0',
953 '72.0.3626.10',
954 '71.0.3578.90',
955 '71.0.3578.89',
956 '73.0.3633.2',
957 '73.0.3633.1',
958 '73.0.3633.0',
959 '72.0.3610.4',
960 '72.0.3626.9',
961 '71.0.3578.88',
962 '73.0.3632.5',
963 '73.0.3632.4',
964 '73.0.3632.3',
965 '73.0.3632.2',
966 '73.0.3632.1',
967 '73.0.3632.0',
968 '72.0.3626.8',
969 '71.0.3578.87',
970 '73.0.3631.2',
971 '73.0.3631.1',
972 '73.0.3631.0',
973 '72.0.3626.7',
974 '71.0.3578.86',
975 '72.0.3626.6',
976 '73.0.3630.1',
977 '73.0.3630.0',
978 '72.0.3626.5',
979 '71.0.3578.85',
980 '72.0.3626.4',
981 '73.0.3628.3',
982 '73.0.3628.2',
983 '73.0.3629.1',
984 '73.0.3629.0',
985 '72.0.3626.3',
986 '71.0.3578.84',
987 '73.0.3628.1',
988 '73.0.3628.0',
989 '71.0.3578.83',
990 '73.0.3627.1',
991 '73.0.3627.0',
992 '72.0.3626.2',
993 '71.0.3578.82',
994 '71.0.3578.81',
995 '71.0.3578.80',
996 '72.0.3626.1',
997 '72.0.3626.0',
998 '71.0.3578.79',
999 '70.0.3538.124',
1000 '71.0.3578.78',
1001 '72.0.3623.4',
1002 '72.0.3625.2',
1003 '72.0.3625.1',
1004 '72.0.3625.0',
1005 '71.0.3578.77',
1006 '70.0.3538.123',
1007 '72.0.3624.4',
1008 '72.0.3624.3',
1009 '72.0.3624.2',
1010 '71.0.3578.76',
1011 '72.0.3624.1',
1012 '72.0.3624.0',
1013 '72.0.3623.3',
1014 '71.0.3578.75',
1015 '70.0.3538.122',
1016 '71.0.3578.74',
1017 '72.0.3623.2',
1018 '72.0.3610.3',
1019 '72.0.3623.1',
1020 '72.0.3623.0',
1021 '72.0.3622.3',
1022 '72.0.3622.2',
1023 '71.0.3578.73',
1024 '70.0.3538.121',
1025 '72.0.3622.1',
1026 '72.0.3622.0',
1027 '71.0.3578.72',
1028 '70.0.3538.120',
1029 '72.0.3621.1',
1030 '72.0.3621.0',
1031 '71.0.3578.71',
1032 '70.0.3538.119',
1033 '72.0.3620.1',
1034 '72.0.3620.0',
1035 '71.0.3578.70',
1036 '70.0.3538.118',
1037 '71.0.3578.69',
1038 '72.0.3619.1',
1039 '72.0.3619.0',
1040 '71.0.3578.68',
1041 '70.0.3538.117',
1042 '71.0.3578.67',
1043 '72.0.3618.1',
1044 '72.0.3618.0',
1045 '71.0.3578.66',
1046 '70.0.3538.116',
1047 '72.0.3617.1',
1048 '72.0.3617.0',
1049 '71.0.3578.65',
1050 '70.0.3538.115',
1051 '72.0.3602.3',
1052 '71.0.3578.64',
1053 '72.0.3616.1',
1054 '72.0.3616.0',
1055 '71.0.3578.63',
1056 '70.0.3538.114',
1057 '71.0.3578.62',
1058 '72.0.3615.1',
1059 '72.0.3615.0',
1060 '71.0.3578.61',
1061 '70.0.3538.113',
1062 '72.0.3614.1',
1063 '72.0.3614.0',
1064 '71.0.3578.60',
1065 '70.0.3538.112',
1066 '72.0.3613.1',
1067 '72.0.3613.0',
1068 '71.0.3578.59',
1069 '70.0.3538.111',
1070 '72.0.3612.2',
1071 '72.0.3612.1',
1072 '72.0.3612.0',
1073 '70.0.3538.110',
1074 '71.0.3578.58',
1075 '70.0.3538.109',
1076 '72.0.3611.2',
1077 '72.0.3611.1',
1078 '72.0.3611.0',
1079 '71.0.3578.57',
1080 '70.0.3538.108',
1081 '72.0.3610.2',
1082 '71.0.3578.56',
1083 '71.0.3578.55',
1084 '72.0.3610.1',
1085 '72.0.3610.0',
1086 '71.0.3578.54',
1087 '70.0.3538.107',
1088 '71.0.3578.53',
1089 '72.0.3609.3',
1090 '71.0.3578.52',
1091 '72.0.3609.2',
1092 '71.0.3578.51',
1093 '72.0.3608.5',
1094 '72.0.3609.1',
1095 '72.0.3609.0',
1096 '71.0.3578.50',
1097 '70.0.3538.106',
1098 '72.0.3608.4',
1099 '72.0.3608.3',
1100 '72.0.3608.2',
1101 '71.0.3578.49',
1102 '72.0.3608.1',
1103 '72.0.3608.0',
1104 '70.0.3538.105',
1105 '71.0.3578.48',
1106 '72.0.3607.1',
1107 '72.0.3607.0',
1108 '71.0.3578.47',
1109 '70.0.3538.104',
1110 '72.0.3606.2',
1111 '72.0.3606.1',
1112 '72.0.3606.0',
1113 '71.0.3578.46',
1114 '70.0.3538.103',
1115 '70.0.3538.102',
1116 '72.0.3605.3',
1117 '72.0.3605.2',
1118 '72.0.3605.1',
1119 '72.0.3605.0',
1120 '71.0.3578.45',
1121 '70.0.3538.101',
1122 '71.0.3578.44',
1123 '71.0.3578.43',
1124 '70.0.3538.100',
1125 '70.0.3538.99',
1126 '71.0.3578.42',
1127 '72.0.3604.1',
1128 '72.0.3604.0',
1129 '71.0.3578.41',
1130 '70.0.3538.98',
1131 '71.0.3578.40',
1132 '72.0.3603.2',
1133 '72.0.3603.1',
1134 '72.0.3603.0',
1135 '71.0.3578.39',
1136 '70.0.3538.97',
1137 '72.0.3602.2',
1138 '71.0.3578.38',
1139 '71.0.3578.37',
1140 '72.0.3602.1',
1141 '72.0.3602.0',
1142 '71.0.3578.36',
1143 '70.0.3538.96',
1144 '72.0.3601.1',
1145 '72.0.3601.0',
1146 '71.0.3578.35',
1147 '70.0.3538.95',
1148 '72.0.3600.1',
1149 '72.0.3600.0',
1150 '71.0.3578.34',
1151 '70.0.3538.94',
1152 '72.0.3599.3',
1153 '72.0.3599.2',
1154 '72.0.3599.1',
1155 '72.0.3599.0',
1156 '71.0.3578.33',
1157 '70.0.3538.93',
1158 '72.0.3598.1',
1159 '72.0.3598.0',
1160 '71.0.3578.32',
1161 '70.0.3538.87',
1162 '72.0.3597.1',
1163 '72.0.3597.0',
1164 '72.0.3596.2',
1165 '71.0.3578.31',
1166 '70.0.3538.86',
1167 '71.0.3578.30',
1168 '71.0.3578.29',
1169 '72.0.3596.1',
1170 '72.0.3596.0',
1171 '71.0.3578.28',
1172 '70.0.3538.85',
1173 '72.0.3595.2',
1174 '72.0.3591.3',
1175 '72.0.3595.1',
1176 '72.0.3595.0',
1177 '71.0.3578.27',
1178 '70.0.3538.84',
1179 '72.0.3594.1',
1180 '72.0.3594.0',
1181 '71.0.3578.26',
1182 '70.0.3538.83',
1183 '72.0.3593.2',
1184 '72.0.3593.1',
1185 '72.0.3593.0',
1186 '71.0.3578.25',
1187 '70.0.3538.82',
1188 '72.0.3589.3',
1189 '72.0.3592.2',
1190 '72.0.3592.1',
1191 '72.0.3592.0',
1192 '71.0.3578.24',
1193 '72.0.3589.2',
1194 '70.0.3538.81',
1195 '70.0.3538.80',
1196 '72.0.3591.2',
1197 '72.0.3591.1',
1198 '72.0.3591.0',
1199 '71.0.3578.23',
1200 '70.0.3538.79',
1201 '71.0.3578.22',
1202 '72.0.3590.1',
1203 '72.0.3590.0',
1204 '71.0.3578.21',
1205 '70.0.3538.78',
1206 '70.0.3538.77',
1207 '72.0.3589.1',
1208 '72.0.3589.0',
1209 '71.0.3578.20',
1210 '70.0.3538.76',
1211 '71.0.3578.19',
1212 '70.0.3538.75',
1213 '72.0.3588.1',
1214 '72.0.3588.0',
1215 '71.0.3578.18',
1216 '70.0.3538.74',
1217 '72.0.3586.2',
1218 '72.0.3587.0',
1219 '71.0.3578.17',
1220 '70.0.3538.73',
1221 '72.0.3586.1',
1222 '72.0.3586.0',
1223 '71.0.3578.16',
1224 '70.0.3538.72',
1225 '72.0.3585.1',
1226 '72.0.3585.0',
1227 '71.0.3578.15',
1228 '70.0.3538.71',
1229 '71.0.3578.14',
1230 '72.0.3584.1',
1231 '72.0.3584.0',
1232 '71.0.3578.13',
1233 '70.0.3538.70',
1234 '72.0.3583.2',
1235 '71.0.3578.12',
1236 '72.0.3583.1',
1237 '72.0.3583.0',
1238 '71.0.3578.11',
1239 '70.0.3538.69',
1240 '71.0.3578.10',
1241 '72.0.3582.0',
1242 '72.0.3581.4',
1243 '71.0.3578.9',
1244 '70.0.3538.67',
1245 '72.0.3581.3',
1246 '72.0.3581.2',
1247 '72.0.3581.1',
1248 '72.0.3581.0',
1249 '71.0.3578.8',
1250 '70.0.3538.66',
1251 '72.0.3580.1',
1252 '72.0.3580.0',
1253 '71.0.3578.7',
1254 '70.0.3538.65',
1255 '71.0.3578.6',
1256 '72.0.3579.1',
1257 '72.0.3579.0',
1258 '71.0.3578.5',
1259 '70.0.3538.64',
1260 '71.0.3578.4',
1261 '71.0.3578.3',
1262 '71.0.3578.2',
1263 '71.0.3578.1',
1264 '71.0.3578.0',
1265 '70.0.3538.63',
1266 '69.0.3497.128',
1267 '70.0.3538.62',
1268 '70.0.3538.61',
1269 '70.0.3538.60',
1270 '70.0.3538.59',
1271 '71.0.3577.1',
1272 '71.0.3577.0',
1273 '70.0.3538.58',
1274 '69.0.3497.127',
1275 '71.0.3576.2',
1276 '71.0.3576.1',
1277 '71.0.3576.0',
1278 '70.0.3538.57',
1279 '70.0.3538.56',
1280 '71.0.3575.2',
1281 '70.0.3538.55',
1282 '69.0.3497.126',
1283 '70.0.3538.54',
1284 '71.0.3575.1',
1285 '71.0.3575.0',
1286 '71.0.3574.1',
1287 '71.0.3574.0',
1288 '70.0.3538.53',
1289 '69.0.3497.125',
1290 '70.0.3538.52',
1291 '71.0.3573.1',
1292 '71.0.3573.0',
1293 '70.0.3538.51',
1294 '69.0.3497.124',
1295 '71.0.3572.1',
1296 '71.0.3572.0',
1297 '70.0.3538.50',
1298 '69.0.3497.123',
1299 '71.0.3571.2',
1300 '70.0.3538.49',
1301 '69.0.3497.122',
1302 '71.0.3571.1',
1303 '71.0.3571.0',
1304 '70.0.3538.48',
1305 '69.0.3497.121',
1306 '71.0.3570.1',
1307 '71.0.3570.0',
1308 '70.0.3538.47',
1309 '69.0.3497.120',
1310 '71.0.3568.2',
1311 '71.0.3569.1',
1312 '71.0.3569.0',
1313 '70.0.3538.46',
1314 '69.0.3497.119',
1315 '70.0.3538.45',
1316 '71.0.3568.1',
1317 '71.0.3568.0',
1318 '70.0.3538.44',
1319 '69.0.3497.118',
1320 '70.0.3538.43',
1321 '70.0.3538.42',
1322 '71.0.3567.1',
1323 '71.0.3567.0',
1324 '70.0.3538.41',
1325 '69.0.3497.117',
1326 '71.0.3566.1',
1327 '71.0.3566.0',
1328 '70.0.3538.40',
1329 '69.0.3497.116',
1330 '71.0.3565.1',
1331 '71.0.3565.0',
1332 '70.0.3538.39',
1333 '69.0.3497.115',
1334 '71.0.3564.1',
1335 '71.0.3564.0',
1336 '70.0.3538.38',
1337 '69.0.3497.114',
1338 '71.0.3563.0',
1339 '71.0.3562.2',
1340 '70.0.3538.37',
1341 '69.0.3497.113',
1342 '70.0.3538.36',
1343 '70.0.3538.35',
1344 '71.0.3562.1',
1345 '71.0.3562.0',
1346 '70.0.3538.34',
1347 '69.0.3497.112',
1348 '70.0.3538.33',
1349 '71.0.3561.1',
1350 '71.0.3561.0',
1351 '70.0.3538.32',
1352 '69.0.3497.111',
1353 '71.0.3559.6',
1354 '71.0.3560.1',
1355 '71.0.3560.0',
1356 '71.0.3559.5',
1357 '71.0.3559.4',
1358 '70.0.3538.31',
1359 '69.0.3497.110',
1360 '71.0.3559.3',
1361 '70.0.3538.30',
1362 '69.0.3497.109',
1363 '71.0.3559.2',
1364 '71.0.3559.1',
1365 '71.0.3559.0',
1366 '70.0.3538.29',
1367 '69.0.3497.108',
1368 '71.0.3558.2',
1369 '71.0.3558.1',
1370 '71.0.3558.0',
1371 '70.0.3538.28',
1372 '69.0.3497.107',
1373 '71.0.3557.2',
1374 '71.0.3557.1',
1375 '71.0.3557.0',
1376 '70.0.3538.27',
1377 '69.0.3497.106',
1378 '71.0.3554.4',
1379 '70.0.3538.26',
1380 '71.0.3556.1',
1381 '71.0.3556.0',
1382 '70.0.3538.25',
1383 '71.0.3554.3',
1384 '69.0.3497.105',
1385 '71.0.3554.2',
1386 '70.0.3538.24',
1387 '69.0.3497.104',
1388 '71.0.3555.2',
1389 '70.0.3538.23',
1390 '71.0.3555.1',
1391 '71.0.3555.0',
1392 '70.0.3538.22',
1393 '69.0.3497.103',
1394 '71.0.3554.1',
1395 '71.0.3554.0',
1396 '70.0.3538.21',
1397 '69.0.3497.102',
1398 '71.0.3553.3',
1399 '70.0.3538.20',
1400 '69.0.3497.101',
1401 '71.0.3553.2',
1402 '69.0.3497.100',
1403 '71.0.3553.1',
1404 '71.0.3553.0',
1405 '70.0.3538.19',
1406 '69.0.3497.99',
1407 '69.0.3497.98',
1408 '69.0.3497.97',
1409 '71.0.3552.6',
1410 '71.0.3552.5',
1411 '71.0.3552.4',
1412 '71.0.3552.3',
1413 '71.0.3552.2',
1414 '71.0.3552.1',
1415 '71.0.3552.0',
1416 '70.0.3538.18',
1417 '69.0.3497.96',
1418 '71.0.3551.3',
1419 '71.0.3551.2',
1420 '71.0.3551.1',
1421 '71.0.3551.0',
1422 '70.0.3538.17',
1423 '69.0.3497.95',
1424 '71.0.3550.3',
1425 '71.0.3550.2',
1426 '71.0.3550.1',
1427 '71.0.3550.0',
1428 '70.0.3538.16',
1429 '69.0.3497.94',
1430 '71.0.3549.1',
1431 '71.0.3549.0',
1432 '70.0.3538.15',
1433 '69.0.3497.93',
1434 '69.0.3497.92',
1435 '71.0.3548.1',
1436 '71.0.3548.0',
1437 '70.0.3538.14',
1438 '69.0.3497.91',
1439 '71.0.3547.1',
1440 '71.0.3547.0',
1441 '70.0.3538.13',
1442 '69.0.3497.90',
1443 '71.0.3546.2',
1444 '69.0.3497.89',
1445 '71.0.3546.1',
1446 '71.0.3546.0',
1447 '70.0.3538.12',
1448 '69.0.3497.88',
1449 '71.0.3545.4',
1450 '71.0.3545.3',
1451 '71.0.3545.2',
1452 '71.0.3545.1',
1453 '71.0.3545.0',
1454 '70.0.3538.11',
1455 '69.0.3497.87',
1456 '71.0.3544.5',
1457 '71.0.3544.4',
1458 '71.0.3544.3',
1459 '71.0.3544.2',
1460 '71.0.3544.1',
1461 '71.0.3544.0',
1462 '69.0.3497.86',
1463 '70.0.3538.10',
1464 '69.0.3497.85',
1465 '70.0.3538.9',
1466 '69.0.3497.84',
1467 '71.0.3543.4',
1468 '70.0.3538.8',
1469 '71.0.3543.3',
1470 '71.0.3543.2',
1471 '71.0.3543.1',
1472 '71.0.3543.0',
1473 '70.0.3538.7',
1474 '69.0.3497.83',
1475 '71.0.3542.2',
1476 '71.0.3542.1',
1477 '71.0.3542.0',
1478 '70.0.3538.6',
1479 '69.0.3497.82',
1480 '69.0.3497.81',
1481 '71.0.3541.1',
1482 '71.0.3541.0',
1483 '70.0.3538.5',
1484 '69.0.3497.80',
1485 '71.0.3540.1',
1486 '71.0.3540.0',
1487 '70.0.3538.4',
1488 '69.0.3497.79',
1489 '70.0.3538.3',
1490 '71.0.3539.1',
1491 '71.0.3539.0',
1492 '69.0.3497.78',
1493 '68.0.3440.134',
1494 '69.0.3497.77',
1495 '70.0.3538.2',
1496 '70.0.3538.1',
1497 '70.0.3538.0',
1498 '69.0.3497.76',
1499 '68.0.3440.133',
1500 '69.0.3497.75',
1501 '70.0.3537.2',
1502 '70.0.3537.1',
1503 '70.0.3537.0',
1504 '69.0.3497.74',
1505 '68.0.3440.132',
1506 '70.0.3536.0',
1507 '70.0.3535.5',
1508 '70.0.3535.4',
1509 '70.0.3535.3',
1510 '69.0.3497.73',
1511 '68.0.3440.131',
1512 '70.0.3532.8',
1513 '70.0.3532.7',
1514 '69.0.3497.72',
1515 '69.0.3497.71',
1516 '70.0.3535.2',
1517 '70.0.3535.1',
1518 '70.0.3535.0',
1519 '69.0.3497.70',
1520 '68.0.3440.130',
1521 '69.0.3497.69',
1522 '68.0.3440.129',
1523 '70.0.3534.4',
1524 '70.0.3534.3',
1525 '70.0.3534.2',
1526 '70.0.3534.1',
1527 '70.0.3534.0',
1528 '69.0.3497.68',
1529 '68.0.3440.128',
1530 '70.0.3533.2',
1531 '70.0.3533.1',
1532 '70.0.3533.0',
1533 '69.0.3497.67',
1534 '68.0.3440.127',
1535 '70.0.3532.6',
1536 '70.0.3532.5',
1537 '70.0.3532.4',
1538 '69.0.3497.66',
1539 '68.0.3440.126',
1540 '70.0.3532.3',
1541 '70.0.3532.2',
1542 '70.0.3532.1',
1543 '69.0.3497.60',
1544 '69.0.3497.65',
1545 '69.0.3497.64',
1546 '70.0.3532.0',
1547 '70.0.3531.0',
1548 '70.0.3530.4',
1549 '70.0.3530.3',
1550 '70.0.3530.2',
1551 '69.0.3497.58',
1552 '68.0.3440.125',
1553 '69.0.3497.57',
1554 '69.0.3497.56',
1555 '69.0.3497.55',
1556 '69.0.3497.54',
1557 '70.0.3530.1',
1558 '70.0.3530.0',
1559 '69.0.3497.53',
1560 '68.0.3440.124',
1561 '69.0.3497.52',
1562 '70.0.3529.3',
1563 '70.0.3529.2',
1564 '70.0.3529.1',
1565 '70.0.3529.0',
1566 '69.0.3497.51',
1567 '70.0.3528.4',
1568 '68.0.3440.123',
1569 '70.0.3528.3',
1570 '70.0.3528.2',
1571 '70.0.3528.1',
1572 '70.0.3528.0',
1573 '69.0.3497.50',
1574 '68.0.3440.122',
1575 '70.0.3527.1',
1576 '70.0.3527.0',
1577 '69.0.3497.49',
1578 '68.0.3440.121',
1579 '70.0.3526.1',
1580 '70.0.3526.0',
1581 '68.0.3440.120',
1582 '69.0.3497.48',
1583 '69.0.3497.47',
1584 '68.0.3440.119',
1585 '68.0.3440.118',
1586 '70.0.3525.5',
1587 '70.0.3525.4',
1588 '70.0.3525.3',
1589 '68.0.3440.117',
1590 '69.0.3497.46',
1591 '70.0.3525.2',
1592 '70.0.3525.1',
1593 '70.0.3525.0',
1594 '69.0.3497.45',
1595 '68.0.3440.116',
1596 '70.0.3524.4',
1597 '70.0.3524.3',
1598 '69.0.3497.44',
1599 '70.0.3524.2',
1600 '70.0.3524.1',
1601 '70.0.3524.0',
1602 '70.0.3523.2',
1603 '69.0.3497.43',
1604 '68.0.3440.115',
1605 '70.0.3505.9',
1606 '69.0.3497.42',
1607 '70.0.3505.8',
1608 '70.0.3523.1',
1609 '70.0.3523.0',
1610 '69.0.3497.41',
1611 '68.0.3440.114',
1612 '70.0.3505.7',
1613 '69.0.3497.40',
1614 '70.0.3522.1',
1615 '70.0.3522.0',
1616 '70.0.3521.2',
1617 '69.0.3497.39',
1618 '68.0.3440.113',
1619 '70.0.3505.6',
1620 '70.0.3521.1',
1621 '70.0.3521.0',
1622 '69.0.3497.38',
1623 '68.0.3440.112',
1624 '70.0.3520.1',
1625 '70.0.3520.0',
1626 '69.0.3497.37',
1627 '68.0.3440.111',
1628 '70.0.3519.3',
1629 '70.0.3519.2',
1630 '70.0.3519.1',
1631 '70.0.3519.0',
1632 '69.0.3497.36',
1633 '68.0.3440.110',
1634 '70.0.3518.1',
1635 '70.0.3518.0',
1636 '69.0.3497.35',
1637 '69.0.3497.34',
1638 '68.0.3440.109',
1639 '70.0.3517.1',
1640 '70.0.3517.0',
1641 '69.0.3497.33',
1642 '68.0.3440.108',
1643 '69.0.3497.32',
1644 '70.0.3516.3',
1645 '70.0.3516.2',
1646 '70.0.3516.1',
1647 '70.0.3516.0',
1648 '69.0.3497.31',
1649 '68.0.3440.107',
1650 '70.0.3515.4',
1651 '68.0.3440.106',
1652 '70.0.3515.3',
1653 '70.0.3515.2',
1654 '70.0.3515.1',
1655 '70.0.3515.0',
1656 '69.0.3497.30',
1657 '68.0.3440.105',
1658 '68.0.3440.104',
1659 '70.0.3514.2',
1660 '70.0.3514.1',
1661 '70.0.3514.0',
1662 '69.0.3497.29',
1663 '68.0.3440.103',
1664 '70.0.3513.1',
1665 '70.0.3513.0',
1666 '69.0.3497.28',
1667 )
1668 return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1669
1670
# Default HTTP request headers. The User-Agent value is produced by a single
# call to random_user_agent() when this module is imported.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Fixed User-Agent strings, keyed by browser name.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel meaning "no default was supplied", so that None can itself
# be passed as a default (see the `default is not NO_DEFAULT` checks below).
NO_DEFAULT = object()
1686
7105440c
YCH
# Full English month names, January first.
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, each list ordered January..December.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Recognized media file extensions (without the leading dot).
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; the replacement
# sequence is chained so that some characters map to two letters ('AE', 'ss').
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7 1717
46f59e89
S
# strptime-style date/time layouts that are unambiguous about day/month order.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

# DATE_FORMATS plus numeric layouts in which the day comes before the month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# DATE_FORMATS plus numeric layouts in which the month comes before the day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Captures the four arguments of a `}('...',N,N,'...'.split('|')` call.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures the body of a <script type="application/ld+json"> element as `json_ld`.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
06b3fe29 1778
7105440c 1779
def preferredencoding():
    """Get preferred encoding.

    Returns locale.getpreferredencoding() when that names a codec that can
    actually encode text, and 'UTF-8' in every other case.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: unknown or unusable names raise here.
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 1793
f4bfd65f 1794
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The data is first written to a temporary file created next to fn, which
    is then renamed over fn. If anything fails, the temporary file is removed
    and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # Read the current umask (setting and immediately restoring it)
            # and apply it to the temporary file's permissions.
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best-effort cleanup of the temporary file before propagating.
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1853
1854
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val]

        Returns the first element matching xpath that has attribute key
        (equal to val when val is given), or None when there is none.
        """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            return node.find(xpath + '[@%s]' % key)
        text = '%s' % (val,)
        # Pick a quote character that does not occur in the value, otherwise
        # the predicate would be syntactically invalid.
        if "'" not in text:
            return node.find(xpath + "[@%s='%s']" % (key, text))
        if '"' not in text:
            return node.find(xpath + '[@%s="%s"]' % (key, text))
        # The value contains both quote kinds and cannot be written as a
        # predicate literal: compare the attribute values by hand instead.
        for f in node.findall(xpath + '[@%s]' % key):
            if f.attrib.get(key) == text:
                return f
        return None
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] by filtering the matches manually """
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1869
d7e66d39
JMF
1870# On python2.6 the xml.etree.ElementTree.Element methods don't support
1871# the namespace parameter
5f6a1245
JW
1872
1873
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Rewrite every 'prefix:tag' step of path as '{uri}tag', taking uri from ns_map[prefix]."""
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            # No namespace prefix: keep the step unchanged.
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(_expand(step) for step in path.split('/'))
1884
d77c3dfd 1885
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matching xpath.

    xpath is either a single expression or an iterable of expressions that
    are tried in order. When nothing matches, default is returned if one was
    given; otherwise ExtractorError is raised if fatal is set, else None is
    returned. name is used in the error message in place of xpath.
    """
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from None so that an empty iterable counts as "not found"
        # instead of leaving n unbound.
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n
1907
1908
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the element found by xpath_element().

    A missing element or missing text yields default when one was given;
    otherwise ExtractorError is raised if fatal is set, else None is returned.
    """
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = xpath if name is None else name
        raise ExtractorError('Could not find XML element\'s text %s' % label)
    return None
a41fb80c
S
1922
1923
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first element matching xpath that has it.

    When no such element exists, default is returned if one was given;
    otherwise ExtractorError is raised if fatal is set, else None is returned.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        label = '%s[@%s]' % (xpath, key) if name is None else name
        raise ExtractorError('Could not find XML attribute %s' % label)
    return None
bf0ff932
PH
1935
1936
def get_element_by_id(id, html):
    """Return the content of the first tag whose id attribute equals id, or None"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
43e8fafd 1940
12ea2f30 1941
def get_element_by_class(class_name, html):
    """Return the content of the first tag carrying class_name, or None if there is none"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1946
1947
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag whose attribute matches value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1951
1952
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match class_name as a whole word anywhere inside the class attribute.
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1958
1959
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    value is matched literally unless escape_value is false, in which case it
    is inserted into the pattern as a regular expression.
    """
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    contents = []
    for match in re.finditer(pattern, html):
        content = match.group('content')
        # Content starting with a quote has its first and last character removed.
        if content[:1] in ('"', "'"):
            content = content[1:-1]
        contents.append(unescapeHTML(content))
    return contents
a921f407 1983
c5229f39 1984
8bb56eee
BF
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        compat_HTMLParser.__init__(self)
        # Attributes of the most recently seen start tag.
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        """Record the (name, value) pairs of the start tag as a dict."""
        self.attrs = dict(attrs)
1994
c5229f39 1995
8bb56eee
BF
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    collector = HTMLAttributeParser()
    try:
        collector.feed(html_element)
        collector.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # whatever was gathered before the error is still returned.
        pass
    return collector.attrs
9e6dd238 2020
c5229f39 2021
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Literal newlines carry no meaning in HTML; line breaks come from tags.
    text = html.replace('\n', ' ')
    substitutions = (
        # <br> and </p><p> boundaries become newlines
        (r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n'),
        (r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
        # Strip all remaining html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()
9e6dd238
FV
2037
2038
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    The filename '-' selects standard output instead of a file.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                # Put stdout into binary mode on Windows.
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Prefer the underlying binary buffer when stdout has one.
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors are not retried with a different name.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            # Sanitizing changed nothing, so a retry cannot succeed.
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
d77c3dfd
FV
2069
2070
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp

    Returns None when timestr cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 2078
5f6a1245 2079
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        # Question marks and control characters are dropped entirely.
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    cleaned = ''.join(replace_insane(char) for char in s)
    if is_id:
        return cleaned

    # Collapse runs of underscores and trim them from both ends.
    cleaned = re.sub(r'__+', '_', cleaned).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and cleaned.startswith('-_'):
        cleaned = cleaned[2:]
    if cleaned.startswith('-'):
        cleaned = '_' + cleaned[len('-'):]
    cleaned = cleaned.lstrip('.')
    return cleaned if cleaned else '_'
d77c3dfd 2119
5f6a1245 2120
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On any other platform s is returned unchanged.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc = os.path.splitdrive(s)[0]
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc = os.path.splitunc(s)[0]
    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    cleaned = []
    for part in parts:
        if part not in ['.', '..']:
            # Replace forbidden characters, and a trailing space or dot, with '#'.
            part = re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)
        cleaned.append(part)
    if drive_or_unc:
        cleaned.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*cleaned)
2137
2138
def sanitize_url(url):
    """Repair protocol-less URLs and a few known scheme typos; other URLs pass through."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        # The patterns are anchored, so a substitution happens exactly when
        # the URL starts with the typo.
        fixed, replaced = re.subn(mistake, fixup, url)
        if replaced:
            return fixed
    return url
17bcc626
S
2155
2156
def sanitized_Request(url, *args, **kwargs):
    """Build a compat_urllib_request.Request for url after passing it through sanitize_url()."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
67dda517
S
2159
2160
51098426
S
def expand_path(s):
    """Expand shell variables and ~"""
    # The user directory is expanded first, then the environment variables.
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
2164
2165
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable

    Returns a list that keeps the first occurrence of each element, in
    order. Elements are compared by equality, so they need not be hashable.
    """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 2173
912b38b4 2174
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    entity_with_semicolon is the entity without the leading '&' but with the
    trailing ';'. Unknown entities are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    codepoint = compat_html_entities.name2codepoint.get(entity)
    if codepoint is not None:
        return compat_chr(codepoint)

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        digits = mobj.group(1)
        is_hex = digits.startswith('x')
        if is_hex:
            digits = '0%s' % digits
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(digits, 16 if is_hex else 10))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
2204
2205
def unescapeHTML(s):
    """Replace each '&...;' entity in s via _htmlentity_transform(); None is passed through."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _decode(match):
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
d77c3dfd 2213
8bf48f23 2214
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding name used for filenames and arguments of subprocesses."""
    # For subprocess calls on Windows, encode with locale encoding
    # Refer to http://stackoverflow.com/a/9951851/35070
    use_locale = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
    encoding = preferredencoding() if use_locale else sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2225
2226
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file

    Returns s unchanged where unicode filenames can be used directly, and s
    encoded with get_subprocess_encoding() (unencodable characters dropped)
    otherwise.
    """

    assert type(s) == compat_str

    keep_unicode = (
        # Python 3 has a Unicode API
        sys.version_info >= (3, 0)
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        or (not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5)
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        or sys.platform.startswith('java'))
    if keep_unicode:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2249
2250
def decodeFilename(b, for_subprocess=False):
    """Decode a byte-string filename with get_subprocess_encoding().

    On Python 3, and for anything that is not a bytes object, b is returned
    unchanged.
    """
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 2260
f07b74fc
PH
2261
def encodeArgument(s):
    """Encode a command line argument the same way as a subprocess filename."""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings; this should become an assertion
    # failure once all post processors are fixed.
    return encodeFilename(s.decode('ascii'), True)
2269
2270
aa49acd1
S
def decodeArgument(b):
    """Decode a command line argument; counterpart of encodeArgument()."""
    return decodeFilename(b, for_subprocess=True)
2273
2274
8271226a
PH
def decodeOption(optval):
    """Return optval as text, decoding bytes with preferredencoding(); None is passed through."""
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 2283
5f6a1245 2284
4539dd30
PH
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    The boundaries are inclusive, so exactly one hour is '1:00:00' (not
    '60:00') and exactly one minute is '1:00' (not '60').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2292
a0ddb8a2 2293
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler whose TLS setup honours params.

    Certificate verification is disabled when params['nocheckcertificate']
    is true. Extra keyword arguments are forwarded to the handler.
    """
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            # check_hostname is switched off before verify_mode is relaxed.
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    # Reached when there is no create_default_context, or when the handler
    # rejected the context argument above.
    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 2317
732ea2f0 2318
08f2a92c
JMF
def bug_reports_message():
    """Return the text appended to error messages asking the user to report the bug."""
    update_cmd = (
        'type youtube-dlc -U to update'
        if ytdl_is_updateable()
        else 'see https://github.com/blackjack4494/yt-dlc on how to update')
    return ''.join((
        '; please report this issue on https://github.com/blackjack4494/yt-dlc .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.',
    ))
2328
2329
bf5b9d85
PM
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2333
2334
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
        """

        # Network errors and unavailable videos being handled right now are
        # always treated as expected.
        active_exc_type = sys.exc_info()[0]
        if active_exc_type in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback formatted as one string, or None if there is none."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
01951dda 2362
1c256f70 2363
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected error raised for a URL that is not supported."""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2369
2370
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2374
2375
773f291d
S
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        # Keep the raw message and the countries argument for callers.
        self.countries = countries
        self.msg = msg
2387
2388
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        YoutubeDLError.__init__(self, msg)
        self.exc_info = exc_info
d77c3dfd
FV
2401
2402
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
2410
2411
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        YoutubeDLError.__init__(self, msg)
        # The message is also kept as an attribute.
        self.msg = msg
d77c3dfd 2422
5f6a1245 2423
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
d77c3dfd
FV
2427
2428
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
d77c3dfd
FV
2436
2437
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 2453
5f6a1245 2454
class XAttrMetadataError(YoutubeDLError):
    """Error while writing xattr metadata; `reason` classifies the failure.

    reason is 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED', derived from
    the error code and the message text.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        out_of_space = (
            self.code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg)
        if out_of_space:
            reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
2469
2470
class XAttrUnavailableError(YoutubeDLError):
    """YoutubeDLError subclass for unavailable xattr support (no extra state)."""
2473
2474
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class and, if configured, bind it to a source address.

    The source address is read from ydl_handler._params['source_address'].
    When it is set, the connection only uses remote addresses of the same IP
    family as the source address. Returns the connection object.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # A dot in the source address selects IPv4, anything else IPv6.
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            # Try each candidate in turn; the last error is re-raised if all fail.
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        # Port 0 is used for the local end of the connection.
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address attribute: replace connect() with a version
            # that creates the bound socket itself.
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2538
2539
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker header.

    Without the marker, ``headers`` is returned as-is (same object).  With
    it, a new dict is returned that lacks both the marker and any
    'Accept-Encoding' header (matched case-insensitively); the input is not
    modified.
    """
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered_headers = {}
    for name, value in headers.items():
        if name.lower() == 'accept-encoding':
            continue
        filtered_headers[name] = value
    del filtered_headers['Youtubedl-no-compression']
    return filtered_headers
87f0e62d
YCH
2548
2549
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open ``req``, going through a SOCKS connection class if requested.

        The internal 'Ytdl-socks-proxy' header selects the proxy and is
        removed from the request before it is sent.
        """
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, False)
        return self.do_open(connection_factory, req)

    @staticmethod
    def deflate(data):
        """Decompress ``data`` as raw deflate, retrying as a zlib stream on error."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        """Escape the request URL and fill in the standard headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        original_url = req.get_full_url()
        escaped_url = escape_url(original_url)

        # Substitute URL if any change after escaping
        if original_url != escaped_url:
            req = update_Request(req, url=escaped_url)

        for name, value in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if name.capitalize() in req.headers:
                continue
            req.add_header(name, value)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decode gzip/deflate bodies and percent-encode redirect locations."""
        old_resp = resp
        make_response = compat_urllib_request.addinfourl

        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            try:
                payload = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb').read()
            except IOError as original_ioerror:
                # There may be junk add the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for trim in range(1, 1024):
                    try:
                        payload = gzip.GzipFile(
                            fileobj=io.BytesIO(content[:-trim]), mode='rb').read()
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = make_response(
                io.BytesIO(payload), old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']

        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            inflated = io.BytesIO(self.deflate(resp.read()))
            resp = make_response(
                inflated, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']

        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if not 300 <= resp.code < 400:
            return resp
        location = resp.headers.get('Location')
        if not location:
            return resp

        is_py3 = sys.version_info >= (3, 0)
        # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
        if is_py3:
            location = location.encode('iso-8859-1').decode('utf-8')
        else:
            location = location.decode('utf-8')
        location_escaped = escape_url(location)
        if location != location_escaped:
            del resp.headers['Location']
            if not is_py3:
                location_escaped = location_escaped.encode('utf-8')
            resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
bf50b038 2671
5de90176 2672
71aff188
YCH
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of ``base_class`` that connects through a SOCKS proxy.

    ``base_class`` must derive from HTTPConnection or HTTPSConnection.
    ``socks_proxy`` is a proxy URL whose scheme selects the protocol
    ('socks5', 'socks'/'socks4' or 'socks4a'); the port defaults to 1080 and
    username/password, when present, are URL-unquoted.

    Raises ValueError for any other scheme (previously this surfaced as an
    UnboundLocalError further down).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError(
            'Unsupported SOCKS proxy scheme: %r' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Leave None / '' untouched so absent credentials stay absent
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2714
2715
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler that opens connections via _create_http_connection.

    ``https_conn_class`` overrides the connection class (default:
    HTTPSConnection); ``params`` is stored as ``_params``.
    """

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        conn_class = self._https_conn_class

        # Forward whichever TLS settings this Python version's handler has:
        # '_context' exists on python > 2.6, '_check_hostname' on python 3.x
        kwargs = {}
        for attr, option in (('_context', 'context'),
                             ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                kwargs[option] = getattr(self, attr)

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        connection_factory = functools.partial(
            _create_http_connection, self, conn_class, True)
        return self.do_open(connection_factory, req, **kwargs)
be4a824d
PH
2739
2740
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dlc. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if cookie.discard and not ignore_discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = 'TRUE' if cookie.secure else 'FALSE'
                initial_dot = 'TRUE' if cookie.domain.startswith('.') else 'FALSE'
                expires = '' if cookie.expires is None else compat_str(cookie.expires)
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name, value = '', cookie.name
                else:
                    name, value = cookie.name, cookie.value
                fields = [cookie.domain, initial_dot, cookie.path,
                          secure, expires, name, value]
                f.write('\t'.join(fields) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Drop the '#HttpOnly_' marker so the entry is not taken for a comment
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            entry = self._CookieFileEntry(*cookie_list)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % entry.expires_at)
            return line

        # Validated lines are collected in memory and handed to the parent parser
        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    prepared = prepare_line(line)
                except compat_cookiejar.LoadError as e:
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
                cf.write(prepared)
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2858
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor whose HTTPS hooks mirror the HTTP ones.

    ``http_response`` currently just delegates to the parent class; the
    Set-Cookie escaping workaround below is kept commented out.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        parent = compat_urllib_request.HTTPCookieProcessor
        return parent.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2881
2882
fca6dba8
S
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """Redirect handler; on Python 2 it coerces the redirect URL to text."""

    if sys.version_info[0] < 3:
        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # On python 2 urlh.geturl() may sometimes return redirect URL
            # as byte string instead of unicode. This workaround allows
            # to force it always return unicode.
            base = compat_urllib_request.HTTPRedirectHandler
            return base.redirect_request(
                self, req, fp, code, msg, headers, compat_str(newurl))
2890
2891
46f59e89
S
def extract_timezone(date_str):
    """Split a trailing UTC offset off ``date_str``.

    Recognises a final 'Z' or a '+HH:MM' / '-HHMM' style offset (optionally
    preceded by one space) that follows at least 8 other characters.
    Returns ``(offset, remainder)`` where ``offset`` is a timedelta (zero for
    'Z' or when nothing was recognised) and ``remainder`` is the string
    without the recognised suffix.
    """
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        return datetime.timedelta(), date_str

    remainder = date_str[:-len(m.group('tz'))]
    sign_char = m.group('sign')
    if not sign_char:
        # Plain 'Z': UTC
        return datetime.timedelta(), remainder

    direction = 1 if sign_char == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, remainder
2908
2909
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    Fractional seconds are discarded.  When ``timezone`` (a timedelta) is
    not given, it is taken from the string via extract_timezone().
    ``delimiter`` separates the date and time parts.  Returns None for a
    None input or when the string does not match the expected layout.
    """
    if date_str is None:
        return None

    without_fraction = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, without_fraction = extract_timezone(without_fraction)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        local_dt = datetime.datetime.strptime(without_fraction, date_format)
        return calendar.timegm((local_dt - timezone).timetuple())
    except ValueError:
        return None
912b38b4
PH
2927
2928
46f59e89
S
def date_formats(day_first=True):
    """Return DATE_FORMATS_DAY_FIRST if ``day_first`` is truthy, else DATE_FORMATS_MONTH_FIRST."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
2931
2932
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    Returns None when ``date_str`` is None or cannot be parsed.
    ``day_first`` selects which list of formats from date_formats() is tried.
    """
    if date_str is None:
        return None

    # Replace commas
    cleaned = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    upload_date = None
    # Every format is tried; if several match, the last one wins
    for expression in date_formats(day_first):
        try:
            parsed = datetime.datetime.strptime(cleaned, expression)
        except ValueError:
            continue
        upload_date = parsed.strftime('%Y%m%d')

    if upload_date is None:
        # Fall back to RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(cleaned)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is None:
        return None
    return compat_str(upload_date)
bf50b038 2959
5f6a1245 2960
46f59e89
S
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string.

    Commas and '|' are dropped, a trailing UTC offset is honoured, AM/PM
    markers and unrecognised trailing timezone names are stripped, and the
    formats from date_formats(day_first) are tried in order; the first
    match wins.  As a last resort email.utils.parsedate_tz() is used.
    Returns None when ``date_str`` is None or nothing matches.

    A 'PM' marker adds 12 hours only when the parsed hour is below 12, so
    '12:30 PM' stays at 12:30 instead of rolling over to the next day.
    """
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    is_pm = re.search(r'(?i)PM', date_str) is not None
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    def pm_hours(hour):
        # Hours 12..23 are already in the afternoon; shifting them again
        # would move the result into the following day.
        return 12 if is_pm and hour < 12 else 0

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression)
            dt = dt - timezone + datetime.timedelta(hours=pm_hours(dt.hour))
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_hours(timetuple[3]) * 3600
    return None
46f59e89
S
2992
2993
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from ``url``.

    The text after the last '.' of the part before the first '?' is used
    when it is purely alphanumeric, or when, minus trailing slashes, it is
    in KNOWN_EXTENSIONS.  Otherwise (including a None or dot-less ``url``)
    ``default_ext`` is returned.
    """
    if url is None or '.' not in url:
        return default_ext

    before_query = url.partition('?')[0]
    guess = before_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 3005
5f6a1245 3006
824fa511
S
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return ``filename`` with its extension replaced by '<sub_lang>.<sub_format>'.

    The work is delegated to replace_extension(), which also receives
    ``expected_real_ext``.
    """
    new_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, new_ext, expected_real_ext)
d4051a8e 3009
5f6a1245 3010
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'now', 'today' and 'yesterday' are also accepted on their own.  Months
    and years are approximated as 30 and 365 days respectively.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)

    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()

    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # A bad approximation?
    days_per_unit = {'month': 30, 'year': 365}
    if unit in days_per_unit:
        amount *= days_per_unit[unit]
        unit = 'day'
    # timedelta takes plural keywords: days= / weeks=
    return today + datetime.timedelta(**{unit + 's': amount})
5f6a1245
JW
3038
3039
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not exactly eight digits are returned unchanged."""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    year, month, day = match.groups()
    return '-'.join((year, month, day))
3048
5f6a1245 3049
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the smallest / largest representable
        date.  Raises ValueError when start is later than end.
        """
        self.start = (
            datetime.date.min if start is None else date_from_str(start))
        self.end = (
            datetime.date.max if end is None else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(start=day, end=day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date
        if not isinstance(candidate, datetime.date):
            candidate = date_from_str(candidate)
        return self.start <= candidate <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
c496ca96
PH
3079
3080
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Decode with the preferred encoding when platform.platform() gives bytes
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
3089
3090
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # file descriptor -> argument for GetStdHandle
    # (-11 is STD_OUTPUT_HANDLE, -12 is STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except (AttributeError, io.UnsupportedOperation):
        # No fileno attribute means the stream is virtual; UnsupportedOperation
        # comes from some strange Windows pseudo files
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    wintypes = ctypes.wintypes
    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        wintypes.HANDLE, wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE, wintypes.LPWSTR,
        wintypes.DWORD, ctypes.POINTER(wintypes.DWORD),
        wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(
        wintypes.DWORD, wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        wintypes.BOOL, wintypes.HANDLE,
        ctypes.POINTER(wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        if (GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR:
            return True
        return GetConsoleMode(handle, ctypes.byref(wintypes.DWORD())) == 0

    if not_a_console(h):
        return False

    def next_nonbmp_pos(text):
        # Index of the first character outside the BMP, or len(text) if none
        for index, char in enumerate(text):
            if ord(char) > 0xffff:
                return index
        return len(text)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written as 2 UTF-16 code units
        ret = WriteConsoleW(
            h, s, count or 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if count:
            assert written.value > 0
            s = s[written.value:]
        else:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
    return True
3164
3165
def write_string(s, out=None, encoding=None):
    """Write the text ``s`` to ``out`` (default: sys.stderr) and flush it.

    On win32, when no ``encoding`` is forced, _windows_write_string() gets
    the first try.  Otherwise the text is encoded (ignoring unencodable
    characters) for binary-mode streams and for streams exposing
    ``.buffer``, and written as-is to anything else.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    use_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if use_console_api and _windows_write_string(s, out):
        return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3186
3187
48ea9cea
PH
def bytes_to_intlist(bs):
    """Return the elements of ``bs`` as a list of ints.

    Elements that are already ints (Python 3 bytes) are copied; otherwise
    each element is converted with ord().  An empty input gives [].
    """
    if not bs:
        return []
    already_ints = isinstance(bs[0], int)  # Python 3
    return list(bs) if already_ints else [ord(c) for c in bs]
3195
c257baff 3196
def intlist_to_bytes(xs):
    """Pack a sequence of ints into bytes, one unsigned byte ('B') per item.

    An empty sequence gives b''.
    """
    if not xs:
        return b''
    layout = '%dB' % len(xs)
    return compat_struct_pack(layout, *xs)
c38b1e77
PH
3201
3202
c1c9a79c
PH
3203# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Structure passed to LockFileEx / UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte count (low / high DWORD) passed to both calls
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock ``f`` via LockFileEx; raise OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file() can reuse it
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        flags = 0x2 if exclusive else 0x0  # 0x2 is LOCKFILE_EXCLUSIVE_LOCK
        locked = LockFileEx(
            handle, flags, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not locked:
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file(); raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        unlocked = UnlockFileEx(
            handle, 0,
            whole_low, whole_high, f._lock_file_overlapped_p)
        if not unlocked:
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
    else:
        def _lock_file(f, exclusive):
            """flock() ``f`` exclusively or shared, depending on ``exclusive``."""
            operation = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            fcntl.flock(f, operation)

        def _unlock_file(f):
            """Release the flock() held on ``f``."""
            fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
3276
3277
class locked_file(object):
    """Context manager around a text file that is locked while in use.

    The file is opened on construction with ``mode`` 'r', 'a' or 'w'.
    Entering takes a shared lock for 'r' and an exclusive one otherwise;
    leaving unlocks and closes the file.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ('r', 'a', 'w')
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            # Do not leak the handle when the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
3307
3308
4644ac55
S
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), or 'utf-8' when that is None."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3312
3313
def shell_quote(args):
    """Return ``args`` as one space-separated string with every item shell-quoted.

    Byte strings are first decoded using the filesystem encoding.
    """
    encoding = get_filesystem_encoding()

    def as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        return arg.decode(encoding) if isinstance(arg, bytes) else arg

    return ' '.join([compat_shlex_quote(as_text(a)) for a in args])
9d4660ca
PH
3323
3324
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    ``data`` is JSON-encoded into a '__youtubedl_smuggle' parameter appended
    after '#'.  If ``url`` already carries smuggled data, the two are merged
    and the values already in the URL take precedence.  ``data`` itself is
    left untouched.
    """

    url, idata = unsmuggle_url(url, {})
    # Merge into a copy: updating `data` in place would leak previously
    # smuggled values into the caller's dict.
    merged = dict(data)
    merged.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata
9d4660ca
PH
3333
3334
def unsmuggle_url(smug_url, default=None):
    """Split smuggled data off a URL; returns ``(url, data)``.

    When ``smug_url`` has no '#__youtubedl_smuggle' marker it is returned
    unchanged together with ``default``.  Otherwise everything after the last
    '#' is parsed as a query string and its '__youtubedl_smuggle' value is
    JSON-decoded.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default

    parts = smug_url.rpartition('#')
    url, sdata = parts[0], parts[2]
    payload = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
02dbf93f
PH
3342
3343
02dbf93f
PH
def format_bytes(bytes):
    """Format a byte count with a binary suffix, e.g. 1536 -> '1.50KiB'.

    ``bytes`` may be a number, a numeric string, or None (gives 'N/A').
    The unit is chosen by repeated division rather than math.log(), so
    negative values keep their sign, values below 1 stay in 'B', and values
    beyond the largest suffix are expressed in 'YiB' instead of raising.
    """
    if bytes is None:
        return 'N/A'
    if isinstance(bytes, str):
        bytes = float(bytes)

    suffixes = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
    magnitude = abs(float(bytes))
    exponent = 0
    # Stop at the last suffix so the table index can never run out of range
    while magnitude >= 1024.0 and exponent < len(suffixes) - 1:
        magnitude /= 1024.0
        exponent += 1
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
f53c966a 3356
1c088fa8 3357
fb47597b
S
def lookup_unit_table(unit_table, s):
    """ Parse a leading "<number><unit>" from s and return the number
    multiplied by the unit's entry in unit_table as an int, or None if
    s does not start with such a value """
    alternatives = '|'.join(map(re.escape, unit_table))
    pattern = r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % alternatives
    found = re.match(pattern, s)
    if found is None:
        return None
    # A decimal comma is accepted as well as a decimal point
    number = float(found.group('num').replace(',', '.'))
    return int(number * unit_table[found.group('unit')])
3367
3368
be64b5b0
PH
def parse_filesize(s):
    """ Parse a file size such as '5 MiB' or '1.2gb' into a number of bytes.

    Returns None for None and for strings lookup_unit_table cannot parse.
    """
    if s is None:
        return None

    prefixes = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )

    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    for power, (letter, decimal_name, binary_name) in enumerate(prefixes, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        small = letter.lower()
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        unit_table[letter + 'iB'] = binary
        unit_table[letter + 'B'] = decimal
        unit_table[small + 'B'] = binary
        unit_table[letter + 'b'] = decimal
        unit_table[small + 'b'] = decimal
        unit_table[decimal_name + 'bytes'] = decimal
        unit_table[binary_name + 'bytes'] = binary

    return lookup_unit_table(unit_table, s)
3438
3439
def parse_count(s):
    """ Parse a count such as '1,234' or '1.5M' into an int; None for None
    or for strings that cannot be parsed """
    if s is None:
        return None

    text = s.strip()

    # Digits with separators only: no unit suffix to interpret
    if re.match(r'^[\d,.]+$', text):
        return str_to_int(text)

    multipliers = {}
    for units, factor in (
            (('k', 'K'), 1000),
            (('m', 'M', 'kk', 'KK'), 1000 ** 2)):
        for unit in units:
            multipliers[unit] = factor

    return lookup_unit_table(multipliers, text)
be64b5b0 3459
2f7ae819 3460
b871d7e9
S
def parse_resolution(s):
    """ Extract a resolution from s as a dict with 'width' and/or 'height'.

    Recognises "<w>x<h>", "<lines>p"/"<lines>i" and "4k"/"8k"; returns an
    empty dict for None or when nothing is recognised.
    """
    if s is None:
        return {}

    dimensions = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if dimensions:
        width, height = dimensions.group('w', 'h')
        return {'width': int(width), 'height': int(height)}

    lines = re.search(r'\b(\d+)[pPiI]\b', s)
    if lines:
        return {'height': int(lines.group(1))}

    ultra_hd = re.search(r'\b([48])[kK]\b', s)
    if ultra_hd:
        # 4k -> 2160 lines, 8k -> 4320 lines
        return {'height': 540 * int(ultra_hd.group(1))}

    return {}
3481
3482
0dc41787
S
def parse_bitrate(s):
    """ Return the integer that precedes 'kbps' in s, or None if s is not
    a text string or contains no such value """
    if isinstance(s, compat_str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
3489
3490
def month_by_name(name, lang='en'):
    """ Return the number of a month by its (locale-independent) full name
    in the given language, or None if the name is unknown.
    Languages missing from MONTH_NAMES fall back to English. """

    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    if name in names:
        return names.index(name) + 1
    return None
3500
3501
def month_by_abbreviation(abbrev):
    """ Return the number of a month by its (locale-independent) English
    three-letter abbreviation, or None if it is unknown """

    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
3510
3511
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, except those that already
    start a predefined entity or a numeric character reference"""
    # Code points go up to U+10FFFF, i.e. six hexadecimal or seven decimal
    # digits; a shorter limit would escape valid references such as
    # '&#x1F600;' a second time
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,6};|#[0-9]{,7};)',
        '&amp;',
        xml_str)
e3946f98
PH
3518
3519
def setproctitle(title):
    """ Set the process name through prctl() of libc.so.6.

    Does nothing on Jython, when libc.so.6 cannot be loaded or when the
    loaded library has no prctl. title must be a text string.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    # Initialising the buffer from the bytes themselves (instead of from
    # their length) makes ctypes append the terminating NUL byte that
    # prctl expects at the end of the name
    buf = ctypes.create_string_buffer(title.encode('utf-8'))
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
3544
3545
def remove_start(s, start):
    """ Return s without the leading start; s is returned unchanged when it
    is None or does not begin with start """
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
29eb5174
PH
3548
3549
def remove_end(s, end):
    """ Return s without the trailing end; s is returned unchanged when it
    is None or does not finish with end """
    if s is None or not s.endswith(end):
        return s
    # s[:-len(end)] would turn into s[:0] for an empty end and drop the
    # whole string, so compute the cut position explicitly
    return s[:len(s) - len(end)]
2b9faf55
PH
3552
3553
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes surrounding s;
    anything else (including None) is returned unchanged """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
3561
3562
b6e0c7d2
U
def get_domain(url):
    """ Return the host part of url without scheme and leading 'www.',
    or None if url does not look like it contains a dotted host """
    found = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
    if found is None:
        return None
    return found.group('domain')
3566
3567
def url_basename(url):
    """ Return the last segment of the path of url, ignoring slashes at
    either end of the path """
    trimmed_path = compat_urlparse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3
PH
3571
3572
02dc0a36
S
def base_url(url):
    """ Return url up to and including the last '/' that precedes any
    '?', '#' or '&'. Only http(s) URLs are supported; anything else makes
    the lookup fail with AttributeError. """
    found = re.match(r'https?://[^?#&]+/', url)
    return found.group()
3575
3576
def urljoin(base, path):
    """ Join path onto base.

    bytes arguments are decoded as UTF-8. Returns None when path is empty
    or not a string, or when path is relative and base is not an http(s)
    or protocol-relative URL. A path that is already absolute or
    protocol-relative is returned as is.
    """
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    # The hyphen is placed last in the class so that it is a literal; as
    # '+-.' it formed a range that also accepted ',' in scheme names
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3590
3591
aa94a6d3
PH
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
3595
3596
95cf60e8
S
class PUTRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always PUT """

    def get_method(self):
        return 'PUT'
3600
3601
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to int, multiply by invscale and floor-divide by scale.

    If get_attr is given, the attribute of that name is read from v first.
    default is returned for None, for the empty string and for values
    int() rejects.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v is None or v == '':
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        return default
9732d77e 3614
9572013d 3615
40a90862
JMF
def str_or_none(v, default=None):
    """ Return v converted to a text string, or default if v is None """
    if v is None:
        return default
    return compat_str(v)
3618
9732d77e
PH
3619
def str_to_int(int_str):
    """ A more relaxed version of int_or_none: integers pass through,
    text strings are parsed after ',', '.' and '+' are removed, and any
    other type yields None """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        digits = re.sub(r'[,\.\+]', '', int_str)
        return int_or_none(digits)
    return None
608d11f5
PH
3627
3628
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to float, multiply by invscale and divide by scale;
    default is returned for None and for values float() rejects """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4
PH
3636
3637
c7e327c4
S
def bool_or_none(v, default=None):
    """ Return v if it is a bool, otherwise default """
    if isinstance(v, bool):
        return v
    return default
3640
3641
53cd37ba
S
def strip_or_none(v, default=None):
    """ Return v stripped of surrounding whitespace if it is a text string,
    otherwise default """
    if isinstance(v, compat_str):
        return v.strip()
    return default
b72b4431
S
3644
3645
af03000a
S
def url_or_none(url):
    """ Return the stripped url if it is a text string that starts with an
    optional scheme followed by '//', otherwise None """
    if url and isinstance(url, compat_str):
        candidate = url.strip()
        if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', candidate):
            return candidate
    return None
3651
3652
def parse_duration(s):
    """ Parse a textual duration into a number of seconds.

    Three forms are tried in order; the first pattern that matches wins:
      1. colon separated: [[[days:]hours:]mins:]secs[.fraction]
      2. values followed by unit names or letters, optionally preceded by
         a date part that is terminated by 'T'
      3. a lone, possibly fractional, number of hours or of minutes
    Every form tolerates a trailing 'Z'.

    Returns None if s is not a string or matches none of the forms.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Years, months and weeks are accepted by the pattern below but are
        # not captured, so they do not contribute to the result
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Only one of the two groups can be set by this pattern
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum up whichever components were captured
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        # ms holds the fractional part including its leading dot, e.g. '.5'
        duration += float(ms)
    return duration
91d7d0b3
JMF
3709
3710
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the existing extension of filename.

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
3717
3718
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the extension of filename with ext.

    If expected_real_ext is given and differs from the actual extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3724
3725
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version).
    Returns False if the binary cannot be started. """
    command = [exe] + args
    try:
        process = subprocess.Popen(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
3734
3735
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present.
    The combined stdout/stderr of the call is handed to detect_exe_version. """
    command = [encodeArgument(exe)] + args
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dlc is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output = process.communicate()[0]
    except OSError:
        return False
    if isinstance(output, bytes):
        # The pipe yields bytes; undecodable characters are dropped
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
3753
3754
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Return the first group of version_re found in output, or
    unrecognized if there is no match. Without version_re, the token
    following the word 'version' is looked for. """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    return found.group(1) if found else unrecognized
3764
3765
class PagedList(object):
    """ Base class of paged lists; subclasses provide getslice() """

    def __len__(self):
        # This is only useful for tests
        everything = self.getslice()
        return len(everything)
3770
9c44d242
PH
3771
class OnDemandPagedList(PagedList):
    """ Paged list that requests pages from pagefunc(pagenum) only when a
    slice needs them, optionally remembering pages already fetched """

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """ Return the entries in [start, end) as a list; end=None reads
        until a page comes back shorter than pagesize """
        size = self._pagesize
        collected = []
        for pagenum in itertools.count(start // size):
            page_start = pagenum * size
            page_end = page_start + size
            if start >= page_end:
                continue

            page = self._cache.get(pagenum) if self._use_cache else None
            if page is None:
                page = list(self._pagefunc(pagenum))
                if self._use_cache:
                    self._cache[pagenum] = page

            # Offsets inside this page where the requested slice begins/ends
            offset = start % size if page_start <= start < page_end else 0
            if end is not None and page_start <= end <= page_end:
                limit = ((end - 1) % size) + 1
            else:
                limit = None

            if offset != 0 or limit is not None:
                page = page[offset:limit]
            collected.extend(page)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page) + offset < size:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == page_end:
                break
        return collected
81c2f20b
PH
3822
3823
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known beforehand """

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries in [start, end) as a list; end=None reads
        every page up to pagecount """
        size = self._pagesize
        first_page = start // size
        if end is None:
            stop_page, remaining = self._pagecount, None
        else:
            stop_page, remaining = end // size + 1, end - start
        # Entries to drop from the first fetched page
        to_skip = start % size

        collected = []
        for pagenum in range(first_page, stop_page):
            page = list(self._pagefunc(pagenum))
            if to_skip:
                page = page[to_skip:]
                to_skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    collected.extend(page[:remaining])
                    break
                remaining -= len(page)
            collected.extend(page)
        return collected
3851
3852
def uppercase_escape(s):
    """ Replace every literal \\UXXXXXXXX sequence in s by the character
    it denotes """
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        text, _ = decode(match.group(0))
        return text

    return re.sub(r'\\U[0-9a-fA-F]{8}', _expand, s)
0fe2ff78
YCH
3859
3860
def lowercase_escape(s):
    """ Replace every literal \\uXXXX sequence in s by the character it
    denotes """
    decode = codecs.getdecoder('unicode_escape')

    def _expand(match):
        text, _ = decode(match.group(0))
        return text

    return re.sub(r'\\u[0-9a-fA-F]{4}', _expand, s)
b53466e1 3867
d05cfe06
S
3868
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    on_python2 = sys.version_info < (3, 0)
    if on_python2 and isinstance(s, compat_str):
        # quote() on Python 2 needs a byte string
        s = s.encode('utf-8')
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    return compat_urllib_parse.quote(s, safe_chars)
d05cfe06
S
3874
3875
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host name is IDNA-encoded; the other components are percent-escaped
    netloc = parts.netloc.encode('idna').decode('ascii')
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        netloc=netloc, path=path, params=params,
        query=query, fragment=fragment)
    return escaped.geturl()
3886
62e609ab
PH
3887
def read_batch_urls(batch_fd):
    """ Read the URLs listed in batch_fd, one per line, and close it.

    Lines are stripped of surrounding whitespace. Empty lines and lines
    starting with '#', ';' or ']' are left out. A byte order mark in
    front of a line is removed.
    """
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        # A UTF-8 BOM reads as '\ufeff' once decoded as UTF-8 and as
        # '\xef\xbb\xbf' if the bytes were decoded as Latin-1; strip()
        # removes neither, so both forms have to be cut off explicitly
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
3902
3903
def urlencode_postdata(*args, **kargs):
    """ URL-encode the arguments and return the result as ASCII bytes """
    encoded = compat_urllib_parse_urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
3906
3907
def update_url_query(url, query):
    """ Return url with the entries of query merged into its query string;
    entries of query replace existing parameters of the same name """
    if not query:
        return url
    parts = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parts.query)
    params.update(query)
    new_query = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parts._replace(query=new_query))
16392824 3916
8e60dc75 3917
ed0291d1
S
def update_Request(req, url=None, data=None, headers={}, query={}):
    """ Build a new request from req with url, data, headers and query
    overridden or extended; HEAD and PUT requests keep their method """
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    request_types = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }
    req_type = request_types.get(
        req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
3936
3937
def _multipart_encode_impl(data, boundary):
    """ Encode data as multipart/form-data with the given boundary and
    return (body, content_type). Raises ValueError if the boundary occurs
    inside one of the encoded fields. """
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    chunks = []
    for k, v in data.items():
        if isinstance(k, compat_str):
            k = k.encode('utf-8')
        if isinstance(v, compat_str):
            v = v.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        chunks.append(delimiter)
        chunks.append(content)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
3958
3959
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple (body, content_type). A specified boundary that
    occurs inside the data raises ValueError.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        return _multipart_encode_impl(data, boundary)

    # Keep drawing random boundaries until one does not clash with the data
    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            continue
3988
3989
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up one key, or the first usable one of a list/tuple of keys.

    For a list or tuple, keys that are missing or map to None are skipped,
    as are falsy values unless skip_false_values is disabled; default is
    returned if no key qualifies. A single key is a plain d.get().
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None or (skip_false_values and not value):
            continue
        return value
    return default
3998
3999
def try_get(src, getter, expected_type=None):
    """ Apply getter (or each getter of a list/tuple in turn) to src and
    return the first result obtained without a lookup error and, if
    expected_type is given, of that type; None if there is none """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for get in getters:
        try:
            value = get(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
    return None
329ca3be
S
4011
4012
6cc62232
S
def merge_dicts(*dicts):
    """ Merge dicts giving precedence to earlier ones.

    None values are ignored. A later value only replaces an earlier one
    when the earlier one is an empty text string and the later one is a
    non-empty text string.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            if k in merged:
                current = merged[k]
                fills_blank = (
                    isinstance(v, compat_str) and v
                    and isinstance(current, compat_str)
                    and not current)
                if not fills_blank:
                    continue
            merged[k] = v
    return merged
4025
4026
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as a text string, decoding it with encoding and
    errors if it is not one already """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
4029
16392824 4030
a1a530b0
PH
# Rating label -> age limit, as returned by parse_age_limit for these labels
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
fac55558
PH
4038
4039
# 'TV-...' rating label -> age limit; parse_age_limit strips the 'TV-'
# prefix of the keys to build its pattern, so every key must carry it
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4048
4049
def parse_age_limit(s):
    """ Turn an age rating into an age limit.

    Accepts an int between 0 and 21, a string of one or two digits with
    an optional '+', a US_RATINGS label, or a TV_PARENTAL_GUIDELINES label
    written with '-', '_' or nothing after 'TV'. Returns None otherwise.
    """
    # Exact type check: only plain ints take this branch
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    if s in US_RATINGS:
        return US_RATINGS[s]

    tv_suffixes = '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)
    tv_rating = re.match(r'^TV[_-]?(%s)$' % tv_suffixes, s)
    if tv_rating:
        return TV_PARENTAL_GUIDELINES['TV-' + tv_rating.group(1)]
    return None
146c80e2
S
4064
4065
def strip_jsonp(code):
    """ Return the argument of a JSONP callback invocation; code that does
    not look like one is returned unchanged """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)
478c2c61
PH
4074
4075
def js_to_json(code):
    """ Rewrite a JavaScript object literal so that it reads as JSON.

    The final re.sub() tokenises code into quoted strings, comments,
    trailing commas, identifiers, hexadecimal/octal integers, integer
    keys and runs of '!', and passes each token through fix_kv.
    """
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    # Whitespace with at most one comment in between
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (pattern, base) pairs for integer literals, optionally used as a key
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        # Comments, '!' runs and trailing commas are dropped
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ""

        if v[0] in ("'", '"'):
            # Strip the quotes and translate escapes: bare '"' gets escaped,
            # "\'" loses its backslash, a backslash-newline is removed and
            # '\x' becomes '\u00'; other escapes are kept
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integers used as keys have to be quoted in JSON
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # Everything that gets here is emitted as a double-quoted string
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
e05f6939
PH
4116
4117
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values:
    the returned function maps an id to its index, or to -1 if unknown """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4126
acd69589
PH
4127
# %-style template with the named fields title, id and ext
# NOTE(review): presumably the default output filename template - confirm against its users
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 4129
a020a0dc
PH
4130
def limit_length(s, length):
    """ Add ellipses to overly long strings: strings longer than length
    are cut so that text plus '...' is length characters long """
    ELLIPSES = '...'
    if s is None or len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
4139
4140
def version_tuple(v):
    """ Split a version string at '.' and '-' into a tuple of ints;
    raises ValueError if a component is not an integer """
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
4143
4144
def is_outdated_version(version, limit, assume_new=True):
    """ Tell whether version is older than limit. An empty or unparsable
    version counts as outdated exactly when assume_new is false. """
    fallback = not assume_new
    if not version:
        return fallback
    try:
        current, threshold = version_tuple(version), version_tuple(limit)
    except ValueError:
        return fallback
    return current < threshold
732ea2f0
PH
4152
4153
def ytdl_is_updateable():
    """ Returns if youtube-dlc can be updated with -U, i.e. whether this
    module was loaded by a zipimporter or the interpreter is frozen """
    from zipimport import zipimporter

    if isinstance(globals().get('__loader__'), zipimporter):
        return True
    return hasattr(sys, 'frozen')
7d4111ed
PH
4159
4160
def args_to_str(args):
    """ Get a short string representation for a subprocess command """
    quoted = [compat_shlex_quote(arg) for arg in args]
    return ' '.join(quoted)
2ccd1b10
PH
4164
4165
def error_to_compat_str(err):
    """ Return the message of err as a text string """
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
4173
4174
def mimetype2ext(mt):
    """ Map a MIME type to a file extension.

    A few full types are mapped directly. Otherwise the subtype (without
    parameters, lower-cased) is looked up in a table and returned as is
    if it is not listed. Returns None for None.
    """
    if mt is None:
        return None

    ext = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/x-wav': 'wav',
    }.get(mt)
    if ext is not None:
        return ext

    # Drop the parameters before looking for the subtype: a '/' inside a
    # parameter value must not be mistaken for the type/subtype separator
    res = mt.split(';')[0].rpartition('/')[2].strip().lower()

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }.get(res, res)
4212
4213
def parse_codecs(codecs_str):
    """ Split a comma separated codecs string into a dict with 'vcodec'
    and 'acodec'.

    A missing kind is reported as 'none'. If no codec is recognised and
    there are exactly two entries, they are taken as video and audio in
    that order; otherwise an empty dict is returned. Unknown codecs are
    reported on stderr.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    video_families = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    audio_families = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')

    stripped = [part.strip() for part in codecs_str.strip().strip(',').split(',')]
    split_codecs = [part for part in stripped if part]

    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in video_families:
            # Only the first codec of each kind is kept
            vcodec = vcodec or full_codec
        elif family in audio_families:
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)

    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4243
4244
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension of a response: from the filename of an
    attachment Content-Disposition header if it has a usable one,
    otherwise from the Content-Type header """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        found = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if found:
            ext = determine_ext(found.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(headers.get('Content-Type'))
05900629
PH
4257
4258
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Build a base64 'data:' URI of the given MIME type from bytes """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4261
4262
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # No limit set by the user, or content available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
4271
4272
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.
    The result is a match object (truthy) or None. """

    # The UTF-32 marks must be tested before the UTF-16 ones, as the
    # UTF-32-LE mark begins with the UTF-16-LE mark
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    encoding, skip = 'utf-8', 0
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            encoding, skip = enc, len(bom)
            break
    text = first_bytes[skip:].decode(encoding, 'replace')

    return re.match(r'^\s*<', text)
a055469f
PH
4291
4292
def determine_protocol(info_dict):
    """ Return the protocol of a format: its 'protocol' entry if set,
    otherwise a guess from the start of its 'url', the extension of the
    URL, or finally the URL scheme """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
4313
4314
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    rows = [header_row] + data
    widths = [
        max(len(compat_str(cell)) for cell in column)
        for column in zip(*rows)]
    # Every column except the last is left-aligned and padded to one
    # character more than its widest cell
    padded = ['%-' + compat_str(width + 1) + 's' for width in widths[:-1]]
    row_format = ' '.join(padded) + '%s'
    return '\n'.join(row_format % tuple(row) for row in rows)
347de493
PH
4321
4322
def _match_one(filter_part, dct):
    """ Evaluate a single filter expression against dct.

    Two forms are understood:
      * "<key> <op> <value>" with the operators of COMPARISON_OPERATORS,
        where a '?' after the operator makes the part pass when the key
        has no value
      * "<key>" or "!<key>", testing presence (or truth for bools)

    Raises ValueError for a part that matches neither form, for an
    ordering operator applied to a string and for an unparsable number.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and the matching comparison
            # value is a number we should respect the origin of the original
            # field and process the comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Undo the escaping of the quote character inside the value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Not a plain integer: try it as a file size, first as
                # written and then with a 'B' appended
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # The matched '?' text (truthy) or None, not a bool
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4391
4392
def match_str(filter_str, dct):
    """Check a dictionary against a filter of '&'-separated expressions.

    Each expression is evaluated with _match_one() and all of them have to
    hold; evaluation stops at the first one that yields a falsy result.
    Returns True (= passes the filter) or False.
    """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4398
4399
def match_filter_func(filter_str):
    """Create a predicate that checks info dicts against ``filter_str``.

    The returned function yields None when the info dict passes the filter
    (see match_str) and a human-readable skip message otherwise.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            # Name the video by title, falling back to its id
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
4408
4409
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """Convert a DFXP/TTML time expression to seconds (float).

    Accepted forms are a plain offset (``12``, ``12.5``, ``12.5s``) and a
    clock value ``H:MM:SS`` with an optional fraction introduced by ``.``
    or ``:``.  Returns None for empty input and for anything else that is
    not recognised.
    """
    if not time_expr:
        return None

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match is None:
        return None
    hours, minutes, seconds = clock_match.groups()
    # A ':' separated fourth field is read as the fractional part of seconds
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
4421
4422
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """Format a time offset in seconds as an SRT timecode ``HH:MM:SS,mmm``.

    The offset is rounded to the nearest millisecond *before* it is split
    into fields.  Truncating the float remainder instead (as '%03d' does)
    turns e.g. 1.001 into ``00:00:01,000`` because of binary floating point
    representation, and rounding first also lets a carry propagate into the
    seconds/minutes/hours fields consistently.
    """
    total_msecs = int(round(seconds * 1000))
    total_secs, msecs = divmod(total_msecs, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msecs)
bf6427d2
YCH
4425
4426
def dfxp2srt(dfxp_data):
    '''
    Convert a DFXP/TTML subtitle document to SRT.

    Every <p> element with a usable begin time and either an end time or a
    duration becomes one SRT cue.  A subset of TTML styling (see
    SUPPORTED_STYLING below) is rendered as <b>, <i>, <u> and <font> markup.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document contains no <p> elements
    '''
    # Maps each current namespace URI to the legacy URIs it replaces
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> {property: value}, filled from the <style> elements below
    styles = {}
    # style inherited from <body>/<div>, applied to every element
    default_style = {}

    class TTMLPElementParser(object):
        '''XMLParser target that renders one <p> element as SRT markup'''
        _out = ''
        # NOTE: the two lists below are class attributes that are mutated in
        # place, so they are shared by all parser instances created during
        # one dfxp2srt() call and style state can carry over from one
        # paragraph to the next.  Left unchanged on purpose: altering it
        # would change the generated markup.
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                # Effective style: defaults, then the referenced style, then
                # the tts:* attributes given on the element itself
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the element and feed it through the target parser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            if ns in k:
                # The legacy URI is a substring of its replacement
                # ('...ttml#style' vs. '...ttml#styling'): a plain replace()
                # would turn an already up-to-date '...#styling' into
                # '...#stylinging' and thereby drop all styling.  Split on
                # the current URI first so those occurrences stay untouched.
                dfxp_data = k.join(
                    part.replace(ns, k) for part in dfxp_data.split(k))
            else:
                dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance.  A style can only be resolved once its
    # parent is known, so the <style> elements are walked repeatedly.
    while True:
        repeat = False
        resolved_count = len(styles)
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        # Another pass only helps if this one resolved something new;
        # otherwise a reference to a missing parent (or to one without any
        # supported property) would keep this loop spinning forever.
        if not repeat or len(styles) == resolved_count:
            break

    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No usable end time: derive it from the duration if possible
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4589
4590
66e289ba
S
def cli_option(params, command_option, param):
    """Build the command line arguments for an option that takes a value.

    Returns ``[command_option, value]`` with the value of ``params[param]``
    converted to a string, or an empty list if the parameter is missing or
    None.  Every non-None value is converted, including falsy ones such as
    0, so that the resulting list contains strings only.
    """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, compat_str(value)]
4596
4597
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the command line arguments for a boolean option.

    Returns an empty list if ``params[param]`` is missing or None.
    Otherwise the value (which must be a bool) is rendered as
    ``true_value`` / ``false_value`` and returned either as a separate
    argument or, when ``separator`` is given, joined to the option name.
    """
    flag = params.get(param)
    if flag is None:
        return []
    assert isinstance(flag, bool)
    rendered = true_value if flag else false_value
    if not separator:
        return [command_option, rendered]
    return [command_option + separator + rendered]
4606
4607
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return ``[command_option]`` if ``params[param]`` equals
    ``expected_value`` and an empty list otherwise."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4611
4612
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra command line arguments stored in ``params``.

    If ``params[param]`` is missing or None a *copy* of ``default`` is
    returned, so that callers modifying the result cannot alter the default
    list shared between calls.  Otherwise the stored value, which must be a
    list, is returned as is.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return list(default)
    assert isinstance(ex_args, list)
    return ex_args
4619
4620
39672624
YCH
class ISO639Utils(object):
    """Conversion between two-letter (ISO 639-1) and three-letter
    (ISO 639-2/T) language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of ``code`` are looked up; returns
        None for unknown codes.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code found in the table whose
        three-letter code equals ``code``, or None if there is none.
        """
        matches = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
4824
4825
4eb10f66
YCH
class ISO3166Utils(object):
    """Lookup of country names by two-letter country code."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the
        corresponding full name

        The lookup is case-insensitive; returns None for unknown codes.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
5084
5085
773f291d
S
class GeoUtils(object):
    """Helpers for producing IPv4 addresses that belong to a given country."""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Pick a random IPv4 address from an address block.

        ``code_or_block`` is either a two-character country code, which is
        looked up case-insensitively in the table above, or an address
        block in ``address/prefix-length`` notation.  Returns the address
        as a string, or None if the country code is not in the table.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        network, prefix_len = block.split('/')
        lowest = compat_struct_unpack('!L', socket.inet_aton(network))[0]
        # Setting all host bits gives the last address of the block
        highest = lowest | (0xffffffff >> int(prefix_len))
        picked = random.randint(lowest, highest)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', picked)))
773f291d
S
5344
5345
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """Proxy handler that allows the proxy to be chosen per request.

    A request can carry a 'Ytdl-request-proxy' header naming the proxy to
    use for it; the special value '__noproxy__' disables proxying.  SOCKS
    proxies are not handled here but passed on through the
    'Ytdl-socks-proxy' header.
    """

    def __init__(self, proxies=None):
        def _default_open(scheme):
            # Opener used when no proxy is configured for ``scheme``
            def _open(r, proxy='__noproxy__', type=scheme, meth=self.proxy_open):
                return meth(r, proxy, type)
            return _open

        # Set default handlers
        for scheme in ('http', 'https'):
            setattr(self, '%s_open' % scheme, _default_open(scheme))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy named by the request itself takes precedence; the header
        # is internal and removed from the request afterwards
        per_request_proxy = req.headers.get('Ytdl-request-proxy')
        if per_request_proxy is not None:
            proxy = per_request_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        proxy_scheme = compat_urlparse.urlparse(proxy).scheme.lower()
        if proxy_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dlc's http/https handlers take care of wrapping the
            # socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5bc880b9
YCH
5369
5370
0a5445dd
YCH
5371# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5372# released into Public Domain
5373# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5374
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    Leading zero bytes are stripped (a value that is not positive yields a
    single zero byte).  If optional blocksize is given and greater than
    zero, the front of the byte string is then padded with binary zeros so
    that the length is a multiple of blocksize.
    """
    n = int(n)
    # Emit the number as 32-bit words, least significant word first
    words = []
    while n > 0:
        words.append(compat_struct_pack('>I', n & 0xffffffff))
        n >>= 32
    packed = b''.join(reversed(words))
    # strip off leading zeros; nothing is left only when there were no words
    packed = packed.lstrip(b'\000')
    if not packed:
        packed = b'\000'
    # add back some pad bytes
    if blocksize > 0:
        overhang = len(packed) % blocksize
        if overhang:
            packed = (blocksize - overhang) * b'\000' + packed
    return packed
5403
5404
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words
    overhang = len(s) % 4
    if overhang:
        s = b'\000' * (4 - overhang) + s
    value = 0
    for offset in range(0, len(s), 4):
        word = compat_struct_unpack('>I', s[offset:offset + 4])[0]
        value = (value << 32) + word
    return value
5420
5421
5bc880b9
YCH
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The byte string is reversed before being read as one hexadecimal
    # number, i.e. the first byte of data is the least significant one
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    return '%x' % pow(plaintext, exponent, modulus)
81bdc8fd
YCH
5437
5438
f48409c7
YCH
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout ``[0, 2] + PS + [0] + data`` where PS is a
    run of pseudo-random padding octets.  The padding octets must all be
    nonzero: the first zero octet after the leading ``[0, 2]`` marks the
    end of the padding, so a zero inside PS would truncate it and corrupt
    the message for the receiving side.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data leaves room for fewer than 8 padding
                               octets (len(data) > length - 11)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5452
5453
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer ``num`` as a string in base ``n``.

    ``table`` supplies the digit characters (digit value = index); if it is
    not given the first ``n`` characters of 0-9, a-z, A-Z are used, which
    limits the base to 62.

    Raises ValueError if the base exceeds the table length, and - instead
    of looping forever - if ``num`` is negative or the base is smaller
    than 2.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    # Floor division never reaches zero for a negative number, nor for any
    # number when dividing by 1, so the loop below would not terminate
    if num < 0:
        raise ValueError('cannot encode negative number %d' % num)
    if n < 2:
        raise ValueError('base %d is too small' % n)

    # Collect digits least significant first and reverse once at the end
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
f52354a8
YCH
5470
5471
def decode_packed_codes(code):
    """Undo the word substitution of code matched by PACKED_CODES_RE.

    The match provides the obfuscated code, a base, a symbol count and a
    '|'-separated symbol list.  Every word of the obfuscated code is
    replaced by the symbol whose index, written in the given base, equals
    that word; an empty symbol stands for the word itself.
    """
    packed = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = packed.groups()
    radix = int(base)
    total = int(count)
    words = symbols.split('|')

    replacements = {}
    for index in reversed(range(total)):
        key = encode_base_n(index, radix)
        replacements[key] = words[index] or key

    return re.sub(
        r'\b(\w+)\b', lambda word: replacements[word.group(0)],
        obfuscated_code)
e154c651 5488
5489
1ced2221
S
def caesar(s, alphabet, shift):
    """
    Apply a Caesar cipher to s

    Every character of s that occurs in alphabet is replaced by the
    character `shift` positions further along (wrapping around);
    characters outside the alphabet are kept unchanged. A shift of zero
    returns s itself.
    """
    if shift == 0:
        return s

    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(_rotate(char) for char in s)
5497
5498
def rot47(s):
    """Shift every character of s found in the 94-character table below by 47 positions, using caesar()"""
    rot47_alphabet = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, rot47_alphabet, 47)
5501
5502
def parse_m3u8_attributes(attrib):
    """
    Parse a comma-separated KEY=value attribute list into a dict

    Keys consist of upper-case letters, digits and dashes. Values are
    either double-quoted (the quotes are stripped, commas inside are kept)
    or bare runs without quotes and commas. When a key repeats, the last
    value wins.
    """
    attribute_re = r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)'
    info = {}
    for match in re.finditer(attribute_re, attrib):
        name, raw_value = match.group('key', 'val')
        is_quoted = raw_value.startswith('"')
        info[name] = raw_value[1:-1] if is_quoted else raw_value
    return info
1143535d
YCH
5510
5511
def urshift(val, n):
    """
    Right-shift val by n bits, treating it as unsigned

    Negative values are first offset by 2**32, i.e. reinterpreted as
    unsigned 32-bit numbers, before being shifted.
    """
    if val < 0:
        val += 0x100000000
    return val >> n
d3f8e038
YCH
5514
5515
5516# Based on png2str() written by @gdkchan and improved by @yokrysty
067aa17e 5517# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
d3f8e038
YCH
def decode_png(png_data):
    """
    Decode PNG data into rows of unfiltered byte values

    The IDAT chunks are concatenated, inflated and the per-scanline
    filters (Sub, Up, Average, Paeth) are undone. Every scanline is read
    as width * 3 bytes and the "left" neighbour is taken 3 bytes back.
    NOTE(review): the bit depth, colour type and interlace fields of IHDR
    are never inspected - presumably only 8-bit RGB, non-interlaced images
    are expected; confirm against callers.

    @param png_data  the raw bytes of the PNG file
    @returns         a (width, height, pixels) tuple where pixels is a list
                     of `height` rows holding width * 3 integers each
    @raises IOError  if the signature or IHDR chunk is missing, or if there
                     is no IDAT data
    """
    # Reference: https://www.w3.org/TR/PNG/
    signature, remaining = png_data[:8], png_data[8:]

    if signature != b'\x89PNG\x0d\x0a\x1a\x0a' or remaining[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    formats_by_size = {1: '>B', 2: '>H', 4: '>I'}

    def _read_uint(raw):
        # Big-endian unsigned integer of 1, 2 or 4 bytes
        return compat_struct_unpack(formats_by_size[len(raw)], raw)[0]

    # Chunk layout: 4-byte length, 4-byte type, payload, 4-byte CRC
    chunks = []
    while remaining:
        size = _read_uint(remaining[:4])
        kind = remaining[4:8]
        payload = remaining[8:8 + size]
        # The trailing CRC is skipped, not verified
        remaining = remaining[12 + size:]
        chunks.append((kind, payload))

    # The first chunk is IHDR (checked above); it starts with width, height
    ihdr = chunks[0][1]
    width = _read_uint(ihdr[:4])
    height = _read_uint(ihdr[4:8])

    idat = b''.join(payload for kind, payload in chunks if kind == b'IDAT')
    if not idat:
        raise IOError('Unable to read PNG data.')

    scanlines = bytearray(zlib.decompress(idat))

    stride = width * 3
    pixels = []

    for y in range(height):
        # Each scanline is prefixed with one filter-type byte
        line_start = y * (1 + stride)
        filter_type = scanlines[line_start]

        row = []
        pixels.append(row)

        for x in range(stride):
            value = scanlines[1 + line_start + x]

            has_left = x > 2
            has_up = y > 0
            left = row[x - 3] if has_left else 0
            up = pixels[y - 1][x] if has_up else 0

            if filter_type == 1:  # Sub
                predictor = left
            elif filter_type == 2:  # Up
                predictor = up
            elif filter_type == 3:  # Average
                predictor = (left + up) >> 1
            elif filter_type == 4:  # Paeth
                upper_left = pixels[y - 1][x - 3] if has_left and has_up else 0
                estimate = left + up - upper_left

                dist_left = abs(estimate - left)
                dist_up = abs(estimate - up)
                dist_upper_left = abs(estimate - upper_left)

                # Pick the neighbour closest to the estimate; ties are
                # resolved in the order left, up, upper-left
                if dist_left <= dist_up and dist_left <= dist_upper_left:
                    predictor = left
                elif dist_up <= dist_upper_left:
                    predictor = up
                else:
                    predictor = upper_left
            else:
                # Any other filter type leaves the byte as it is
                predictor = 0

            row.append((value + predictor) & 0xff)

    return width, height, pixels
efa97bdc
YCH
5621
5622
def write_xattr(path, key, value):
    """
    Set the extended attribute `key` of the file at `path` to `value`

    The first usable backend is picked: the Python `xattr` module (either
    the pyxattr or the xattr flavour); if it cannot be imported, an NTFS
    Alternate Data Stream when compat_os_name is 'nt'; otherwise the
    `setfattr` or `xattr` command line tool.

    @param path   path of the file to tag
    @param key    name of the attribute
    @param value  attribute value as bytes (it is written in binary mode
                  or decoded as UTF-8 for the command line tools)
    @raises XAttrUnavailableError  if pyxattr is too old or no backend
                                   could be found
    @raises XAttrMetadataError     if the chosen backend fails to write
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if not hasattr(xattr, 'set'):  # xattr
            setxattr = xattr.setxattr
        else:  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dlc requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
            return

        has_setfattr = check_executable('setfattr', ['--version'])
        has_xattr_tool = check_executable('xattr', ['-h'])

        if not (has_setfattr or has_xattr_tool):
            # On Unix, and can't find pyxattr, setfattr, or xattr.
            if sys.platform.startswith('linux'):
                raise XAttrUnavailableError(
                    "Couldn't find a tool to set the xattrs. "
                    "Install either the python 'pyxattr' or 'xattr' "
                    "modules, or the GNU 'attr' package "
                    "(which contains the 'setfattr' tool).")
            raise XAttrUnavailableError(
                "Couldn't find a tool to set the xattrs. "
                "Install either the python 'xattr' module, "
                "or the 'xattr' binary.")

        # The command line tools take the value as a text argument
        value = value.decode('utf-8')
        if has_setfattr:
            executable = 'setfattr'
            opts = ['-n', key, '-v', value]
        else:
            executable = 'xattr'
            opts = ['-w', key, value]

        cmd = [encodeFilename(executable, True)]
        cmd += [encodeArgument(o) for o in opts]
        cmd += [encodeFilename(path, True)]

        try:
            p = subprocess.Popen(
                cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        stdout, stderr = p.communicate()
        stderr = stderr.decode('utf-8', 'replace')
        if p.returncode != 0:
            raise XAttrMetadataError(p.returncode, stderr)
0c265486
YCH
5705
5706
def random_birthday(year_field, month_field, day_field):
    """
    Pick a random date between 1950-01-01 and 1995-12-31 (both inclusive)

    @returns  a dict mapping the three given field names to the year,
              month and day of that date, each converted with str()
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_in_days = (latest - earliest).days
    birthday = earliest + datetime.timedelta(days=random.randint(0, span_in_days))
    return {
        year_field: str(birthday.year),
        month_field: str(birthday.month),
        day_field: str(birthday.day),
    }