]> jfr.im git - yt-dlp.git/blame - youtube_dl/utils.py
[vidzi] Fix extraction
[yt-dlp.git] / youtube_dl / utils.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
ecc0c5ee
PH
4from __future__ import unicode_literals
5
1e399778 6import base64
5bc880b9 7import binascii
912b38b4 8import calendar
676eb3f2 9import codecs
62e609ab 10import contextlib
e3946f98 11import ctypes
c496ca96
PH
12import datetime
13import email.utils
f45c185f 14import errno
be4a824d 15import functools
d77c3dfd 16import gzip
b7ab0590 17import itertools
03f9daab 18import io
f4bfd65f 19import json
d77c3dfd 20import locale
02dbf93f 21import math
347de493 22import operator
d77c3dfd 23import os
4eb7f1d1 24import pipes
c496ca96 25import platform
d77c3dfd 26import re
13ebea79 27import ssl
c496ca96 28import socket
b53466e1 29import struct
1c088fa8 30import subprocess
d77c3dfd 31import sys
181c8655 32import tempfile
01951dda 33import traceback
bcf89ce6 34import xml.etree.ElementTree
d77c3dfd 35import zlib
d77c3dfd 36
8c25f81b 37from .compat import (
8f9312c3 38 compat_basestring,
8c25f81b 39 compat_chr,
36e6f62c 40 compat_etree_fromstring,
8c25f81b 41 compat_html_entities,
be4a824d 42 compat_http_client,
c86b6142 43 compat_kwargs,
8c25f81b 44 compat_parse_qs,
be4a824d 45 compat_socket_create_connection,
8c25f81b
PH
46 compat_str,
47 compat_urllib_error,
48 compat_urllib_parse,
49 compat_urllib_parse_urlparse,
50 compat_urllib_request,
51 compat_urlparse,
7d4111ed 52 shlex_quote,
8c25f81b 53)
4644ac55
S
54
55
468e2e92
FV
# The type of a compiled regular expression has no stable public name across
# the supported Python versions, so it is derived from an actual instance.
compiled_regex_type = type(re.compile(''))

# Headers that YoutubeDLHandler.http_request adds to a request unless the
# request already carries them.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Sentinel for "no default supplied"; lets callers pass None as a real default.
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January',
    'February',
    'March',
    'April',
    'May',
    'June',
    'July',
    'August',
    'September',
    'October',
    'November',
    'December',
]

# File extensions grouped by container family.
KNOWN_EXTENSIONS = (
    # MPEG-4 family
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    # Flash
    'flv', 'f4v', 'f4a', 'f4b',
    # WebM / Ogg
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    # Matroska
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    # Manifests / playlists
    'f4f', 'f4m', 'm3u8', 'smil',
)
88
7105440c 89
def preferredencoding():
    """Return the encoding to use for this system.

    The value of locale.getpreferredencoding() is used when it names a codec
    that can actually encode text; otherwise 'UTF-8' is returned.
    """
    try:
        candidate = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here.
        'TEST'.encode(candidate)
    except Exception:
        return 'UTF-8'
    return candidate
d77c3dfd 103
f4bfd65f 104
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file created in the same
    directory as fn and then renamed over fn. If anything fails, the
    temporary file is removed (best effort) and the exception is re-raised.
    """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()

        # os.path.basename/dirname return bytes objects here, but
        # NamedTemporaryFile will fail if the filename contains non ascii
        # characters unless we use a unicode object
        def path_basename(f):
            return os.path.basename(f).decode(encoding)

        def path_dirname(f):
            return os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        # The file must survive close() so that it can be renamed afterwards
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        # Do not leave the temporary file behind; the original error wins
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
157
158
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first element matching xpath[@key] or xpath[@key='val'] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val:
            assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the first element matching xpath[@key] or xpath[@key='val'] """
        # Python 2.6 quirk: with a unicode xpath, .//node does not match a
        # node that is a direct child of the context node.
        if isinstance(xpath, compat_str):
            xpath = xpath.encode('ascii')

        for candidate in node.findall(xpath):
            attributes = candidate.attrib
            if key in attributes and (val is None or attributes.get(key) == val):
                return candidate
        return None
180
d7e66d39
JMF
181# On python2.6 the xml.etree.ElementTree.Element methods don't support
182# the namespace parameter
5f6a1245
JW
183
184
d7e66d39
JMF
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of path into '{namespace-uri}tag' form.

    ns_map maps each prefix used in path to its namespace URI. Steps without
    a prefix are left unchanged.
    """
    def qualify(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(qualify(step) for step in path.split('/'))
195
d77c3dfd 196
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element below node that matches xpath.

    xpath is either a single path string or an iterable of path strings that
    are tried in order. When nothing matches: default is returned if one was
    supplied, otherwise ExtractorError is raised if fatal is set, otherwise
    None is returned. name is only used in the error message.
    """
    def _find_xpath(xpath):
        if sys.version_info < (2, 7):  # Crazy 2.6
            xpath = xpath.encode('ascii')
        return node.find(xpath)

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        # Start from "not found" so that an empty iterable is handled too
        n = None
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: xpath itself may be a tuple of paths, which
            # would otherwise be unpacked as several format arguments
            raise ExtractorError('Could not find XML element %s' % (name,))
        else:
            return None
    return n
220
221
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element below node that matches xpath.

    Lookup and not-found handling are delegated to xpath_element. An element
    that exists but has no text is treated like a missing one: default if
    supplied, ExtractorError if fatal, otherwise None.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    # xpath_element hands back None or the default when nothing was found
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            # Wrap in a 1-tuple: xpath may itself be a tuple of paths
            raise ExtractorError('Could not find XML element\'s text %s' % (name,))
        else:
            return None
    return n.text
a41fb80c
S
235
236
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return attribute key of the first xpath match that carries it.

    When no such element exists: default if supplied, ExtractorError if fatal
    is set, otherwise None. name is only used in the error message.
    """
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    if name is None:
        name = '%s[@%s]' % (xpath, key)
    raise ExtractorError('Could not find XML attribute %s' % name)
bf0ff932
PH
248
249
def get_element_by_id(id, html):
    """Return the content of the tag whose id attribute equals id, or None"""
    return get_element_by_attribute(attribute='id', value=id, html=html)
43e8fafd 253
12ea2f30 254
43e8fafd
ND
def get_element_by_attribute(attribute, value, html):
    """Return the content of the first tag in html with attribute set to value

    The content is HTML-unescaped. None is returned when no tag matches.
    """
    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), re.escape(value))

    mobj = re.search(pattern, html)
    if mobj is None:
        return None

    content = mobj.group('content')
    # Content wrapped in quotes loses its first and last character
    if content.startswith(('"', "'")):
        content = content[1:-1]

    return unescapeHTML(content)
a921f407 276
9e6dd238
FV
277
def clean_html(html):
    """Turn an HTML snippet into readable plain text

    None is passed through unchanged, which is convenient when sanitizing
    optional fields such as descriptions.
    """
    if html is None:
        return html

    # Source newlines carry no meaning in HTML; only <br> and paragraph
    # boundaries become line breaks. Remaining tags are then dropped.
    text = html.replace('\n', ' ')
    for pattern, replacement in (
            (r'\s*<\s*br\s*/?\s*>\s*', '\n'),
            (r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n'),
            ('<.*?>', '')):
        text = re.sub(pattern, replacement, text)

    # Finally resolve HTML entities
    return unescapeHTML(text).strip()
9e6dd238
FV
293
294
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a sanitized name if that fails.

    '-' stands for standard output (its binary buffer where available).
    If opening fails for a reason other than EACCES, the name is passed
    through sanitize_path and opened again; an error from that second attempt
    propagates to the caller, like the standard open() function would.

    Returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            if hasattr(sys.stdout, 'buffer'):
                out = sys.stdout.buffer
            else:
                out = sys.stdout
            return (out, filename)
        return (open(encodeFilename(filename), open_mode), filename)
    except (IOError, OSError) as err:
        # A permission problem is not fixed by renaming
        if err.errno in (errno.EACCES,):
            raise

        # Otherwise retry without the characters win32 forbids
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        # An exception here should be caught in the caller
        return (open(encodeFilename(alt_filename), open_mode), alt_filename)
d77c3dfd
FV
325
326
def timeconvert(timestr):
    """Convert an RFC 2822 time string into a system timestamp

    Returns None when the string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
1c469a94 334
5f6a1245 335
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitize a string so that it can be used as part of a filename.

    With restricted set, a stricter subset of characters is allowed.
    With is_id set, s is an ID that should be kept as intact as possible, so
    only the per-character replacement is applied and no further cleanup.
    """
    def replace_insane(char):
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (
                char in '!&\'()[]{}$;`^,#' or char.isspace() or code > 127):
            return '_'
        return char

    # Timestamps: turn 1:23:45 into 1_23_45 before ':' gets its own handling
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(replace_insane(char) for char in s)
    if is_id:
        return result

    # Collapse runs of underscores and trim them from both ends
    result = re.sub('_{2,}', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    if result.startswith('-'):
        result = '_' + result[len('-'):]
    result = result.lstrip('.')
    return result or '_'
d77c3dfd 372
5f6a1245 373
a2aaf4db
S
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows

    On every other platform s is returned unchanged. On Windows the path is
    normalized and, in each component other than '.' and '..', the characters
    / < > : " | \\ ? * as well as a trailing dot or whitespace character are
    replaced with '#'. A drive or UNC prefix is preserved as is.
    """
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    # Before Python 2.7, splitdrive does not recognize UNC prefixes
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # Drop the empty component in front of the leading separator
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
390
391
67dda517
S
392# Prepend protocol-less URLs with `http:` scheme in order to mitigate the number of
393# unwanted failures due to missing protocol
def sanitized_Request(url, *args, **kwargs):
    """Build a Request, giving protocol-relative '//...' URLs the http: scheme"""
    if url.startswith('//'):
        url = 'http:%s' % url
    return compat_urllib_request.Request(url, *args, **kwargs)
397
398
def orderedSet(iterable):
    """ Return the items of iterable as a list without duplicates, keeping first-seen order """
    unique = []
    for item in iterable:
        if item in unique:
            continue
        unique.append(item)
    return unique
d77c3dfd 406
912b38b4 407
4e408e47
PH
def _htmlentity_transform(entity):
    """Transforms an HTML entity to a character.

    entity is the text between '&' and ';'. Named entities and decimal or
    hexadecimal character references are resolved; anything else, including a
    numeric reference that is not a valid code point, is returned in its
    literal '&...;' form.
    """
    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/rg3/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except (ValueError, OverflowError):
            # ValueError: code point out of range; OverflowError: number too
            # large for the C integer conversion. Fall through to the literal.
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
4e408e47
PH
430
431
def unescapeHTML(s):
    """Replace every '&...;' entity in s with its character; None stays None"""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^;]+);', _replace, s)
d77c3dfd 439
8bf48f23 440
aa49acd1
S
def get_subprocess_encoding():
    """Return the encoding used for filenames and arguments passed to subprocesses"""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        codec = preferredencoding()
    else:
        codec = sys.getfilesystemencoding()
    return 'utf-8' if codec is None else codec
451
452
def encodeFilename(s, for_subprocess=False):
    """
    Return s in the form expected by the file system or subprocess APIs.

    @param s The name of the file
    @param for_subprocess Whether the result is passed to a subprocess
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # On Windows 2000 and up the text is passed through unchanged so that the
    # Unicode APIs are used. (Detecting Windows NT 4 is tricky because
    # 'major >= 4' would match Windows 9x series as well. Besides, NT 4 is
    # obsolete.)
    use_unicode_api = (
        not for_subprocess and
        sys.platform == 'win32' and
        sys.getwindowsversion()[0] >= 5)
    if use_unicode_api:
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
471
472
def decodeFilename(b, for_subprocess=False):
    """Decode a bytes filename on Python 2; anything else is returned unchanged"""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
8bf48f23 482
f07b74fc
PH
483
def encodeArgument(s):
    """Encode a command-line argument for use in a subprocess call"""
    if isinstance(s, compat_str):
        return encodeFilename(s, True)
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
    return encodeFilename(s.decode('ascii'), True)
491
492
aa49acd1
S
def decodeArgument(b):
    """Decode a subprocess argument; counterpart of encodeArgument"""
    return decodeFilename(b, for_subprocess=True)
495
496
8271226a
PH
def decodeOption(optval):
    """Return an option value as text, decoding bytes with the preferred encoding

    None is passed through unchanged.
    """
    if optval is None:
        return optval
    decoded = optval.decode(preferredencoding()) if isinstance(optval, bytes) else optval

    assert isinstance(decoded, compat_str)
    return decoded
1c256f70 505
5f6a1245 506
4539dd30
PH
def formatSeconds(secs):
    """Format a number of seconds as 'H:MM:SS', 'M:SS' or plain seconds

    The shortest form that fits is used: hours appear from 3600 seconds on
    and minutes from 60 seconds on.
    """
    # >= rather than >: exactly one hour must not be rendered as '60:00',
    # nor exactly one minute as '60'
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
514
a0ddb8a2 515
be4a824d
PH
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler suited to the running Python version

    Certificate verification is switched off when params has a true
    'nocheckcertificate' entry.
    """
    skip_verification = params.get('nocheckcertificate', False)

    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if skip_verification:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)

    # Python < 3.4
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
    if skip_verification:
        context.verify_mode = ssl.CERT_NONE
    else:
        context.verify_mode = ssl.CERT_REQUIRED
    context.set_default_verify_paths()
    return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
ea6d901e 539
732ea2f0 540
08f2a92c
JMF
def bug_reports_message():
    """Return the 'please report this issue' text appended to unexpected errors"""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return ''.join((
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ))
550
551
1c256f70
PH
class ExtractorError(Exception):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ Build the error message and remember the error context.

        tb, if given, is the original traceback (so that it can be printed out).
        expected marks a normal error that is most likely not a bug in
        youtube-dl; unexpected errors get the bug report hint appended.
        cause and video_id, if given, are woven into the message.
        """

        # Network trouble and unavailable videos that are being handled right
        # now are never treated as bugs
        currently_handled = sys.exc_info()[0]
        if currently_handled in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the stored traceback as text, or None if there is none"""
        tb = self.traceback
        if tb is None:
            return None
        return ''.join(traceback.format_tb(tb))
01951dda 579
1c256f70 580
416c7fcb
PH
class UnsupportedError(ExtractorError):
    """Expected error for a URL that cannot be handled"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
586
587
55b3e45b
JMF
class RegexNotFoundError(ExtractorError):
    """Raised when a regular expression did not match"""
591
592
class DownloadError(Exception):
    """Download Error exception.

    FileDownloader objects may throw this exception when they are not
    configured to continue on errors. It carries the appropriate error
    message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the sys.exc_info() triple of the exception that caused the trouble. """
        self.exc_info = exc_info
        super(DownloadError, self).__init__(msg)
d77c3dfd
FV
605
606
class SameFileError(Exception):
    """Same File exception.

    FileDownloader objects throw this exception when they detect that
    multiple files would have to be downloaded to the same file on disk.
    """
d77c3dfd
FV
614
615
class PostProcessingError(Exception):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        """ msg is the error message; it is also available as the msg attribute. """
        # Initialize the base class so that str(), repr() and tracebacks
        # actually show the message
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg
d77c3dfd 625
5f6a1245 626
class MaxDownloadsReached(Exception):
    """ The --max-downloads limit has been reached. """
d77c3dfd
FV
630
631
class UnavailableVideoError(Exception):
    """Unavailable Format exception.

    Thrown when a video is requested in a format that is not available for
    that video.
    """
d77c3dfd
FV
639
640
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """ downloaded and expected are both sizes in bytes. """
        # Give the exception a readable message instead of an empty one
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected))
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
d77c3dfd 653
5f6a1245 654
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class, binding it to the configured source address

    The source address comes from the 'source_address' entry of
    ydl_handler._params; without one the connection is returned as created.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/rg3/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs[b'strict'] = True
    hc = http_class(*args, **kwargs)

    source_address = ydl_handler._params.get('source_address')
    if source_address is None:
        return hc

    sa = (source_address, 0)
    if hasattr(hc, 'source_address'):  # Python 2.7+
        hc.source_address = sa
        return hc

    # Python 2.6: no source_address attribute, so connect() is replaced
    def _hc_connect(self, *args, **kwargs):
        sock = compat_socket_create_connection(
            (self.host, self.port), self.timeout, sa)
        if is_https:
            self.sock = ssl.wrap_socket(
                sock, self.key_file, self.cert_file,
                ssl_version=ssl.PROTOCOL_TLSv1)
        else:
            self.sock = sock
    hc.connect = functools.partial(_hc_connect, hc)

    return hc
680
681
def handle_youtubedl_headers(headers):
    """Apply the internal 'Youtubedl-no-compression' marker header

    Without the marker, headers is returned as is. With it, a new dict is
    returned that lacks both the marker and any Accept-Encoding header.
    """
    marker = 'Youtubedl-no-compression'
    if marker not in headers:
        return headers

    return dict(
        (name, value) for name, value in headers.items()
        if name != marker and name.lower() != 'accept-encoding')
87f0e62d
YCH
690
691
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        """params is stored as self._params; the remaining arguments go to HTTPHandler."""
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        """Open req over a connection built by _create_http_connection."""
        return self.do_open(functools.partial(
            _create_http_connection, self, compat_http_client.HTTPConnection, False),
            req)

    @staticmethod
    def deflate(data):
        """Decompress deflate data, trying a raw stream first and a zlib-wrapped one second."""
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    @staticmethod
    def addinfourl_wrapper(stream, headers, url, code):
        """Create an addinfourl response carrying code, with or without constructor support for it."""
        if hasattr(compat_urllib_request.addinfourl, 'getcode'):
            return compat_urllib_request.addinfourl(stream, headers, url, code)
        # This addinfourl has no getcode, so the code is set as an attribute
        ret = compat_urllib_request.addinfourl(stream, headers, url)
        ret.code = code
        return ret

    def http_request(self, req):
        """Escape the request URL, add the standard headers and apply internal marker headers."""
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            # Keep the HEAD method, which a plain Request would not preserve
            req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
            new_req = req_type(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
            new_req.timeout = req.timeout
            req = new_req

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Honour and strip the internal Youtubedl-no-compression marker
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        """Decompress gzip/deflate bodies and percent-encode redirect locations."""
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry with 1 to 1023 trailing bytes cut off
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    # No truncation helped: report the first error
                    raise original_ioerror
            resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    resp.headers['Location'] = location_escaped
        return resp

    # HTTPS traffic gets exactly the same request/response processing
    https_request = http_request
    https_response = http_response
bf50b038 815
5de90176 816
be4a824d
PH
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler whose connections are built by _create_http_connection"""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        """params is stored as self._params; https_conn_class defaults to HTTPSConnection."""
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        """Open req, forwarding the SSL settings this handler carries, if any."""
        extra = {}
        # _context exists on python > 2.6, _check_hostname on python 3.x
        for attr, option in (('_context', 'context'), ('_check_hostname', 'check_hostname')):
            if hasattr(self, attr):
                extra[option] = getattr(self, attr)

        connection_factory = functools.partial(
            _create_http_connection, self, self._https_conn_class, True)
        return self.do_open(connection_factory, req, **extra)
be4a824d
PH
832
833
a6420bf5
S
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that also handles HTTPS with the same hooks.

    The Set-Cookie escaping workaround described in http_response is
    currently disabled (commented out), so responses are handed to
    HTTPCookieProcessor unchanged.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        """Delegate to HTTPCookieProcessor.http_response."""
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS requests and responses go through the same cookie handling
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
856
857
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given ISO 8601 style date

    delimiter separates the date part from the time part. timezone, if given,
    is a datetime.timedelta offset that is used instead of one parsed from
    date_str. None is returned for a None or unparsable date_str.
    """

    if date_str is None:
        return None

    # Fractional seconds are dropped
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        # No offset, or a trailing 'Z', means UTC
        timezone = datetime.timedelta()
        tz_match = re.search(
            r'(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
            date_str)
        if tz_match:
            date_str = date_str[:-len(tz_match.group(0))]
            if tz_match.group('sign'):
                direction = 1 if tz_match.group('sign') == '+' else -1
                timezone = datetime.timedelta(
                    hours=direction * int(tz_match.group('hours')),
                    minutes=direction * int(tz_match.group('minutes')))

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        utc_moment = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(utc_moment.timetuple())
    except ValueError:
        return None
912b38b4
PH
887
888
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    date_str is matched against a list of known formats; day_first selects
    between day-first and month-first readings of ambiguous numeric dates.
    If no format matches, an RFC 2822 parse is attempted as a last resort.
    Returns None for a None date_str or when nothing could be parsed.
    """

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # %z (UTC offset) is only supported in python>=3.2
    # A plain D-M-YYYY date is left alone: its '-YYYY' tail looks like an offset
    if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
        date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    format_expressions = [
        '%d %B %Y',
        '%d %b %Y',
        '%B %d %Y',
        '%b %d %Y',
        # Ordinal day suffixes: 1st, 2nd, 3rd, 4th
        '%b %dst %Y %I:%M',
        '%b %dnd %Y %I:%M',
        '%b %drd %Y %I:%M',
        '%b %dth %Y %I:%M',
        '%Y %m %d',
        '%Y-%m-%d',
        '%Y/%m/%d',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S',
        '%Y-%m-%d %H:%M:%S.%f',
        '%d.%m.%Y %H:%M',
        '%d.%m.%Y %H.%M',
        '%Y-%m-%dT%H:%M:%SZ',
        '%Y-%m-%dT%H:%M:%S.%fZ',
        '%Y-%m-%dT%H:%M:%S.%f0Z',
        '%Y-%m-%dT%H:%M:%S',
        '%Y-%m-%dT%H:%M:%S.%f',
        '%Y-%m-%dT%H:%M',
    ]
    if day_first:
        format_expressions.extend([
            '%d-%m-%Y',
            '%d.%m.%Y',
            '%d/%m/%Y',
            '%d/%m/%y',
            '%d/%m/%Y %H:%M:%S',
        ])
    else:
        format_expressions.extend([
            '%m-%d-%Y',
            '%m.%d.%Y',
            '%m/%d/%Y',
            '%m/%d/%y',
            '%m/%d/%Y %H:%M:%S',
        ])
    for expression in format_expressions:
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            # Not this format; keep trying the others
            pass
    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return compat_str(upload_date)
bf50b038 953
5f6a1245 954
def determine_ext(url, default_ext='unknown_video'):
    """ Guess the file extension from a URL

    Returns the text after the last dot of the part before '?', if it is
    purely alphanumeric or (ignoring trailing slashes) a known extension;
    default_ext otherwise, and also when url is None.
    """
    if url is None:
        return default_ext
    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext
73e79f2a 966
5f6a1245 967
def subtitles_filename(filename, sub_lang, sub_format):
    """ Replace the extension of filename by "<sub_lang>.<sub_format>" """
    stem = filename.rsplit('.', 1)[0]
    return '.'.join((stem, sub_lang, sub_format))
d4051a8e 970
5f6a1245 971
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?

    'yesterday' is accepted as well. Months and years are approximated as
    30 and 365 days. Raises ValueError for any other unparsable string.
    """
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    # Raw string: "\d" in a plain literal is an invalid escape sequence
    match = re.match(
        r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?',
        date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        # timedelta takes plural keyword names (days=..., weeks=...)
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
5f6a1245
JW
999
1000
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that are not made of exactly eight digits are returned as is."""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if parts is None:
        return date_str
    return '-'.join(parts.groups())
1009
5f6a1245 1010
bd558525
JMF
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date

        A missing bound defaults to the earliest/latest representable date.
        Raises ValueError if start lies after end.
        """
        self.start = (
            datetime.datetime.min.date() if start is None
            else date_from_str(start))
        self.end = (
            datetime.datetime.max.date() if end is None
            else date_from_str(end))
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        day = date if isinstance(date, datetime.date) else date_from_str(date)
        return self.start <= day <= self.end

    def __str__(self):
        return ' - '.join((self.start.isoformat(), self.end.isoformat()))
c496ca96
PH
1040
1041
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    if isinstance(name, bytes):
        # Byte strings are decoded with the preferred encoding
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
c257baff
PH
1050
1051
b58ddb32
PH
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    s is the text to write, out the target stream. The special method is
    the Win32 WriteConsoleW call, used only when out is file descriptor 1
    or 2 and the corresponding handle is an actual console.
    Raises OSError if WriteConsoleW reports a failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Maps file descriptors to the ids passed to GetStdHandle
    # (presumably STD_OUTPUT_HANDLE and STD_ERROR_HANDLE)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = ctypes.WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        (b'GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)((b'WriteConsoleW', ctypes.windll.kernel32))
    # Receives the count reported by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b'GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = ctypes.WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        (b'GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True for invalid handles, for handles that are not character
        # devices, and for handles GetConsoleMode fails on
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at once and stop before any
        # non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
1125
1126
def write_string(s, out=None, encoding=None):
    """ Write the text s to the stream out (sys.stderr by default)

    On Windows, when no explicit encoding is requested, the console API is
    tried first. Otherwise s is encoded (ignoring unencodable characters)
    for byte streams, or written as text. The stream is flushed afterwards.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    try_console_api = (
        sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'))
    if try_console_api and _windows_write_string(s, out):
        return

    # Python 2 lies about mode of sys.stderr
    wants_bytes = 'b' in getattr(out, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        data = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(data)
    elif hasattr(out, 'buffer'):
        # Text stream on top of a binary one: encode ourselves and bypass it
        target_encoding = encoding or getattr(out, 'encoding', None) or preferredencoding()
        data = s.encode(target_encoding, 'ignore')
        out.buffer.write(data)
    else:
        out.write(s)
    out.flush()
1147
1148
48ea9cea
PH
def bytes_to_intlist(bs):
    """ Return the byte values of bs as a list of ints """
    if not bs:
        return []
    # Indexing bytes yields ints on Python 3, one-character strings before
    first_is_int = isinstance(bs[0], int)
    return list(bs) if first_is_int else list(map(ord, bs))
1156
c257baff 1157
def intlist_to_bytes(xs):
    """ Pack a sequence of byte values into a byte string """
    if xs:
        return struct_pack('%dB' % len(xs), *xs)
    return b''
c38b1e77
PH
1162
1163
c1c9a79c
PH
# Cross-platform file locking
# Both branches define _lock_file(f, exclusive) and _unlock_file(f).
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes structure passed as the last argument of LockFileEx/UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high parts of the byte count locked, starting at offset 0
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """ Lock the open file f; raises OSError on failure """
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object because _unlock_file needs it again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 is presumably LOCKFILE_EXCLUSIVE_LOCK; 0x0 a shared lock
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """ Release the lock taken by _lock_file; raises OSError on failure """
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """ Take an exclusive or shared flock() lock on the open file f """
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """ Release the flock() lock held on f """
        fcntl.flock(f, fcntl.LOCK_UN)
c1c9a79c
PH
1227
1228
class locked_file(object):
    """ Context manager around a file that is locked while in use

    The file is opened on construction, locked on entering the context
    (shared for mode 'r', exclusive otherwise), and unlocked and closed on
    leaving it.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        want_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, want_exclusive)
        except IOError:
            # Do not leak the open file if the lock cannot be taken
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
4eb7f1d1
JMF
1258
1259
4644ac55
S
def get_filesystem_encoding():
    """ Return the filesystem encoding, falling back to 'utf-8' if unknown """
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
1263
1264
def shell_quote(args):
    """ Return the arguments in args quoted for a shell, joined by spaces

    Byte string arguments are decoded with the filesystem encoding first.
    """
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        # shlex_quote is the quoting helper the rest of this file uses;
        # pipes.quote is undocumented and the pipes module is deprecated
        quoted_args.append(shlex_quote(a))
    return ' '.join(quoted_args)
9d4660ca
PH
1274
1275
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use.

    data is serialized as JSON and appended to url as a URL-encoded
    fragment under the key __youtubedl_smuggle.
    """

    payload = {'__youtubedl_smuggle': json.dumps(data)}
    return url + '#' + compat_urllib_parse.urlencode(payload)
9d4660ca
PH
1282
1283
def unsmuggle_url(smug_url, default=None):
    """ Split a smuggled URL into (url, data)

    Returns (smug_url, default) if smug_url carries no smuggled data.
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    serialized = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(serialized)
02dbf93f
PH
1291
1292
02dbf93f
PH
def format_bytes(bytes):
    """ Format a byte count as a human readable string, e.g. '1.50MiB'

    bytes may be a number, a numeric string, or None (which gives 'N/A').
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
        # Keep the index inside the suffix list: huge values would run past
        # its end, tiny fractions would index it from the back
        exponent = max(0, min(exponent, len(suffixes) - 1))
    suffix = suffixes[exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
f53c966a 1305
1c088fa8 1306
be64b5b0
PH
def parse_filesize(s):
    """ Parse a string such as '1.2 MiB' into a number of bytes

    A comma is accepted as decimal separator. Returns None if s is None or
    does not start with a number followed by a known unit.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {'B': 1, 'b': 1}
    for power, letter in enumerate('KMGTPEZY', 1):
        binary, decimal = 1024 ** power, 1000 ** power
        _UNIT_TABLE[letter + 'iB'] = binary
        _UNIT_TABLE[letter + 'B'] = decimal
        _UNIT_TABLE[letter.lower() + 'B'] = binary
        _UNIT_TABLE[letter + 'b'] = decimal

    units_re = '|'.join(re.escape(unit) for unit in _UNIT_TABLE)
    size_match = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
    if size_match is None:
        return None

    number = float(size_match.group('num').replace(',', '.'))
    return int(number * _UNIT_TABLE[size_match.group('unit')])
be64b5b0
PH
1359
1360
caefb1de
PH
def month_by_name(name):
    """ Return the number of a month by (locale-independently) English name """

    if name not in ENGLISH_MONTH_NAMES:
        return None
    return ENGLISH_MONTH_NAMES.index(name) + 1
1368
1369
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    # The abbreviation of a month is the first three letters of its name
    for number, full_name in enumerate(ENGLISH_MONTH_NAMES, 1):
        if full_name[:3] == abbrev:
            return number
    return None
18258362
JMF
1378
1379
def fix_xml_ampersands(xml_str):
    """Replace every '&' that does not start an entity by '&amp;' in XML"""
    # Ampersands starting a predefined entity or a numeric character
    # reference are left alone
    bare_ampersand = r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)'
    return re.sub(bare_ampersand, '&amp;', xml_str)
e3946f98
PH
1386
1387
def setproctitle(title):
    """ Set the process name to title via libc's prctl, if possible

    Does nothing if libc.so.6 cannot be loaded or offers no prctl.
    """
    assert isinstance(title, compat_str)
    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    title_bytes = title.encode('utf-8')
    # Initializing the buffer from the bytes makes ctypes append the NUL
    # terminator that prctl expects; a buffer of exactly len(title_bytes)
    # would leave the string unterminated
    buf = ctypes.create_string_buffer(title_bytes)
    try:
        libc.prctl(15, buf, 0, 0, 0)  # 15 is PR_SET_NAME
    except AttributeError:
        return  # Strange libc, just skip this
d7dda168
PH
1401
1402
def remove_start(s, start):
    """ Return s without the prefix start, or s itself if it has no such prefix """
    return s[len(start):] if s.startswith(start) else s
29eb5174
PH
1407
1408
2b9faf55
PH
def remove_end(s, end):
    """ Return s without the suffix end, or s itself if it has no such suffix """
    # An empty suffix needs its own check: s[:-0] is s[:0], i.e. ''
    if end and s.endswith(end):
        return s[:-len(end)]
    return s
1413
1414
31b2051e
S
def remove_quotes(s):
    """ Strip one pair of matching single or double quotes enclosing s """
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
1422
1423
def url_basename(url):
    """ Return the last component of the path of url """
    segments = compat_urlparse.urlparse(url).path.strip('/').split('/')
    return segments[-1]
aa94a6d3
PH
1427
1428
class HEADRequest(compat_urllib_request.Request):
    """ Request whose HTTP method is always HEAD """

    def get_method(self):
        return 'HEAD'
7217e148
PH
1432
1433
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """ Convert v to an int, or return default if that is not possible

    If get_attr is given, the attribute of that name is read from v first.
    The result is multiplied by invscale and floor-divided by scale.
    None, the empty string and unconvertible values all give default.
    """
    if get_attr:
        if v is not None:
            v = getattr(v, get_attr, None)
    if v == '':
        v = None
    if v is None:
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError):
        # int() raises TypeError, not ValueError, for e.g. lists or dicts
        return default
9732d77e 1446
9572013d 1447
40a90862
JMF
def str_or_none(v, default=None):
    """ Convert v to a compat_str, or return default if v is None """
    if v is None:
        return default
    return compat_str(v)
1450
9732d77e
PH
1451
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    Commas, dots and plus signs are removed before conversion, so
    '1,234' gives 1234. None gives None; ints are returned unchanged.
    Raises ValueError if the cleaned string is not a number.
    """
    if int_str is None:
        return None
    if isinstance(int_str, int):
        # Already a number; re.sub below would reject it with a TypeError
        return int_str
    int_str = re.sub(r'[,\.\+]', '', int_str)
    return int(int_str)
608d11f5
PH
1458
1459
def float_or_none(v, scale=1, invscale=1, default=None):
    """ Convert v to a float, or return default if that is not possible

    The result is multiplied by invscale and divided by scale.
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        # float() raises TypeError, not ValueError, for e.g. lists or dicts
        return default
43f775e4
PH
1467
1468
def parse_duration(s):
    """ Parse a duration string and return the number of seconds

    Handles forms such as '3 min', '2.5 hours', '1:23:45', '2h 3m' and
    an optional leading 'PT' / 'T'. Returns None if s is not a string or
    does not match; the result is an int, or a float when the input has
    fractional parts.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    m = re.match(
        r'''(?ix)(?:P?T)?
        (?:
            (?P<only_mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*|
            (?P<only_hours>[0-9.]+)\s*(?:hours?)|

            \s*(?P<hours_reversed>[0-9]+)\s*(?:[:h]|hours?)\s*(?P<mins_reversed>[0-9]+)\s*(?:[:m]|mins?\.?|minutes?)\s*|
            (?:
                (?:
                    (?:(?P<days>[0-9]+)\s*(?:[:d]|days?)\s*)?
                    (?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*
                )?
                (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
            )?
            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
        )$''', s)
    if not m:
        return None
    res = 0
    # A lone (possibly fractional) number of minutes or hours is converted
    # directly; the remaining groups are summed up
    if m.group('only_mins'):
        return float_or_none(m.group('only_mins'), invscale=60)
    if m.group('only_hours'):
        return float_or_none(m.group('only_hours'), invscale=60 * 60)
    if m.group('secs'):
        res += int(m.group('secs'))
    if m.group('mins_reversed'):
        res += int(m.group('mins_reversed')) * 60
    if m.group('mins'):
        res += int(m.group('mins')) * 60
    if m.group('hours'):
        res += int(m.group('hours')) * 60 * 60
    if m.group('hours_reversed'):
        res += int(m.group('hours_reversed')) * 60 * 60
    if m.group('days'):
        res += int(m.group('days')) * 24 * 60 * 60
    if m.group('ms'):
        # The group includes the leading dot, e.g. '.5'
        res += float(m.group('ms'))
    return res
91d7d0b3
JMF
1513
1514
def prepend_extension(filename, ext, expected_real_ext=None):
    """ Insert ext in front of the extension of filename

    If expected_real_ext is given and filename has a different extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
d70ad093
PH
1521
1522
b3ed15b7
S
def replace_extension(filename, ext, expected_real_ext=None):
    """ Replace the extension of filename by ext

    If expected_real_ext is given and filename has a different extension,
    ext is appended to the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        base = filename
    else:
        base = name
    return '{0}.{1}'.format(base, ext)
1528
1529
d70ad093
PH
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        # Output is captured and discarded; only startability matters
        process = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process.communicate()
    except OSError:
        return False
    return exe
b7ab0590
PH
1538
1539
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # stderr is merged into stdout
        process = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        output, _ = process.communicate()
    except OSError:
        return False
    if isinstance(output, bytes):  # Python 2.x
        output = output.decode('ascii', 'ignore')
    return detect_exe_version(output, version_re, unrecognized)
1553
1554
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """ Extract a version string from the output of an executable

    version_re must contain one group capturing the version; by default
    the word following "version" is used. Returns unrecognized if the
    pattern is not found.
    """
    assert isinstance(output, compat_str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    found = re.search(pattern, output)
    if not found:
        return unrecognized
    return found.group(1)
1564
1565
class PagedList(object):
    """ Base class of the paged lists; relies on a getslice() method
    which the subclasses in this file define """

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
1570
9c44d242
PH
1571
class OnDemandPagedList(PagedList):
    """ Paged list that fetches pages one at a time until a short page
    or the requested end is reached """

    def __init__(self, pagefunc, pagesize, use_cache=False):
        """ pagefunc(pagenum) returns an iterable with the entries of the
        (0-based) page pagenum; pagesize is the size of a full page.
        With use_cache, fetched pages are kept and not requested again. """
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            self._cache = {}

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list;
        end=None means up to the last entry """
        res = []
        # Start with the page that contains the entry at index start
        for pagenum in itertools.count(start // self._pagesize):
            # Indices of the first entry of this page and of the next one
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of start within this page (0 unless start is on it)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset of end within this page (None unless end is on it)
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
81c2f20b
PH
1622
1623
9c44d242
PH
class InAdvancePagedList(PagedList):
    """ Paged list for which the number of pages is known in advance """

    def __init__(self, pagefunc, pagecount, pagesize):
        """ pagefunc(pagenum) returns an iterable with the entries of the
        (0-based) page pagenum; pagecount is the number of pages and
        pagesize the size of a full page """
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """ Return the entries with indices in [start, end) as a list;
        end=None means up to the last page """
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Entries to drop at the beginning of the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Number of entries still wanted (None means no limit)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the request
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
1651
1652
def uppercase_escape(s):
    r""" Replace \UXXXXXXXX escape sequences in s by their characters """
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _unescape, s)
0fe2ff78
YCH
1659
1660
def lowercase_escape(s):
    r""" Replace \uXXXX escape sequences in s by their characters """
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(match):
        return decode(match.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', _unescape, s)
b53466e1 1667
d05cfe06
S
1668
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # On Python 2, text is passed to quote() as UTF-8 bytes
    needs_encoding = sys.version_info < (3, 0) and isinstance(s, compat_str)
    if needs_encoding:
        s = s.encode('utf-8')
    return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
d05cfe06
S
1674
1675
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # Scheme and network location are left untouched
    path = escape_rfc3986(parts.path)
    params = escape_rfc3986(parts.params)
    query = escape_rfc3986(parts.query)
    fragment = escape_rfc3986(parts.fragment)
    escaped = parts._replace(
        path=path, params=params, query=query, fragment=fragment)
    return escaped.geturl()
1685
# Probe whether struct accepts the (text) format strings used in this file;
# struct_pack and struct_unpack are defined accordingly.
try:
    struct.pack('!I', 0)
except TypeError:
    # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
    def struct_pack(spec, *args):
        """ struct.pack that also accepts a text format string """
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.pack(spec, *args)

    def struct_unpack(spec, *args):
        """ struct.unpack that also accepts a text format string """
        if isinstance(spec, compat_str):
            spec = spec.encode('ascii')
        return struct.unpack(spec, *args)
else:
    # Text format strings work, so the plain functions can be used
    struct_pack = struct.pack
    struct_unpack = struct.unpack
62e609ab
PH
1702
1703
def read_batch_urls(batch_fd):
    """ Read the URLs listed in a batch file, one per line

    batch_fd is an open file-like object yielding text or byte lines; it
    is closed afterwards. Byte order marks and surrounding whitespace are
    stripped; empty lines and lines starting with '#', ';' or ']' are
    skipped. Returns a list of URLs.
    """
    def fixup(url):
        if isinstance(url, bytes):
            url = url.decode('utf-8', 'replace')
        # A BOM shows up as U+FEFF when decoded as UTF-8, and as three
        # separate characters when its bytes were decoded one by one
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.strip()
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        return [url for url in map(fixup, fd) if url]
b74fa8cd
JMF
1718
1719
def urlencode_postdata(*args, **kargs):
    """ URL-encode the given data and return it as ASCII bytes """
    encoded = compat_urllib_parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
bcf89ce6
PH
1722
1723
def encode_dict(d, encoding='utf-8'):
    """ Return a copy of d with all string keys and values encoded

    Keys and values that are not strings are kept as they are.
    """
    def _encoded(item):
        if isinstance(item, compat_basestring):
            return item.encode(encoding)
        return item

    return dict((_encoded(key), _encoded(value)) for key, value in d.items())
16392824 1728
8e60dc75 1729
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """ Look up one key, or the first usable of several keys, in d

    For a list or tuple of keys, the value of the first key is returned
    that is present and not None (and, with skip_false_values, not
    false); default if there is none. A single key is a plain d.get().
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key not in d:
            continue
        value = d[key]
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
1738
1739
8e60dc75
S
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """ Return string as a compat_str, decoding it with encoding if needed """
    if isinstance(string, compat_str):
        return string
    return compat_str(string, encoding, errors)
1742
16392824 1743
a1a530b0
PH
# Ages assigned to the rating names understood by parse_age_limit
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


def parse_age_limit(s):
    """ Parse an age limit such as '18', '18+' or a rating from US_RATINGS

    Returns the age as an int, or None if s is None or not recognized.
    """
    if s is None:
        return None
    age_match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_match:
        return int(age_match.group('age'))
    return US_RATINGS.get(s)
146c80e2
S
1758
1759
def strip_jsonp(code):
    """ Strip the callback wrapper from a JSONP response

    Input that does not look like callback(...) is returned unchanged.
    """
    # Callback name, parenthesized payload, optional ';' and // comments
    jsonp_re = r'(?s)^[a-zA-Z0-9_.]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$'
    return re.sub(jsonp_re, r'\1', code)
478c2c61
PH
1763
1764
e05f6939
PH
def js_to_json(code):
    """ Convert a JavaScript object literal into JSON text

    Single-quoted strings and bare identifiers are turned into
    double-quoted strings and trailing commas are removed.
    """
    def fix_kv(m):
        # m matches a double-quoted string, a single-quoted string or an
        # identifier (see the pattern below)
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        if v.startswith('"'):
            # \' is not a valid escape inside a JSON string
            v = re.sub(r"\\'", "'", v[1:-1])
        elif v.startswith("'"):
            v = v[1:-1]
            # Keep \\ as is, unescape \' and escape bare double quotes
            v = re.sub(r"\\\\|\\'|\"", lambda m: {
                '\\\\': '\\\\',
                "\\'": "'",
                '"': '\\"',
            }[m.group(0)], v)
        return '"%s"' % v

    res = re.sub(r'''(?x)
        "(?:[^"\\]*(?:\\\\|\\['"nu]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nu]))*[^'\\]*'|
        [a-zA-Z_][.a-zA-Z_0-9]*
        ''', fix_kv, code)
    # Drop commas directly in front of a closing bracket or brace
    res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
    return res
1788
1789
478c2c61
PH
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its position in
    quality_ids, and ids that are not listed to -1.
    """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
1798
acd69589
PH
1799
# Template with the named %-style fields title, id and ext
# (presumably the default output filename template - confirm at the users)
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
0a871f68 1801
a020a0dc
PH
1802
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than length are cut so that the result, including the
    ellipses, is length characters long. None is passed through.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745
PH
1811
1812
def version_tuple(v):
    """ Split a version string at dots and dashes into a tuple of ints """
    return tuple(map(int, re.split(r'[-.]', v)))
48844745
PH
1815
1816
def is_outdated_version(version, limit, assume_new=True):
    """ Return whether version is older than limit

    If version is empty or one of the versions cannot be parsed, the
    answer is the opposite of assume_new.
    """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
    return outdated
732ea2f0
PH
1824
1825
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updateable when loaded from a zip file or running as a frozen binary
    loader = globals().get('__loader__')
    return isinstance(loader, zipimporter) or hasattr(sys, 'frozen')
7d4111ed
PH
1831
1832
def args_to_str(args):
    """ Return the arguments in args shell-quoted and joined by spaces """
    # Get a short string representation for a subprocess command
    return ' '.join(map(shlex_quote, args))
2ccd1b10
PH
1836
1837
def error_to_compat_str(err):
    """ Return the message of the exception err as text """
    message = str(err)
    if sys.version_info[0] >= 3:
        return message
    # On python 2 error byte string must be decoded with proper
    # encoding rather than ascii
    return message.decode(preferredencoding())
1845
1846
def mimetype2ext(mt):
    """ Return the file extension for the MIME type mt

    Parameters such as '; charset=utf-8' are ignored. Unknown types give
    their subtype (the part after '/'); None gives None.
    """
    if mt is None:
        # E.g. a response without Content-Type header
        return None

    # Only the type/subtype part is relevant, not the parameters
    mt = mt.split(';')[0].strip()

    ext = {
        'audio/mp4': 'm4a',
    }.get(mt)
    if ext is not None:
        return ext

    _, _, res = mt.rpartition('/')

    return {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'srt': 'srt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'vtt': 'vtt',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-wmv': 'wmv',
    }.get(res, res)
1867
1868
2ccd1b10
PH
def urlhandle_detect_ext(url_handle):
    """ Guess the file extension from the headers of an opened URL

    The filename of a Content-Disposition attachment is preferred; the
    Content-Type is used otherwise.
    """
    try:
        url_handle.headers
    except AttributeError:  # Python < 3
        getheader = url_handle.info().getheader
    else:
        def getheader(h):
            return url_handle.headers[h]

    disposition = getheader('Content-Disposition')
    if disposition:
        name_match = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if name_match:
            ext = determine_ext(name_match.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
05900629
PH
1885
1886
1e399778
YCH
def encode_data_uri(data, mime_type):
    """ Return the bytes data as a base64 data: URI of type mime_type """
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
1889
1890
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked if no limit is set, or if the content is
    # available for everyone
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit
61ca9a80
PH
1899
1900
def is_html(first_bytes):
    """
    Detect whether a file contains HTML by examining its first bytes.

    Returns a match object (truthy) when the decoded text starts with
    optional whitespace followed by '<', and None otherwise.
    """

    # Order matters: the UTF-32 marks must be tried before the UTF-16 marks
    # they start with
    known_boms = (
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    )
    encoding, skip = 'utf-8', 0
    for bom, bom_encoding in known_boms:
        if first_bytes.startswith(bom):
            encoding, skip = bom_encoding, len(bom)
            break

    text = first_bytes[skip:].decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
a055469f
PH
1919
1920
def determine_protocol(info_dict):
    """
    Work out the download protocol of a format dictionary.

    An explicit 'protocol' entry wins. Otherwise the protocol is derived
    from the 'url' entry: its rtmp/mms/rtsp prefix, its m3u8/f4m extension,
    or finally its URL scheme.
    """
    explicit_protocol = info_dict.get('protocol')
    if explicit_protocol is not None:
        return explicit_protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
cfb56d1a
PH
1941
1942
1943def render_table(header_row, data):
1944 """ Render a list of rows, each as a list of values """
1945 table = [header_row] + data
1946 max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
1947 format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
1948 return '\n'.join(format_str % tuple(row) for row in table)
347de493
PH
1949
1950
1951def _match_one(filter_part, dct):
1952 COMPARISON_OPERATORS = {
1953 '<': operator.lt,
1954 '<=': operator.le,
1955 '>': operator.gt,
1956 '>=': operator.ge,
1957 '=': operator.eq,
1958 '!=': operator.ne,
1959 }
1960 operator_rex = re.compile(r'''(?x)\s*
1961 (?P<key>[a-z_]+)
1962 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1963 (?:
1964 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
1965 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
1966 )
1967 \s*$
1968 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
1969 m = operator_rex.search(filter_part)
1970 if m:
1971 op = COMPARISON_OPERATORS[m.group('op')]
1972 if m.group('strval') is not None:
1973 if m.group('op') not in ('=', '!='):
1974 raise ValueError(
1975 'Operator %s does not support string values!' % m.group('op'))
1976 comparison_value = m.group('strval')
1977 else:
1978 try:
1979 comparison_value = int(m.group('intval'))
1980 except ValueError:
1981 comparison_value = parse_filesize(m.group('intval'))
1982 if comparison_value is None:
1983 comparison_value = parse_filesize(m.group('intval') + 'B')
1984 if comparison_value is None:
1985 raise ValueError(
1986 'Invalid integer value %r in filter part %r' % (
1987 m.group('intval'), filter_part))
1988 actual_value = dct.get(m.group('key'))
1989 if actual_value is None:
1990 return m.group('none_inclusive')
1991 return op(actual_value, comparison_value)
1992
1993 UNARY_OPERATORS = {
1994 '': lambda v: v is not None,
1995 '!': lambda v: v is None,
1996 }
1997 operator_rex = re.compile(r'''(?x)\s*
1998 (?P<op>%s)\s*(?P<key>[a-z_]+)
1999 \s*$
2000 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
2001 m = operator_rex.search(filter_part)
2002 if m:
2003 op = UNARY_OPERATORS[m.group('op')]
2004 actual_value = dct.get(m.group('key'))
2005 return op(actual_value)
2006
2007 raise ValueError('Invalid filter part %r' % filter_part)
2008
2009
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # Parts are joined with "&"; the first failing part rejects the dictionary
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
2015
2016
def match_filter_func(filter_str):
    """
    Build a predicate for filter_str.

    The returned function takes an info dictionary and returns None when it
    passes the filter, or a message explaining why it is skipped.
    """
    def _match_func(info_dict):
        if not match_str(filter_str, info_dict):
            video_title = info_dict.get('title', info_dict.get('id', 'video'))
            return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
        return None
    return _match_func
91410c9b
PH
2025
2026
bf6427d2
YCH
def parse_dfxp_time_expr(time_expr):
    """
    Parse a DFXP/TTML time expression into seconds.

    Accepts an offset in seconds ("12.5" or "12.5s") or a clock value
    ("H:MM:SS", with an optional fraction after "." or ":"). Returns a
    number of seconds, or None for empty or unrecognized input.
    """
    if not time_expr:
        return

    offset_match = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes, seconds = clock_match.groups()
        # A ":" before the fraction is treated like a decimal point
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
bf6427d2
YCH
2038
2039
c1c924ab
YCH
def srt_subtitles_timecode(seconds):
    """
    Format a time offset as an SRT timecode ("HH:MM:SS,mmm").

    @param seconds  offset in seconds (int or float)
    """
    # Round to whole milliseconds first. Truncating every field on its own
    # loses a millisecond to floating point error (4.6 became "...,599")
    total_msecs = int(round(seconds * 1000))
    total_secs, msecs = divmod(total_msecs, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, mins = divmod(total_mins, 60)
    return '%02d:%02d:%02d,%03d' % (hours, mins, secs, msecs)
bf6427d2
YCH
2042
2043
def dfxp2srt(dfxp_data):
    """
    Convert a DFXP/TTML subtitle document to SRT.

    @param dfxp_data  the subtitle document as a text string
    Returns the SRT text. Raises ValueError when the document contains no
    paragraph element. Paragraphs without a usable begin time, or with
    neither an end time nor a duration, are left out.
    """
    _x = functools.partial(xpath_with_ns, ns_map={
        'ttml': 'http://www.w3.org/ns/ttml',
        'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
    })

    class TTMLPElementParser(object):
        # Parser target collecting the plain text of one paragraph
        out = ''

        def start(self, tag, attrib):
            # Line breaks are the only markup carried over
            if tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
                self.out += '\n'

        def end(self, tag):
            pass

        def data(self, data):
            self.out += data

        def close(self):
            return self.out.strip()

    def parse_node(node):
        # Re-serialize the paragraph and run it through the text collector
        parser = xml.etree.ElementTree.XMLParser(target=TTMLPElementParser())
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
    # Try both known namespaces first, then un-namespaced paragraphs
    paras = (
        dfxp.findall(_x('.//ttml:p')) or
        dfxp.findall(_x('.//ttaf1:p')) or
        dfxp.findall('.//p'))

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    cues = []
    # Cue numbers follow the paragraph position, so skipped paragraphs
    # leave gaps in the numbering
    for index, para in enumerate(paras, 1):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        begin_code = srt_subtitles_timecode(begin_time)
        end_code = srt_subtitles_timecode(end_time)
        text = parse_node(para)
        cues.append('%d\n%s --> %s\n%s\n\n' % (index, begin_code, end_code, text))

    return ''.join(cues)
2096
2097
66e289ba
S
def cli_option(params, command_option, param):
    """ Return [command_option, value] for params[param], or [] when it is unset (None) """
    value = params.get(param)
    if value is None:
        return []
    return [command_option, value]
2101
2102
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """
    Translate the boolean params[param] into command line arguments.

    Returns [command_option, value], or the single argument
    command_option + separator + value when a separator is given, where
    value is true_value or false_value. The parameter must be a bool.
    """
    param = params.get(param)
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
2109
2110
def cli_valueless_option(params, command_option, param, expected_value=True):
    """ Return [command_option] when params[param] equals expected_value, otherwise [] """
    if params.get(param) == expected_value:
        return [command_option]
    return []
2114
2115
def cli_configuration_args(params, param, default=[]):
    """
    Return the list of extra command line arguments stored in params[param].

    @param default  value returned when the parameter is unset (None)
    The stored value must be a list.
    """
    ex_args = params.get(param)
    if ex_args is None:
        # Hand out a copy of list defaults. Returning the shared default
        # object itself would let a caller that extends the result change
        # what every later call returns.
        return list(default) if isinstance(default, list) else default
    assert isinstance(ex_args, list)
    return ex_args
2122
2123
39672624
YCH
class ISO639Utils(object):
    """ Conversions between two-letter (ISO 639-1) and three-letter (ISO 639-2/T) language codes """

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are looked up, so longer tags such
        # as "en-US" resolve through their leading language part
        two_letter_code = code[:2]
        return cls._lang_map.get(two_letter_code)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Reverse lookup by value; None when the code is unknown
        return next(
            (short_name for short_name, long_name in cls._lang_map.items()
             if long_name == code),
            None)
2324
2325
4eb10f66
YCH
class ISO3166Utils(object):
    """ Lookup of country names by their two-letter country code """

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter country code (any letter case) to the corresponding full name"""
        # The table is keyed by upper-case codes; unknown codes give None
        country_code = code.upper()
        return cls._country_map.get(country_code)
2584
2585
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """
    Proxy handler that lets a single request choose its own proxy.

    A request carrying a 'Ytdl-request-proxy' header is sent through the
    proxy named there; the header is removed before the request goes out.
    The special value '__noproxy__' means a direct connection.
    """

    def __init__(self, proxies=None):
        # Set default handlers
        # They are installed before the base initializer runs, which
        # receives the proxies mapping afterwards
        for scheme in ('http', 'https'):
            default_opener = (
                lambda r, proxy='__noproxy__', type=scheme, meth=self.proxy_open:
                meth(r, proxy, type))
            setattr(self, '%s_open' % scheme, default_opener)
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A proxy named in the request header overrides the given one
        header_proxy = req.headers.get('Ytdl-request-proxy')
        if header_proxy is not None:
            proxy = header_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy != '__noproxy__':
            return compat_urllib_request.ProxyHandler.proxy_open(
                self, req, proxy, type)
        return None  # No Proxy
5bc880b9
YCH
2605
2606
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''

    # The byte order is reversed before the data is read as one big
    # hexadecimal number
    reversed_hex = binascii.hexlify(data[::-1])
    payload = int(reversed_hex, 16)
    return '%x' % pow(payload, exponent, modulus)
81bdc8fd
YCH
2622
2623
def base_n(num, n, table):
    """
    Encode a non-negative integer in base n.

    @param num    integer to encode, must not be negative
    @param n      the base
    @param table  digit characters; table[i] represents the value i
    Returns the encoded string, most significant digit first.
    Raises ValueError for negative numbers.
    """
    if num < 0:
        # Floor division never brings a negative number to zero, so the
        # digit loop below would not terminate
        raise ValueError('base_n does not support negative numbers: %r' % (num,))
    if num == 0:
        # Use the table's own zero digit instead of a literal '0'
        return table[0]
    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    # Digits were produced least significant first
    return ''.join(reversed(digits))
2632
2633
48188829
YCH
def base36(num):
    """ Encode an integer in base 36 using digits followed by lowercase letters """
    digit_table = '0123456789abcdefghijklmnopqrstuvwxyz'
    return base_n(num, 36, digit_table)
2636
2637
81bdc8fd
YCH
def base62(num):
    """ Encode an integer in base 62 using digits, lowercase, then uppercase letters """
    digit_table = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    return base_n(num, 62, digit_table)