jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# -*- coding: utf-8 -*-
	3
	4	from __future__ import unicode_literals
	5
	6	import calendar
	7	import codecs
	8	import contextlib
	9	import ctypes
	10	import datetime
	11	import email.utils
	12	import errno
	13	import functools
	14	import gzip
	15	import itertools
	16	import io
	17	import json
	18	import locale
	19	import math
	20	import operator
	21	import os
	22	import pipes
	23	import platform
	24	import re
	25	import ssl
	26	import socket
	27	import struct
	28	import subprocess
	29	import sys
	30	import tempfile
	31	import traceback
	32	import xml.etree.ElementTree
	33	import zlib
	34
	35	from .compat import (
	36	compat_basestring,
	37	compat_chr,
	38	compat_html_entities,
	39	compat_http_client,
	40	compat_kwargs,
	41	compat_parse_qs,
	42	compat_socket_create_connection,
	43	compat_str,
	44	compat_urllib_error,
	45	compat_urllib_parse,
	46	compat_urllib_parse_urlparse,
	47	compat_urllib_request,
	48	compat_urlparse,
	49	shlex_quote,
	50	)
	51
	52
	53	# This is not clearly defined otherwise
	54	compiled_regex_type = type(re.compile(''))
	55
	56	std_headers = {
	57	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/20.0 (Chrome)',
	58	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	59	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	60	'Accept-Encoding': 'gzip, deflate',
	61	'Accept-Language': 'en-us,en;q=0.5',
	62	}
	63
	64
	65	NO_DEFAULT = object()
	66
	67	ENGLISH_MONTH_NAMES = [
	68	'January', 'February', 'March', 'April', 'May', 'June',
	69	'July', 'August', 'September', 'October', 'November', 'December']
	70
	71
	72	def preferredencoding():
	73	"""Get preferred encoding.
	74
	75	Returns the best encoding scheme for the system, based on
	76	locale.getpreferredencoding() and some further tweaks.
	77	"""
	78	try:
	79	pref = locale.getpreferredencoding()
	80	'TEST'.encode(pref)
	81	except Exception:
	82	pref = 'UTF-8'
	83
	84	return pref
	85
	86
	87	def write_json_file(obj, fn):
	88	""" Encode obj as JSON and write it to fn, atomically if possible """
	89
	90	fn = encodeFilename(fn)
	91	if sys.version_info < (3, 0) and sys.platform != 'win32':
	92	encoding = get_filesystem_encoding()
	93	# os.path.basename returns a bytes object, but NamedTemporaryFile
	94	# will fail if the filename contains non ascii characters unless we
	95	# use a unicode object
	96	path_basename = lambda f: os.path.basename(fn).decode(encoding)
	97	# the same for os.path.dirname
	98	path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
	99	else:
	100	path_basename = os.path.basename
	101	path_dirname = os.path.dirname
	102
	103	args = {
	104	'suffix': '.tmp',
	105	'prefix': path_basename(fn) + '.',
	106	'dir': path_dirname(fn),
	107	'delete': False,
	108	}
	109
	110	# In Python 2.x, json.dump expects a bytestream.
	111	# In Python 3.x, it writes to a character stream
	112	if sys.version_info < (3, 0):
	113	args['mode'] = 'wb'
	114	else:
	115	args.update({
	116	'mode': 'w',
	117	'encoding': 'utf-8',
	118	})
	119
	120	tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
	121
	122	try:
	123	with tf:
	124	json.dump(obj, tf)
	125	if sys.platform == 'win32':
	126	# Need to remove existing file on Windows, else os.rename raises
	127	# WindowsError or FileExistsError.
	128	try:
	129	os.unlink(fn)
	130	except OSError:
	131	pass
	132	os.rename(tf.name, fn)
	133	except Exception:
	134	try:
	135	os.remove(tf.name)
	136	except OSError:
	137	pass
	138	raise
	139
	140
	141	if sys.version_info >= (2, 7):
	142	def find_xpath_attr(node, xpath, key, val=None):
	143	""" Find the xpath xpath[@key=val] """
	144	assert re.match(r'^[a-zA-Z-]+$', key)
	145	if val:
	146	assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
	147	expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
	148	return node.find(expr)
	149	else:
	150	def find_xpath_attr(node, xpath, key, val=None):
	151	# Here comes the crazy part: In 2.6, if the xpath is a unicode,
	152	# .//node does not match if a node is a direct child of . !
	153	if isinstance(xpath, compat_str):
	154	xpath = xpath.encode('ascii')
	155
	156	for f in node.findall(xpath):
	157	if key not in f.attrib:
	158	continue
	159	if val is None or f.attrib.get(key) == val:
	160	return f
	161	return None
	162
	163	# On python2.6 the xml.etree.ElementTree.Element methods don't support
	164	# the namespace parameter
	165
	166
	167	def xpath_with_ns(path, ns_map):
	168	components = [c.split(':') for c in path.split('/')]
	169	replaced = []
	170	for c in components:
	171	if len(c) == 1:
	172	replaced.append(c[0])
	173	else:
	174	ns, tag = c
	175	replaced.append('{%s}%s' % (ns_map[ns], tag))
	176	return '/'.join(replaced)
	177
	178
	179	def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	180	if sys.version_info < (2, 7): # Crazy 2.6
	181	xpath = xpath.encode('ascii')
	182
	183	n = node.find(xpath)
	184	if n is None or n.text is None:
	185	if default is not NO_DEFAULT:
	186	return default
	187	elif fatal:
	188	name = xpath if name is None else name
	189	raise ExtractorError('Could not find XML element %s' % name)
	190	else:
	191	return None
	192	return n.text
	193
	194
	195	def get_element_by_id(id, html):
	196	"""Return the content of the tag with the specified ID in the passed HTML document"""
	197	return get_element_by_attribute("id", id, html)
	198
	199
	200	def get_element_by_attribute(attribute, value, html):
	201	"""Return the content of the tag with the specified attribute in the passed HTML document"""
	202
	203	m = re.search(r'''(?xs)
	204	<([a-zA-Z0-9:._-]+)
	205	(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+\|="[^"]+"\|='[^']+'))*?
	206	\s+%s=['"]?%s['"]?
	207	(?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+\|="[^"]+"\|='[^']+'))*?
	208	\s*>
	209	(?P<content>.*?)
	210	</\1>
	211	''' % (re.escape(attribute), re.escape(value)), html)
	212
	213	if not m:
	214	return None
	215	res = m.group('content')
	216
	217	if res.startswith('"') or res.startswith("'"):
	218	res = res[1:-1]
	219
	220	return unescapeHTML(res)
	221
	222
	223	def clean_html(html):
	224	"""Clean an HTML snippet into a readable string"""
	225
	226	if html is None: # Convenience for sanitizing descriptions etc.
	227	return html
	228
	229	# Newline vs <br />
	230	html = html.replace('\n', ' ')
	231	html = re.sub(r'\s<\sbr\s/?\s>\s*', '\n', html)
	232	html = re.sub(r'<\s/\sp\s>\s<\sp[^>]>', '\n', html)
	233	# Strip html tags
	234	html = re.sub('<.*?>', '', html)
	235	# Replace html entities
	236	html = unescapeHTML(html)
	237	return html.strip()
	238
	239
	240	def sanitize_open(filename, open_mode):
	241	"""Try to open the given filename, and slightly tweak it if this fails.
	242
	243	Attempts to open the given filename. If this fails, it tries to change
	244	the filename slightly, step by step, until it's either able to open it
	245	or it fails and raises a final exception, like the standard open()
	246	function.
	247
	248	It returns the tuple (stream, definitive_file_name).
	249	"""
	250	try:
	251	if filename == '-':
	252	if sys.platform == 'win32':
	253	import msvcrt
	254	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	255	return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
	256	stream = open(encodeFilename(filename), open_mode)
	257	return (stream, filename)
	258	except (IOError, OSError) as err:
	259	if err.errno in (errno.EACCES,):
	260	raise
	261
	262	# In case of error, try to remove win32 forbidden chars
	263	alt_filename = sanitize_path(filename)
	264	if alt_filename == filename:
	265	raise
	266	else:
	267	# An exception here should be caught in the caller
	268	stream = open(encodeFilename(alt_filename), open_mode)
	269	return (stream, alt_filename)
	270
	271
	272	def timeconvert(timestr):
	273	"""Convert RFC 2822 defined time string into system timestamp"""
	274	timestamp = None
	275	timetuple = email.utils.parsedate_tz(timestr)
	276	if timetuple is not None:
	277	timestamp = email.utils.mktime_tz(timetuple)
	278	return timestamp
	279
	280
	281	def sanitize_filename(s, restricted=False, is_id=False):
	282	"""Sanitizes a string so it could be used as part of a filename.
	283	If restricted is set, use a stricter subset of allowed characters.
	284	Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
	285	"""
	286	def replace_insane(char):
	287	if char == '?' or ord(char) < 32 or ord(char) == 127:
	288	return ''
	289	elif char == '"':
	290	return '' if restricted else '\''
	291	elif char == ':':
	292	return '_-' if restricted else ' -'
	293	elif char in '\\/\|*<>':
	294	return '_'
	295	if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
	296	return '_'
	297	if restricted and ord(char) > 127:
	298	return '_'
	299	return char
	300
	301	# Handle timestamps
	302	s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
	303	result = ''.join(map(replace_insane, s))
	304	if not is_id:
	305	while '__' in result:
	306	result = result.replace('__', '_')
	307	result = result.strip('_')
	308	# Common case of "Foreign band name - English song title"
	309	if restricted and result.startswith('-_'):
	310	result = result[2:]
	311	if result.startswith('-'):
	312	result = '_' + result[len('-'):]
	313	result = result.lstrip('.')
	314	if not result:
	315	result = '_'
	316	return result
	317
	318
	319	def sanitize_path(s):
	320	"""Sanitizes and normalizes path on Windows"""
	321	if sys.platform != 'win32':
	322	return s
	323	drive_or_unc, _ = os.path.splitdrive(s)
	324	if sys.version_info < (2, 7) and not drive_or_unc:
	325	drive_or_unc, _ = os.path.splitunc(s)
	326	norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
	327	if drive_or_unc:
	328	norm_path.pop(0)
	329	sanitized_path = [
	330	path_part if path_part in ['.', '..'] else re.sub('(?:[/<>:"\\\|\\\\?\\*]\|\.$)', '#', path_part)
	331	for path_part in norm_path]
	332	if drive_or_unc:
	333	sanitized_path.insert(0, drive_or_unc + os.path.sep)
	334	return os.path.join(*sanitized_path)
	335
	336
	337	def orderedSet(iterable):
	338	""" Remove all duplicates from the input iterable """
	339	res = []
	340	for el in iterable:
	341	if el not in res:
	342	res.append(el)
	343	return res
	344
	345
	346	def _htmlentity_transform(entity):
	347	"""Transforms an HTML entity to a character."""
	348	# Known non-numeric HTML entity
	349	if entity in compat_html_entities.name2codepoint:
	350	return compat_chr(compat_html_entities.name2codepoint[entity])
	351
	352	mobj = re.match(r'#(x[0-9a-fA-F]+\|[0-9]+)', entity)
	353	if mobj is not None:
	354	numstr = mobj.group(1)
	355	if numstr.startswith('x'):
	356	base = 16
	357	numstr = '0%s' % numstr
	358	else:
	359	base = 10
	360	return compat_chr(int(numstr, base))
	361
	362	# Unknown entity in name, return its literal representation
	363	return ('&%s;' % entity)
	364
	365
	366	def unescapeHTML(s):
	367	if s is None:
	368	return None
	369	assert type(s) == compat_str
	370
	371	return re.sub(
	372	r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
	373
	374
	375	def get_subprocess_encoding():
	376	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	377	# For subprocess calls, encode with locale encoding
	378	# Refer to http://stackoverflow.com/a/9951851/35070
	379	encoding = preferredencoding()
	380	else:
	381	encoding = sys.getfilesystemencoding()
	382	if encoding is None:
	383	encoding = 'utf-8'
	384	return encoding
	385
	386
	387	def encodeFilename(s, for_subprocess=False):
	388	"""
	389	@param s The name of the file
	390	"""
	391
	392	assert type(s) == compat_str
	393
	394	# Python 3 has a Unicode API
	395	if sys.version_info >= (3, 0):
	396	return s
	397
	398	# Pass '' directly to use Unicode APIs on Windows 2000 and up
	399	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	400	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	401	if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	402	return s
	403
	404	return s.encode(get_subprocess_encoding(), 'ignore')
	405
	406
	407	def decodeFilename(b, for_subprocess=False):
	408
	409	if sys.version_info >= (3, 0):
	410	return b
	411
	412	if not isinstance(b, bytes):
	413	return b
	414
	415	return b.decode(get_subprocess_encoding(), 'ignore')
	416
	417
	418	def encodeArgument(s):
	419	if not isinstance(s, compat_str):
	420	# Legacy code that uses byte strings
	421	# Uncomment the following line after fixing all post processors
	422	# assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
	423	s = s.decode('ascii')
	424	return encodeFilename(s, True)
	425
	426
	427	def decodeArgument(b):
	428	return decodeFilename(b, True)
	429
	430
	431	def decodeOption(optval):
	432	if optval is None:
	433	return optval
	434	if isinstance(optval, bytes):
	435	optval = optval.decode(preferredencoding())
	436
	437	assert isinstance(optval, compat_str)
	438	return optval
	439
	440
	441	def formatSeconds(secs):
	442	if secs > 3600:
	443	return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
	444	elif secs > 60:
	445	return '%d:%02d' % (secs // 60, secs % 60)
	446	else:
	447	return '%d' % secs
	448
	449
	450	def make_HTTPS_handler(params, **kwargs):
	451	opts_no_check_certificate = params.get('nocheckcertificate', False)
	452	if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
	453	context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
	454	if opts_no_check_certificate:
	455	context.check_hostname = False
	456	context.verify_mode = ssl.CERT_NONE
	457	try:
	458	return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
	459	except TypeError:
	460	# Python 2.7.8
	461	# (create_default_context present but HTTPSHandler has no context=)
	462	pass
	463
	464	if sys.version_info < (3, 2):
	465	return YoutubeDLHTTPSHandler(params, **kwargs)
	466	else: # Python < 3.4
	467	context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
	468	context.verify_mode = (ssl.CERT_NONE
	469	if opts_no_check_certificate
	470	else ssl.CERT_REQUIRED)
	471	context.set_default_verify_paths()
	472	return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
	473
	474
	475	def bug_reports_message():
	476	if ytdl_is_updateable():
	477	update_cmd = 'type youtube-dl -U to update'
	478	else:
	479	update_cmd = 'see https://yt-dl.org/update on how to update'
	480	msg = '; please report this issue on https://yt-dl.org/bug .'
	481	msg += ' Make sure you are using the latest version; %s.' % update_cmd
	482	msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
	483	return msg
	484
	485
	486	class ExtractorError(Exception):
	487	"""Error during info extraction."""
	488
	489	def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
	490	""" tb, if given, is the original traceback (so that it can be printed out).
	491	If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
	492	"""
	493
	494	if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
	495	expected = True
	496	if video_id is not None:
	497	msg = video_id + ': ' + msg
	498	if cause:
	499	msg += ' (caused by %r)' % cause
	500	if not expected:
	501	msg += bug_reports_message()
	502	super(ExtractorError, self).__init__(msg)
	503
	504	self.traceback = tb
	505	self.exc_info = sys.exc_info() # preserve original exception
	506	self.cause = cause
	507	self.video_id = video_id
	508
	509	def format_traceback(self):
	510	if self.traceback is None:
	511	return None
	512	return ''.join(traceback.format_tb(self.traceback))
	513
	514
	515	class UnsupportedError(ExtractorError):
	516	def __init__(self, url):
	517	super(UnsupportedError, self).__init__(
	518	'Unsupported URL: %s' % url, expected=True)
	519	self.url = url
	520
	521
	522	class RegexNotFoundError(ExtractorError):
	523	"""Error when a regex didn't match"""
	524	pass
	525
	526
	527	class DownloadError(Exception):
	528	"""Download Error exception.
	529
	530	This exception may be thrown by FileDownloader objects if they are not
	531	configured to continue on errors. They will contain the appropriate
	532	error message.
	533	"""
	534
	535	def __init__(self, msg, exc_info=None):
	536	""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
	537	super(DownloadError, self).__init__(msg)
	538	self.exc_info = exc_info
	539
	540
	541	class SameFileError(Exception):
	542	"""Same File exception.
	543
	544	This exception will be thrown by FileDownloader objects if they detect
	545	multiple files would have to be downloaded to the same file on disk.
	546	"""
	547	pass
	548
	549
	550	class PostProcessingError(Exception):
	551	"""Post Processing exception.
	552
	553	This exception may be raised by PostProcessor's .run() method to
	554	indicate an error in the postprocessing task.
	555	"""
	556
	557	def __init__(self, msg):
	558	self.msg = msg
	559
	560
	561	class MaxDownloadsReached(Exception):
	562	""" --max-downloads limit has been reached. """
	563	pass
	564
	565
	566	class UnavailableVideoError(Exception):
	567	"""Unavailable Format exception.
	568
	569	This exception will be thrown when a video is requested
	570	in a format that is not available for that video.
	571	"""
	572	pass
	573
	574
	575	class ContentTooShortError(Exception):
	576	"""Content Too Short exception.
	577
	578	This exception may be raised by FileDownloader objects when a file they
	579	download is too small for what the server announced first, indicating
	580	the connection was probably interrupted.
	581	"""
	582
	583	def __init__(self, downloaded, expected):
	584	# Both in bytes
	585	self.downloaded = downloaded
	586	self.expected = expected
	587
	588
	589	def _create_http_connection(ydl_handler, http_class, is_https, args, *kwargs):
	590	hc = http_class(args, *kwargs)
	591	source_address = ydl_handler._params.get('source_address')
	592	if source_address is not None:
	593	sa = (source_address, 0)
	594	if hasattr(hc, 'source_address'): # Python 2.7+
	595	hc.source_address = sa
	596	else: # Python 2.6
	597	def _hc_connect(self, args, *kwargs):
	598	sock = compat_socket_create_connection(
	599	(self.host, self.port), self.timeout, sa)
	600	if is_https:
	601	self.sock = ssl.wrap_socket(
	602	sock, self.key_file, self.cert_file,
	603	ssl_version=ssl.PROTOCOL_TLSv1)
	604	else:
	605	self.sock = sock
	606	hc.connect = functools.partial(_hc_connect, hc)
	607
	608	return hc
	609
	610
	611	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	612	"""Handler for HTTP requests and responses.
	613
	614	This class, when installed with an OpenerDirector, automatically adds
	615	the standard headers to every HTTP request and handles gzipped and
	616	deflated responses from web servers. If compression is to be avoided in
	617	a particular request, the original request in the program code only has
	618	to include the HTTP header "Youtubedl-No-Compression", which will be
	619	removed before making the real request.
	620
	621	Part of this code was copied from:
	622
	623	http://techknack.net/python-urllib2-handlers/
	624
	625	Andrew Rowls, the author of that code, agreed to release it to the
	626	public domain.
	627	"""
	628
	629	def __init__(self, params, args, *kwargs):
	630	compat_urllib_request.HTTPHandler.__init__(self, args, *kwargs)
	631	self._params = params
	632
	633	def http_open(self, req):
	634	return self.do_open(functools.partial(
	635	_create_http_connection, self, compat_http_client.HTTPConnection, False),
	636	req)
	637
	638	@staticmethod
	639	def deflate(data):
	640	try:
	641	return zlib.decompress(data, -zlib.MAX_WBITS)
	642	except zlib.error:
	643	return zlib.decompress(data)
	644
	645	@staticmethod
	646	def addinfourl_wrapper(stream, headers, url, code):
	647	if hasattr(compat_urllib_request.addinfourl, 'getcode'):
	648	return compat_urllib_request.addinfourl(stream, headers, url, code)
	649	ret = compat_urllib_request.addinfourl(stream, headers, url)
	650	ret.code = code
	651	return ret
	652
	653	def http_request(self, req):
	654	# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
	655	# always respected by websites, some tend to give out URLs with non percent-encoded
	656	# non-ASCII characters (see telemb.py, ard.py [#3412])
	657	# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
	658	# To work around aforementioned issue we will replace request's original URL with
	659	# percent-encoded one
	660	# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
	661	# the code of this workaround has been moved here from YoutubeDL.urlopen()
	662	url = req.get_full_url()
	663	url_escaped = escape_url(url)
	664
	665	# Substitute URL if any change after escaping
	666	if url != url_escaped:
	667	req_type = HEADRequest if req.get_method() == 'HEAD' else compat_urllib_request.Request
	668	new_req = req_type(
	669	url_escaped, data=req.data, headers=req.headers,
	670	origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
	671	new_req.timeout = req.timeout
	672	req = new_req
	673
	674	for h, v in std_headers.items():
	675	# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
	676	# The dict keys are capitalized because of this bug by urllib
	677	if h.capitalize() not in req.headers:
	678	req.add_header(h, v)
	679	if 'Youtubedl-no-compression' in req.headers:
	680	if 'Accept-encoding' in req.headers:
	681	del req.headers['Accept-encoding']
	682	del req.headers['Youtubedl-no-compression']
	683
	684	if sys.version_info < (2, 7) and '#' in req.get_full_url():
	685	# Python 2.6 is brain-dead when it comes to fragments
	686	req._Request__original = req._Request__original.partition('#')[0]
	687	req._Request__r_type = req._Request__r_type.partition('#')[0]
	688
	689	return req
	690
	691	def http_response(self, req, resp):
	692	old_resp = resp
	693	# gzip
	694	if resp.headers.get('Content-encoding', '') == 'gzip':
	695	content = resp.read()
	696	gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
	697	try:
	698	uncompressed = io.BytesIO(gz.read())
	699	except IOError as original_ioerror:
	700	# There may be junk add the end of the file
	701	# See http://stackoverflow.com/q/4928560/35070 for details
	702	for i in range(1, 1024):
	703	try:
	704	gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
	705	uncompressed = io.BytesIO(gz.read())
	706	except IOError:
	707	continue
	708	break
	709	else:
	710	raise original_ioerror
	711	resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
	712	resp.msg = old_resp.msg
	713	# deflate
	714	if resp.headers.get('Content-encoding', '') == 'deflate':
	715	gz = io.BytesIO(self.deflate(resp.read()))
	716	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	717	resp.msg = old_resp.msg
	718	# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986
	719	if 300 <= resp.code < 400:
	720	location = resp.headers.get('Location')
	721	if location:
	722	# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
	723	if sys.version_info >= (3, 0):
	724	location = location.encode('iso-8859-1').decode('utf-8')
	725	location_escaped = escape_url(location)
	726	if location != location_escaped:
	727	del resp.headers['Location']
	728	resp.headers['Location'] = location_escaped
	729	return resp
	730
	731	https_request = http_request
	732	https_response = http_response
	733
	734
	735	class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
	736	def __init__(self, params, https_conn_class=None, args, *kwargs):
	737	compat_urllib_request.HTTPSHandler.__init__(self, args, *kwargs)
	738	self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
	739	self._params = params
	740
	741	def https_open(self, req):
	742	kwargs = {}
	743	if hasattr(self, '_context'): # python > 2.6
	744	kwargs['context'] = self._context
	745	if hasattr(self, '_check_hostname'): # python 3.x
	746	kwargs['check_hostname'] = self._check_hostname
	747	return self.do_open(functools.partial(
	748	_create_http_connection, self, self._https_conn_class, True),
	749	req, **kwargs)
	750
	751
	752	def parse_iso8601(date_str, delimiter='T', timezone=None):
	753	""" Return a UNIX timestamp from the given date """
	754
	755	if date_str is None:
	756	return None
	757
	758	if timezone is None:
	759	m = re.search(
	760	r'(\.[0-9]+)?(?:Z$\| ?(?P<sign>\+\|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
	761	date_str)
	762	if not m:
	763	timezone = datetime.timedelta()
	764	else:
	765	date_str = date_str[:-len(m.group(0))]
	766	if not m.group('sign'):
	767	timezone = datetime.timedelta()
	768	else:
	769	sign = 1 if m.group('sign') == '+' else -1
	770	timezone = datetime.timedelta(
	771	hours=sign * int(m.group('hours')),
	772	minutes=sign * int(m.group('minutes')))
	773	date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
	774	dt = datetime.datetime.strptime(date_str, date_format) - timezone
	775	return calendar.timegm(dt.timetuple())
	776
	777
	778	def unified_strdate(date_str, day_first=True):
	779	"""Return a string with the date in the format YYYYMMDD"""
	780
	781	if date_str is None:
	782	return None
	783	upload_date = None
	784	# Replace commas
	785	date_str = date_str.replace(',', ' ')
	786	# %z (UTC offset) is only supported in python>=3.2
	787	if not re.match(r'^[0-9]{1,2}-[0-9]{1,2}-[0-9]{4}$', date_str):
	788	date_str = re.sub(r' ?(\+\|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
	789	# Remove AM/PM + timezone
	790	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	791
	792	format_expressions = [
	793	'%d %B %Y',
	794	'%d %b %Y',
	795	'%B %d %Y',
	796	'%b %d %Y',
	797	'%b %dst %Y %I:%M%p',
	798	'%b %dnd %Y %I:%M%p',
	799	'%b %dth %Y %I:%M%p',
	800	'%Y %m %d',
	801	'%Y-%m-%d',
	802	'%Y/%m/%d',
	803	'%Y/%m/%d %H:%M:%S',
	804	'%Y-%m-%d %H:%M:%S',
	805	'%Y-%m-%d %H:%M:%S.%f',
	806	'%d.%m.%Y %H:%M',
	807	'%d.%m.%Y %H.%M',
	808	'%Y-%m-%dT%H:%M:%SZ',
	809	'%Y-%m-%dT%H:%M:%S.%fZ',
	810	'%Y-%m-%dT%H:%M:%S.%f0Z',
	811	'%Y-%m-%dT%H:%M:%S',
	812	'%Y-%m-%dT%H:%M:%S.%f',
	813	'%Y-%m-%dT%H:%M',
	814	]
	815	if day_first:
	816	format_expressions.extend([
	817	'%d-%m-%Y',
	818	'%d.%m.%Y',
	819	'%d/%m/%Y',
	820	'%d/%m/%y',
	821	'%d/%m/%Y %H:%M:%S',
	822	])
	823	else:
	824	format_expressions.extend([
	825	'%m-%d-%Y',
	826	'%m.%d.%Y',
	827	'%m/%d/%Y',
	828	'%m/%d/%y',
	829	'%m/%d/%Y %H:%M:%S',
	830	])
	831	for expression in format_expressions:
	832	try:
	833	upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
	834	except ValueError:
	835	pass
	836	if upload_date is None:
	837	timetuple = email.utils.parsedate_tz(date_str)
	838	if timetuple:
	839	upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
	840	return upload_date
	841
	842
	843	def determine_ext(url, default_ext='unknown_video'):
	844	if url is None:
	845	return default_ext
	846	guess = url.partition('?')[0].rpartition('.')[2]
	847	if re.match(r'^[A-Za-z0-9]+$', guess):
	848	return guess
	849	else:
	850	return default_ext
	851
	852
	853	def subtitles_filename(filename, sub_lang, sub_format):
	854	return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
	855
	856
	857	def date_from_str(date_str):
	858	"""
	859	Return a datetime object from a string in the format YYYYMMDD or
	860	(now\|today)[+-][0-9](day\|week\|month\|year)(s)?"""
	861	today = datetime.date.today()
	862	if date_str in ('now', 'today'):
	863	return today
	864	if date_str == 'yesterday':
	865	return today - datetime.timedelta(days=1)
	866	match = re.match('(now\|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day\|week\|month\|year)(s)?', date_str)
	867	if match is not None:
	868	sign = match.group('sign')
	869	time = int(match.group('time'))
	870	if sign == '-':
	871	time = -time
	872	unit = match.group('unit')
	873	# A bad aproximation?
	874	if unit == 'month':
	875	unit = 'day'
	876	time *= 30
	877	elif unit == 'year':
	878	unit = 'day'
	879	time *= 365
	880	unit += 's'
	881	delta = datetime.timedelta(**{unit: time})
	882	return today + delta
	883	return datetime.datetime.strptime(date_str, "%Y%m%d").date()
	884
	885
	886	def hyphenate_date(date_str):
	887	"""
	888	Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
	889	match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
	890	if match is not None:
	891	return '-'.join(match.groups())
	892	else:
	893	return date_str
	894
	895
	896	class DateRange(object):
	897	"""Represents a time interval between two dates"""
	898
	899	def __init__(self, start=None, end=None):
	900	"""start and end must be strings in the format accepted by date"""
	901	if start is not None:
	902	self.start = date_from_str(start)
	903	else:
	904	self.start = datetime.datetime.min.date()
	905	if end is not None:
	906	self.end = date_from_str(end)
	907	else:
	908	self.end = datetime.datetime.max.date()
	909	if self.start > self.end:
	910	raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
	911
	912	@classmethod
	913	def day(cls, day):
	914	"""Returns a range that only contains the given day"""
	915	return cls(day, day)
	916
	917	def __contains__(self, date):
	918	"""Check if the date is in the range"""
	919	if not isinstance(date, datetime.date):
	920	date = date_from_str(date)
	921	return self.start <= date <= self.end
	922
	923	def __str__(self):
	924	return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
	925
	926
	927	def platform_name():
	928	""" Returns the platform name as a compat_str """
	929	res = platform.platform()
	930	if isinstance(res, bytes):
	931	res = res.decode(preferredencoding())
	932
	933	assert isinstance(res, compat_str)
	934	return res
	935
	936
	937	def _windows_write_string(s, out):
	938	""" Returns True if the string was written using special methods,
	939	False if it has yet to be written out."""
	940	# Adapted from http://stackoverflow.com/a/3259271/35070
	941
	942	import ctypes
	943	import ctypes.wintypes
	944
	945	WIN_OUTPUT_IDS = {
	946	1: -11,
	947	2: -12,
	948	}
	949
	950	try:
	951	fileno = out.fileno()
	952	except AttributeError:
	953	# If the output stream doesn't have a fileno, it's virtual
	954	return False
	955	except io.UnsupportedOperation:
	956	# Some strange Windows pseudo files?
	957	return False
	958	if fileno not in WIN_OUTPUT_IDS:
	959	return False
	960
	961	GetStdHandle = ctypes.WINFUNCTYPE(
	962	ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
	963	(b"GetStdHandle", ctypes.windll.kernel32))
	964	h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
	965
	966	WriteConsoleW = ctypes.WINFUNCTYPE(
	967	ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
	968	ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
	969	ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
	970	written = ctypes.wintypes.DWORD(0)
	971
	972	GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
	973	FILE_TYPE_CHAR = 0x0002
	974	FILE_TYPE_REMOTE = 0x8000
	975	GetConsoleMode = ctypes.WINFUNCTYPE(
	976	ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
	977	ctypes.POINTER(ctypes.wintypes.DWORD))(
	978	(b"GetConsoleMode", ctypes.windll.kernel32))
	979	INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
	980
	981	def not_a_console(handle):
	982	if handle == INVALID_HANDLE_VALUE or handle is None:
	983	return True
	984	return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
	985	GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
	986
	987	if not_a_console(h):
	988	return False
	989
	990	def next_nonbmp_pos(s):
	991	try:
	992	return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
	993	except StopIteration:
	994	return len(s)
	995
	996	while s:
	997	count = min(next_nonbmp_pos(s), 1024)
	998
	999	ret = WriteConsoleW(
	1000	h, s, count if count else 2, ctypes.byref(written), None)
	1001	if ret == 0:
	1002	raise OSError('Failed to write string')
	1003	if not count: # We just wrote a non-BMP character
	1004	assert written.value == 2
	1005	s = s[1:]
	1006	else:
	1007	assert written.value > 0
	1008	s = s[written.value:]
	1009	return True
	1010
	1011
	1012	def write_string(s, out=None, encoding=None):
	1013	if out is None:
	1014	out = sys.stderr
	1015	assert type(s) == compat_str
	1016
	1017	if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
	1018	if _windows_write_string(s, out):
	1019	return
	1020
	1021	if ('b' in getattr(out, 'mode', '') or
	1022	sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
	1023	byt = s.encode(encoding or preferredencoding(), 'ignore')
	1024	out.write(byt)
	1025	elif hasattr(out, 'buffer'):
	1026	enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
	1027	byt = s.encode(enc, 'ignore')
	1028	out.buffer.write(byt)
	1029	else:
	1030	out.write(s)
	1031	out.flush()
	1032
	1033
	1034	def bytes_to_intlist(bs):
	1035	if not bs:
	1036	return []
	1037	if isinstance(bs[0], int): # Python 3
	1038	return list(bs)
	1039	else:
	1040	return [ord(c) for c in bs]
	1041
	1042
	1043	def intlist_to_bytes(xs):
	1044	if not xs:
	1045	return b''
	1046	return struct_pack('%dB' % len(xs), *xs)
	1047
	1048
# Cross-platform file locking
# Defines _lock_file(f, exclusive) and _unlock_file(f) for the running
# platform: LockFileEx/UnlockFileEx through ctypes on win32, fcntl.flock
# everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure handed (by pointer) to
        # LockFileEx/UnlockFileEx below
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte count passed to (Un)LockFileEx; the region
    # starts at offset 0 (see _lock_file)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the file object ``f``; raise OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Stored on the file object so _unlock_file can pass the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags is 0x2 for an exclusive lock and 0x0 otherwise
        # (0x2 is presumably LOCKFILE_EXCLUSIVE_LOCK - confirm against the
        # Win32 headers)
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Unlock a file locked by _lock_file; raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Take an exclusive (LOCK_EX) or shared (LOCK_SH) flock on ``f``."""
        fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the flock held on ``f``."""
        fcntl.flock(f, fcntl.LOCK_UN)
	1112
	1113
	1114	class locked_file(object):
	1115	def __init__(self, filename, mode, encoding=None):
	1116	assert mode in ['r', 'a', 'w']
	1117	self.f = io.open(filename, mode, encoding=encoding)
	1118	self.mode = mode
	1119
	1120	def __enter__(self):
	1121	exclusive = self.mode != 'r'
	1122	try:
	1123	_lock_file(self.f, exclusive)
	1124	except IOError:
	1125	self.f.close()
	1126	raise
	1127	return self
	1128
	1129	def __exit__(self, etype, value, traceback):
	1130	try:
	1131	_unlock_file(self.f)
	1132	finally:
	1133	self.f.close()
	1134
	1135	def __iter__(self):
	1136	return iter(self.f)
	1137
	1138	def write(self, *args):
	1139	return self.f.write(*args)
	1140
	1141	def read(self, *args):
	1142	return self.f.read(*args)
	1143
	1144
	1145	def get_filesystem_encoding():
	1146	encoding = sys.getfilesystemencoding()
	1147	return encoding if encoding is not None else 'utf-8'
	1148
	1149
	1150	def shell_quote(args):
	1151	quoted_args = []
	1152	encoding = get_filesystem_encoding()
	1153	for a in args:
	1154	if isinstance(a, bytes):
	1155	# We may get a filename encoded with 'encodeFilename'
	1156	a = a.decode(encoding)
	1157	quoted_args.append(pipes.quote(a))
	1158	return ' '.join(quoted_args)
	1159
	1160
	1161	def smuggle_url(url, data):
	1162	""" Pass additional data in a URL for internal use. """
	1163
	1164	sdata = compat_urllib_parse.urlencode(
	1165	{'__youtubedl_smuggle': json.dumps(data)})
	1166	return url + '#' + sdata
	1167
	1168
	1169	def unsmuggle_url(smug_url, default=None):
	1170	if '#__youtubedl_smuggle' not in smug_url:
	1171	return smug_url, default
	1172	url, _, sdata = smug_url.rpartition('#')
	1173	jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
	1174	data = json.loads(jsond)
	1175	return url, data
	1176
	1177
	1178	def format_bytes(bytes):
	1179	if bytes is None:
	1180	return 'N/A'
	1181	if type(bytes) is str:
	1182	bytes = float(bytes)
	1183	if bytes == 0.0:
	1184	exponent = 0
	1185	else:
	1186	exponent = int(math.log(bytes, 1024.0))
	1187	suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
	1188	converted = float(bytes) / float(1024 ** exponent)
	1189	return '%.2f%s' % (converted, suffix)
	1190
	1191
	1192	def parse_filesize(s):
	1193	if s is None:
	1194	return None
	1195
	1196	# The lower-case forms are of course incorrect and inofficial,
	1197	# but we support those too
	1198	_UNIT_TABLE = {
	1199	'B': 1,
	1200	'b': 1,
	1201	'KiB': 1024,
	1202	'KB': 1000,
	1203	'kB': 1024,
	1204	'Kb': 1000,
	1205	'MiB': 1024 ** 2,
	1206	'MB': 1000 ** 2,
	1207	'mB': 1024 ** 2,
	1208	'Mb': 1000 ** 2,
	1209	'GiB': 1024 ** 3,
	1210	'GB': 1000 ** 3,
	1211	'gB': 1024 ** 3,
	1212	'Gb': 1000 ** 3,
	1213	'TiB': 1024 ** 4,
	1214	'TB': 1000 ** 4,
	1215	'tB': 1024 ** 4,
	1216	'Tb': 1000 ** 4,
	1217	'PiB': 1024 ** 5,
	1218	'PB': 1000 ** 5,
	1219	'pB': 1024 ** 5,
	1220	'Pb': 1000 ** 5,
	1221	'EiB': 1024 ** 6,
	1222	'EB': 1000 ** 6,
	1223	'eB': 1024 ** 6,
	1224	'Eb': 1000 ** 6,
	1225	'ZiB': 1024 ** 7,
	1226	'ZB': 1000 ** 7,
	1227	'zB': 1024 ** 7,
	1228	'Zb': 1000 ** 7,
	1229	'YiB': 1024 ** 8,
	1230	'YB': 1000 ** 8,
	1231	'yB': 1024 ** 8,
	1232	'Yb': 1000 ** 8,
	1233	}
	1234
	1235	units_re = '\|'.join(re.escape(u) for u in _UNIT_TABLE)
	1236	m = re.match(
	1237	r'(?P<num>[0-9]+(?:[,.][0-9])?)\s(?P<unit>%s)' % units_re, s)
	1238	if not m:
	1239	return None
	1240
	1241	num_str = m.group('num').replace(',', '.')
	1242	mult = _UNIT_TABLE[m.group('unit')]
	1243	return int(float(num_str) * mult)
	1244
	1245
	1246	def month_by_name(name):
	1247	""" Return the number of a month by (locale-independently) English name """
	1248
	1249	try:
	1250	return ENGLISH_MONTH_NAMES.index(name) + 1
	1251	except ValueError:
	1252	return None
	1253
	1254
	1255	def month_by_abbreviation(abbrev):
	1256	""" Return the number of a month by (locale-independently) English
	1257	abbreviations """
	1258
	1259	try:
	1260	return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
	1261	except ValueError:
	1262	return None
	1263
	1264
	1265	def fix_xml_ampersands(xml_str):
	1266	"""Replace all the '&' by '&' in XML"""
	1267	return re.sub(
	1268	r'&(?!amp;\|lt;\|gt;\|apos;\|quot;\|#x[0-9a-fA-F]{,4};\|#[0-9]{,4};)',
	1269	'&',
	1270	xml_str)
	1271
	1272
	1273	def setproctitle(title):
	1274	assert isinstance(title, compat_str)
	1275	try:
	1276	libc = ctypes.cdll.LoadLibrary("libc.so.6")
	1277	except OSError:
	1278	return
	1279	title_bytes = title.encode('utf-8')
	1280	buf = ctypes.create_string_buffer(len(title_bytes))
	1281	buf.value = title_bytes
	1282	try:
	1283	libc.prctl(15, buf, 0, 0, 0)
	1284	except AttributeError:
	1285	return # Strange libc, just skip this
	1286
	1287
	1288	def remove_start(s, start):
	1289	if s.startswith(start):
	1290	return s[len(start):]
	1291	return s
	1292
	1293
	1294	def remove_end(s, end):
	1295	if s.endswith(end):
	1296	return s[:-len(end)]
	1297	return s
	1298
	1299
	1300	def url_basename(url):
	1301	path = compat_urlparse.urlparse(url).path
	1302	return path.strip('/').split('/')[-1]
	1303
	1304
	1305	class HEADRequest(compat_urllib_request.Request):
	1306	def get_method(self):
	1307	return "HEAD"
	1308
	1309
	1310	def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
	1311	if get_attr:
	1312	if v is not None:
	1313	v = getattr(v, get_attr, None)
	1314	if v == '':
	1315	v = None
	1316	return default if v is None else (int(v) * invscale // scale)
	1317
	1318
	1319	def str_or_none(v, default=None):
	1320	return default if v is None else compat_str(v)
	1321
	1322
	1323	def str_to_int(int_str):
	1324	""" A more relaxed version of int_or_none """
	1325	if int_str is None:
	1326	return None
	1327	int_str = re.sub(r'[,\.\+]', '', int_str)
	1328	return int(int_str)
	1329
	1330
	1331	def float_or_none(v, scale=1, invscale=1, default=None):
	1332	return default if v is None else (float(v) * invscale / scale)
	1333
	1334
	1335	def parse_duration(s):
	1336	if not isinstance(s, compat_basestring):
	1337	return None
	1338
	1339	s = s.strip()
	1340
	1341	m = re.match(
	1342	r'''(?ix)(?:P?T)?
	1343	(?:
	1344	(?P<only_mins>[0-9.]+)\s(?:mins?\.?\|minutes?)\s\|
	1345	(?P<only_hours>[0-9.]+)\s*(?:hours?)\|
	1346
	1347	\s(?P<hours_reversed>[0-9]+)\s(?:[:h]\|hours?)\s(?P<mins_reversed>[0-9]+)\s(?:[:m]\|mins?\.?\|minutes?)\s*\|
	1348	(?:
	1349	(?:
	1350	(?:(?P<days>[0-9]+)\s(?:[:d]\|days?)\s)?
	1351	(?P<hours>[0-9]+)\s(?:[:h]\|hours?)\s
	1352	)?
	1353	(?P<mins>[0-9]+)\s(?:[:m]\|mins?\|minutes?)\s
	1354	)?
	1355	(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s\|secs?\|seconds?)?
	1356	)$''', s)
	1357	if not m:
	1358	return None
	1359	res = 0
	1360	if m.group('only_mins'):
	1361	return float_or_none(m.group('only_mins'), invscale=60)
	1362	if m.group('only_hours'):
	1363	return float_or_none(m.group('only_hours'), invscale=60 * 60)
	1364	if m.group('secs'):
	1365	res += int(m.group('secs'))
	1366	if m.group('mins_reversed'):
	1367	res += int(m.group('mins_reversed')) * 60
	1368	if m.group('mins'):
	1369	res += int(m.group('mins')) * 60
	1370	if m.group('hours'):
	1371	res += int(m.group('hours')) * 60 * 60
	1372	if m.group('hours_reversed'):
	1373	res += int(m.group('hours_reversed')) * 60 * 60
	1374	if m.group('days'):
	1375	res += int(m.group('days')) * 24 * 60 * 60
	1376	if m.group('ms'):
	1377	res += float(m.group('ms'))
	1378	return res
	1379
	1380
	1381	def prepend_extension(filename, ext, expected_real_ext=None):
	1382	name, real_ext = os.path.splitext(filename)
	1383	return (
	1384	'{0}.{1}{2}'.format(name, ext, real_ext)
	1385	if not expected_real_ext or real_ext[1:] == expected_real_ext
	1386	else '{0}.{1}'.format(filename, ext))
	1387
	1388
	1389	def replace_extension(filename, ext, expected_real_ext=None):
	1390	name, real_ext = os.path.splitext(filename)
	1391	return '{0}.{1}'.format(
	1392	name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
	1393	ext)
	1394
	1395
	1396	def check_executable(exe, args=[]):
	1397	""" Checks if the given binary is installed somewhere in PATH, and returns its name.
	1398	args can be a list of arguments for a short output (like -version) """
	1399	try:
	1400	subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
	1401	except OSError:
	1402	return False
	1403	return exe
	1404
	1405
	1406	def get_exe_version(exe, args=['--version'],
	1407	version_re=None, unrecognized='present'):
	1408	""" Returns the version of the specified executable,
	1409	or False if the executable is not present """
	1410	try:
	1411	out, _ = subprocess.Popen(
	1412	[encodeArgument(exe)] + args,
	1413	stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
	1414	except OSError:
	1415	return False
	1416	if isinstance(out, bytes): # Python 2.x
	1417	out = out.decode('ascii', 'ignore')
	1418	return detect_exe_version(out, version_re, unrecognized)
	1419
	1420
	1421	def detect_exe_version(output, version_re=None, unrecognized='present'):
	1422	assert isinstance(output, compat_str)
	1423	if version_re is None:
	1424	version_re = r'version\s+([-0-9._a-zA-Z]+)'
	1425	m = re.search(version_re, output)
	1426	if m:
	1427	return m.group(1)
	1428	else:
	1429	return unrecognized
	1430
	1431
	1432	class PagedList(object):
	1433	def __len__(self):
	1434	# This is only useful for tests
	1435	return len(self.getslice())
	1436
	1437
class OnDemandPagedList(PagedList):
    """Paged list that calls ``pagefunc(pagenum)`` as pages are needed.

    ``pagefunc`` takes a zero-based page number and returns an iterable of
    entries; ``pagesize`` is the number of entries on a full page.
    """

    def __init__(self, pagefunc, pagesize):
        self._pagefunc = pagefunc
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return the entries with indices ``start`` (inclusive) up to
        ``end`` (exclusive; None means no upper bound) as a list."""
        res = []
        # Begin with the page that contains index `start`
        for pagenum in itertools.count(start // self._pagesize):
            # Global indices of the first entry of this page and of the next
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = list(self._pagefunc(pagenum))

            # Offset of `start` within this page (0 for every later page)
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            # Offset within this page at which to stop, or None when `end`
            # does not fall on this page
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
	1479
	1480
	1481	class InAdvancePagedList(PagedList):
	1482	def __init__(self, pagefunc, pagecount, pagesize):
	1483	self._pagefunc = pagefunc
	1484	self._pagecount = pagecount
	1485	self._pagesize = pagesize
	1486
	1487	def getslice(self, start=0, end=None):
	1488	res = []
	1489	start_page = start // self._pagesize
	1490	end_page = (
	1491	self._pagecount if end is None else (end // self._pagesize + 1))
	1492	skip_elems = start - start_page * self._pagesize
	1493	only_more = None if end is None else end - start
	1494	for pagenum in range(start_page, end_page):
	1495	page = list(self._pagefunc(pagenum))
	1496	if skip_elems:
	1497	page = page[skip_elems:]
	1498	skip_elems = None
	1499	if only_more is not None:
	1500	if len(page) < only_more:
	1501	only_more -= len(page)
	1502	else:
	1503	page = page[:only_more]
	1504	res.extend(page)
	1505	break
	1506	res.extend(page)
	1507	return res
	1508
	1509
	1510	def uppercase_escape(s):
	1511	unicode_escape = codecs.getdecoder('unicode_escape')
	1512	return re.sub(
	1513	r'\\U[0-9a-fA-F]{8}',
	1514	lambda m: unicode_escape(m.group(0))[0],
	1515	s)
	1516
	1517
	1518	def lowercase_escape(s):
	1519	unicode_escape = codecs.getdecoder('unicode_escape')
	1520	return re.sub(
	1521	r'\\u[0-9a-fA-F]{4}',
	1522	lambda m: unicode_escape(m.group(0))[0],
	1523	s)
	1524
	1525
	1526	def escape_rfc3986(s):
	1527	"""Escape non-ASCII characters as suggested by RFC 3986"""
	1528	if sys.version_info < (3, 0) and isinstance(s, compat_str):
	1529	s = s.encode('utf-8')
	1530	return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
	1531
	1532
	1533	def escape_url(url):
	1534	"""Escape URL as suggested by RFC 3986"""
	1535	url_parsed = compat_urllib_parse_urlparse(url)
	1536	return url_parsed._replace(
	1537	path=escape_rfc3986(url_parsed.path),
	1538	params=escape_rfc3986(url_parsed.params),
	1539	query=escape_rfc3986(url_parsed.query),
	1540	fragment=escape_rfc3986(url_parsed.fragment)
	1541	).geturl()
	1542
	1543	try:
	1544	struct.pack('!I', 0)
	1545	except TypeError:
	1546	# In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
	1547	def struct_pack(spec, *args):
	1548	if isinstance(spec, compat_str):
	1549	spec = spec.encode('ascii')
	1550	return struct.pack(spec, *args)
	1551
	1552	def struct_unpack(spec, *args):
	1553	if isinstance(spec, compat_str):
	1554	spec = spec.encode('ascii')
	1555	return struct.unpack(spec, *args)
	1556	else:
	1557	struct_pack = struct.pack
	1558	struct_unpack = struct.unpack
	1559
	1560
	1561	def read_batch_urls(batch_fd):
	1562	def fixup(url):
	1563	if not isinstance(url, compat_str):
	1564	url = url.decode('utf-8', 'replace')
	1565	BOM_UTF8 = '\xef\xbb\xbf'
	1566	if url.startswith(BOM_UTF8):
	1567	url = url[len(BOM_UTF8):]
	1568	url = url.strip()
	1569	if url.startswith(('#', ';', ']')):
	1570	return False
	1571	return url
	1572
	1573	with contextlib.closing(batch_fd) as fd:
	1574	return [url for url in map(fixup, fd) if url]
	1575
	1576
	1577	def urlencode_postdata(args, *kargs):
	1578	return compat_urllib_parse.urlencode(args, *kargs).encode('ascii')
	1579
	1580
	1581	try:
	1582	etree_iter = xml.etree.ElementTree.Element.iter
	1583	except AttributeError: # Python <=2.6
	1584	etree_iter = lambda n: n.findall('.//*')
	1585
	1586
	1587	def parse_xml(s):
	1588	class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
	1589	def doctype(self, name, pubid, system):
	1590	pass # Ignore doctypes
	1591
	1592	parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
	1593	kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
	1594	tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
	1595	# Fix up XML parser in Python 2.x
	1596	if sys.version_info < (3, 0):
	1597	for n in etree_iter(tree):
	1598	if n.text is not None:
	1599	if not isinstance(n.text, compat_str):
	1600	n.text = n.text.decode('utf-8')
	1601	return tree
	1602
	1603
	1604	US_RATINGS = {
	1605	'G': 0,
	1606	'PG': 10,
	1607	'PG-13': 13,
	1608	'R': 16,
	1609	'NC': 18,
	1610	}
	1611
	1612
	1613	def parse_age_limit(s):
	1614	if s is None:
	1615	return None
	1616	m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
	1617	return int(m.group('age')) if m else US_RATINGS.get(s, None)
	1618
	1619
	1620	def strip_jsonp(code):
	1621	return re.sub(
	1622	r'(?s)^[a-zA-Z0-9_]+\s$\s(.)$;?\s?(?://[^\n])$', r'\1', code)
	1623
	1624
	1625	def js_to_json(code):
	1626	def fix_kv(m):
	1627	v = m.group(0)
	1628	if v in ('true', 'false', 'null'):
	1629	return v
	1630	if v.startswith('"'):
	1631	return v
	1632	if v.startswith("'"):
	1633	v = v[1:-1]
	1634	v = re.sub(r"\\\\\|\\'\|\"", lambda m: {
	1635	'\\\\': '\\\\',
	1636	"\\'": "'",
	1637	'"': '\\"',
	1638	}[m.group(0)], v)
	1639	return '"%s"' % v
	1640
	1641	res = re.sub(r'''(?x)
	1642	"(?:[^"\\](?:\\\\\|\\['"nu]))[^"\\]*"\|
	1643	'(?:[^'\\](?:\\\\\|\\['"nu]))[^'\\]*'\|
	1644	[a-zA-Z_][.a-zA-Z_0-9]*
	1645	''', fix_kv, code)
	1646	res = re.sub(r',(\s*[\]}])', lambda m: m.group(1), res)
	1647	return res
	1648
	1649
	1650	def qualities(quality_ids):
	1651	""" Get a numeric quality value out of a list of possible values """
	1652	def q(qid):
	1653	try:
	1654	return quality_ids.index(qid)
	1655	except ValueError:
	1656	return -1
	1657	return q
	1658
	1659
# Default output template: a %-style format string with the mapping keys
# title, id and ext (presumably filled from a video info dict - confirm at
# the call sites).
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
	1661
	1662
	1663	def limit_length(s, length):
	1664	""" Add ellipses to overly long strings """
	1665	if s is None:
	1666	return None
	1667	ELLIPSES = '...'
	1668	if len(s) > length:
	1669	return s[:length - len(ELLIPSES)] + ELLIPSES
	1670	return s
	1671
	1672
	1673	def version_tuple(v):
	1674	return tuple(int(e) for e in re.split(r'[-.]', v))
	1675
	1676
	1677	def is_outdated_version(version, limit, assume_new=True):
	1678	if not version:
	1679	return not assume_new
	1680	try:
	1681	return version_tuple(version) < version_tuple(limit)
	1682	except ValueError:
	1683	return not assume_new
	1684
	1685
	1686	def ytdl_is_updateable():
	1687	""" Returns if youtube-dl can be updated with -U """
	1688	from zipimport import zipimporter
	1689
	1690	return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
	1691
	1692
	1693	def args_to_str(args):
	1694	# Get a short string representation for a subprocess command
	1695	return ' '.join(shlex_quote(a) for a in args)
	1696
	1697
	1698	def mimetype2ext(mt):
	1699	_, _, res = mt.rpartition('/')
	1700
	1701	return {
	1702	'x-ms-wmv': 'wmv',
	1703	'x-mp4-fragmented': 'mp4',
	1704	'ttml+xml': 'ttml',
	1705	}.get(res, res)
	1706
	1707
	1708	def urlhandle_detect_ext(url_handle):
	1709	try:
	1710	url_handle.headers
	1711	getheader = lambda h: url_handle.headers[h]
	1712	except AttributeError: # Python < 3
	1713	getheader = url_handle.info().getheader
	1714
	1715	cd = getheader('Content-Disposition')
	1716	if cd:
	1717	m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
	1718	if m:
	1719	e = determine_ext(m.group('filename'), default_ext=None)
	1720	if e:
	1721	return e
	1722
	1723	return mimetype2ext(getheader('Content-Type'))
	1724
	1725
	1726	def age_restricted(content_limit, age_limit):
	1727	""" Returns True iff the content should be blocked """
	1728
	1729	if age_limit is None: # No limit set
	1730	return False
	1731	if content_limit is None:
	1732	return False # Content available for everyone
	1733	return age_limit < content_limit
	1734
	1735
	1736	def is_html(first_bytes):
	1737	""" Detect whether a file contains HTML by examining its first bytes. """
	1738
	1739	BOMS = [
	1740	(b'\xef\xbb\xbf', 'utf-8'),
	1741	(b'\x00\x00\xfe\xff', 'utf-32-be'),
	1742	(b'\xff\xfe\x00\x00', 'utf-32-le'),
	1743	(b'\xff\xfe', 'utf-16-le'),
	1744	(b'\xfe\xff', 'utf-16-be'),
	1745	]
	1746	for bom, enc in BOMS:
	1747	if first_bytes.startswith(bom):
	1748	s = first_bytes[len(bom):].decode(enc, 'replace')
	1749	break
	1750	else:
	1751	s = first_bytes.decode('utf-8', 'replace')
	1752
	1753	return re.match(r'^\s*<', s)
	1754
	1755
	1756	def determine_protocol(info_dict):
	1757	protocol = info_dict.get('protocol')
	1758	if protocol is not None:
	1759	return protocol
	1760
	1761	url = info_dict['url']
	1762	if url.startswith('rtmp'):
	1763	return 'rtmp'
	1764	elif url.startswith('mms'):
	1765	return 'mms'
	1766	elif url.startswith('rtsp'):
	1767	return 'rtsp'
	1768
	1769	ext = determine_ext(url)
	1770	if ext == 'm3u8':
	1771	return 'm3u8'
	1772	elif ext == 'f4m':
	1773	return 'f4m'
	1774
	1775	return compat_urllib_parse_urlparse(url).scheme
	1776
	1777
	1778	def render_table(header_row, data):
	1779	""" Render a list of rows, each as a list of values """
	1780	table = [header_row] + data
	1781	max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
	1782	format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
	1783	return '\n'.join(format_str % tuple(row) for row in table)
	1784
	1785
	1786	def _match_one(filter_part, dct):
	1787	COMPARISON_OPERATORS = {
	1788	'<': operator.lt,
	1789	'<=': operator.le,
	1790	'>': operator.gt,
	1791	'>=': operator.ge,
	1792	'=': operator.eq,
	1793	'!=': operator.ne,
	1794	}
	1795	operator_rex = re.compile(r'''(?x)\s*
	1796	(?P<key>[a-z_]+)
	1797	\s(?P<op>%s)(?P<none_inclusive>\s\?)?\s*
	1798	(?:
	1799	(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\|
	1800	(?P<strval>(?![0-9.])[a-z0-9A-Z]*)
	1801	)
	1802	\s*$
	1803	''' % '\|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
	1804	m = operator_rex.search(filter_part)
	1805	if m:
	1806	op = COMPARISON_OPERATORS[m.group('op')]
	1807	if m.group('strval') is not None:
	1808	if m.group('op') not in ('=', '!='):
	1809	raise ValueError(
	1810	'Operator %s does not support string values!' % m.group('op'))
	1811	comparison_value = m.group('strval')
	1812	else:
	1813	try:
	1814	comparison_value = int(m.group('intval'))
	1815	except ValueError:
	1816	comparison_value = parse_filesize(m.group('intval'))
	1817	if comparison_value is None:
	1818	comparison_value = parse_filesize(m.group('intval') + 'B')
	1819	if comparison_value is None:
	1820	raise ValueError(
	1821	'Invalid integer value %r in filter part %r' % (
	1822	m.group('intval'), filter_part))
	1823	actual_value = dct.get(m.group('key'))
	1824	if actual_value is None:
	1825	return m.group('none_inclusive')
	1826	return op(actual_value, comparison_value)
	1827
	1828	UNARY_OPERATORS = {
	1829	'': lambda v: v is not None,
	1830	'!': lambda v: v is None,
	1831	}
	1832	operator_rex = re.compile(r'''(?x)\s*
	1833	(?P<op>%s)\s*(?P<key>[a-z_]+)
	1834	\s*$
	1835	''' % '\|'.join(map(re.escape, UNARY_OPERATORS.keys())))
	1836	m = operator_rex.search(filter_part)
	1837	if m:
	1838	op = UNARY_OPERATORS[m.group('op')]
	1839	actual_value = dct.get(m.group('key'))
	1840	return op(actual_value)
	1841
	1842	raise ValueError('Invalid filter part %r' % filter_part)
	1843
	1844
	1845	def match_str(filter_str, dct):
	1846	""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
	1847
	1848	return all(
	1849	_match_one(filter_part, dct) for filter_part in filter_str.split('&'))
	1850
	1851
	1852	def match_filter_func(filter_str):
	1853	def _match_func(info_dict):
	1854	if match_str(filter_str, info_dict):
	1855	return None
	1856	else:
	1857	video_title = info_dict.get('title', info_dict.get('id', 'video'))
	1858	return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
	1859	return _match_func
	1860
	1861
	1862	def parse_dfxp_time_expr(time_expr):
	1863	if not time_expr:
	1864	return 0.0
	1865
	1866	mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
	1867	if mobj:
	1868	return float(mobj.group('time_offset'))
	1869
	1870	mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:\.\d+)?)$', time_expr)
	1871	if mobj:
	1872	return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3))
	1873
	1874
	1875	def srt_subtitles_timecode(seconds):
	1876	return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
	1877
	1878
	1879	def dfxp2srt(dfxp_data):
	1880	_x = functools.partial(xpath_with_ns, ns_map={
	1881	'ttml': 'http://www.w3.org/ns/ttml',
	1882	'ttaf1': 'http://www.w3.org/2006/10/ttaf1',
	1883	})
	1884
	1885	def parse_node(node):
	1886	str_or_empty = functools.partial(str_or_none, default='')
	1887
	1888	out = str_or_empty(node.text)
	1889
	1890	for child in node:
	1891	if child.tag in (_x('ttml:br'), _x('ttaf1:br'), 'br'):
	1892	out += '\n' + str_or_empty(child.tail)
	1893	elif child.tag in (_x('ttml:span'), _x('ttaf1:span'), 'span'):
	1894	out += str_or_empty(parse_node(child))
	1895	else:
	1896	out += str_or_empty(xml.etree.ElementTree.tostring(child))
	1897
	1898	return out
	1899
	1900	dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
	1901	out = []
	1902	paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
	1903
	1904	if not paras:
	1905	raise ValueError('Invalid dfxp/TTML subtitle')
	1906
	1907	for para, index in zip(paras, itertools.count(1)):
	1908	begin_time = parse_dfxp_time_expr(para.attrib['begin'])
	1909	end_time = parse_dfxp_time_expr(para.attrib.get('end'))
	1910	if not end_time:
	1911	end_time = begin_time + parse_dfxp_time_expr(para.attrib['dur'])
	1912	out.append('%d\n%s --> %s\n%s\n\n' % (
	1913	index,
	1914	srt_subtitles_timecode(begin_time),
	1915	srt_subtitles_timecode(end_time),
	1916	parse_node(para)))
	1917
	1918	return ''.join(out)
	1919
	1920
	1921	class ISO639Utils(object):
	1922	# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
	1923	_lang_map = {
	1924	'aa': 'aar',
	1925	'ab': 'abk',
	1926	'ae': 'ave',
	1927	'af': 'afr',
	1928	'ak': 'aka',
	1929	'am': 'amh',
	1930	'an': 'arg',
	1931	'ar': 'ara',
	1932	'as': 'asm',
	1933	'av': 'ava',
	1934	'ay': 'aym',
	1935	'az': 'aze',
	1936	'ba': 'bak',
	1937	'be': 'bel',
	1938	'bg': 'bul',
	1939	'bh': 'bih',
	1940	'bi': 'bis',
	1941	'bm': 'bam',
	1942	'bn': 'ben',
	1943	'bo': 'bod',
	1944	'br': 'bre',
	1945	'bs': 'bos',
	1946	'ca': 'cat',
	1947	'ce': 'che',
	1948	'ch': 'cha',
	1949	'co': 'cos',
	1950	'cr': 'cre',
	1951	'cs': 'ces',
	1952	'cu': 'chu',
	1953	'cv': 'chv',
	1954	'cy': 'cym',
	1955	'da': 'dan',
	1956	'de': 'deu',
	1957	'dv': 'div',
	1958	'dz': 'dzo',
	1959	'ee': 'ewe',
	1960	'el': 'ell',
	1961	'en': 'eng',
	1962	'eo': 'epo',
	1963	'es': 'spa',
	1964	'et': 'est',
	1965	'eu': 'eus',
	1966	'fa': 'fas',
	1967	'ff': 'ful',
	1968	'fi': 'fin',
	1969	'fj': 'fij',
	1970	'fo': 'fao',
	1971	'fr': 'fra',
	1972	'fy': 'fry',
	1973	'ga': 'gle',
	1974	'gd': 'gla',
	1975	'gl': 'glg',
	1976	'gn': 'grn',
	1977	'gu': 'guj',
	1978	'gv': 'glv',
	1979	'ha': 'hau',
	1980	'he': 'heb',
	1981	'hi': 'hin',
	1982	'ho': 'hmo',
	1983	'hr': 'hrv',
	1984	'ht': 'hat',
	1985	'hu': 'hun',
	1986	'hy': 'hye',
	1987	'hz': 'her',
	1988	'ia': 'ina',
	1989	'id': 'ind',
	1990	'ie': 'ile',
	1991	'ig': 'ibo',
	1992	'ii': 'iii',
	1993	'ik': 'ipk',
	1994	'io': 'ido',
	1995	'is': 'isl',
	1996	'it': 'ita',
	1997	'iu': 'iku',
	1998	'ja': 'jpn',
	1999	'jv': 'jav',
	2000	'ka': 'kat',
	2001	'kg': 'kon',
	2002	'ki': 'kik',
	2003	'kj': 'kua',
	2004	'kk': 'kaz',
	2005	'kl': 'kal',
	2006	'km': 'khm',
	2007	'kn': 'kan',
	2008	'ko': 'kor',
	2009	'kr': 'kau',
	2010	'ks': 'kas',
	2011	'ku': 'kur',
	2012	'kv': 'kom',
	2013	'kw': 'cor',
	2014	'ky': 'kir',
	2015	'la': 'lat',
	2016	'lb': 'ltz',
	2017	'lg': 'lug',
	2018	'li': 'lim',
	2019	'ln': 'lin',
	2020	'lo': 'lao',
	2021	'lt': 'lit',
	2022	'lu': 'lub',
	2023	'lv': 'lav',
	2024	'mg': 'mlg',
	2025	'mh': 'mah',
	2026	'mi': 'mri',
	2027	'mk': 'mkd',
	2028	'ml': 'mal',
	2029	'mn': 'mon',
	2030	'mr': 'mar',
	2031	'ms': 'msa',
	2032	'mt': 'mlt',
	2033	'my': 'mya',
	2034	'na': 'nau',
	2035	'nb': 'nob',
	2036	'nd': 'nde',
	2037	'ne': 'nep',
	2038	'ng': 'ndo',
	2039	'nl': 'nld',
	2040	'nn': 'nno',
	2041	'no': 'nor',
	2042	'nr': 'nbl',
	2043	'nv': 'nav',
	2044	'ny': 'nya',
	2045	'oc': 'oci',
	2046	'oj': 'oji',
	2047	'om': 'orm',
	2048	'or': 'ori',
	2049	'os': 'oss',
	2050	'pa': 'pan',
	2051	'pi': 'pli',
	2052	'pl': 'pol',
	2053	'ps': 'pus',
	2054	'pt': 'por',
	2055	'qu': 'que',
	2056	'rm': 'roh',
	2057	'rn': 'run',
	2058	'ro': 'ron',
	2059	'ru': 'rus',
	2060	'rw': 'kin',
	2061	'sa': 'san',
	2062	'sc': 'srd',
	2063	'sd': 'snd',
	2064	'se': 'sme',
	2065	'sg': 'sag',
	2066	'si': 'sin',
	2067	'sk': 'slk',
	2068	'sl': 'slv',
	2069	'sm': 'smo',
	2070	'sn': 'sna',
	2071	'so': 'som',
	2072	'sq': 'sqi',
	2073	'sr': 'srp',
	2074	'ss': 'ssw',
	2075	'st': 'sot',
	2076	'su': 'sun',
	2077	'sv': 'swe',
	2078	'sw': 'swa',
	2079	'ta': 'tam',
	2080	'te': 'tel',
	2081	'tg': 'tgk',
	2082	'th': 'tha',
	2083	'ti': 'tir',
	2084	'tk': 'tuk',
	2085	'tl': 'tgl',
	2086	'tn': 'tsn',
	2087	'to': 'ton',
	2088	'tr': 'tur',
	2089	'ts': 'tso',
	2090	'tt': 'tat',
	2091	'tw': 'twi',
	2092	'ty': 'tah',
	2093	'ug': 'uig',
	2094	'uk': 'ukr',
	2095	'ur': 'urd',
	2096	'uz': 'uzb',
	2097	've': 'ven',
	2098	'vi': 'vie',
	2099	'vo': 'vol',
	2100	'wa': 'wln',
	2101	'wo': 'wol',
	2102	'xh': 'xho',
	2103	'yi': 'yid',
	2104	'yo': 'yor',
	2105	'za': 'zha',
	2106	'zh': 'zho',
	2107	'zu': 'zul',
	2108	}
	2109
	2110	@classmethod
	2111	def short2long(cls, code):
	2112	"""Convert language code from ISO 639-1 to ISO 639-2/T"""
	2113	return cls._lang_map.get(code[:2])
	2114
	2115	@classmethod
	2116	def long2short(cls, code):
	2117	"""Convert language code from ISO 639-2/T to ISO 639-1"""
	2118	for short_name, long_name in cls._lang_map.items():
	2119	if long_name == code:
	2120	return short_name
	2121
	2122
	2123	class ISO3166Utils(object):
	2124	# From http://data.okfn.org/data/core/country-list
	2125	_country_map = {
	2126	'AF': 'Afghanistan',
	2127	'AX': 'Åland Islands',
	2128	'AL': 'Albania',
	2129	'DZ': 'Algeria',
	2130	'AS': 'American Samoa',
	2131	'AD': 'Andorra',
	2132	'AO': 'Angola',
	2133	'AI': 'Anguilla',
	2134	'AQ': 'Antarctica',
	2135	'AG': 'Antigua and Barbuda',
	2136	'AR': 'Argentina',
	2137	'AM': 'Armenia',
	2138	'AW': 'Aruba',
	2139	'AU': 'Australia',
	2140	'AT': 'Austria',
	2141	'AZ': 'Azerbaijan',
	2142	'BS': 'Bahamas',
	2143	'BH': 'Bahrain',
	2144	'BD': 'Bangladesh',
	2145	'BB': 'Barbados',
	2146	'BY': 'Belarus',
	2147	'BE': 'Belgium',
	2148	'BZ': 'Belize',
	2149	'BJ': 'Benin',
	2150	'BM': 'Bermuda',
	2151	'BT': 'Bhutan',
	2152	'BO': 'Bolivia, Plurinational State of',
	2153	'BQ': 'Bonaire, Sint Eustatius and Saba',
	2154	'BA': 'Bosnia and Herzegovina',
	2155	'BW': 'Botswana',
	2156	'BV': 'Bouvet Island',
	2157	'BR': 'Brazil',
	2158	'IO': 'British Indian Ocean Territory',
	2159	'BN': 'Brunei Darussalam',
	2160	'BG': 'Bulgaria',
	2161	'BF': 'Burkina Faso',
	2162	'BI': 'Burundi',
	2163	'KH': 'Cambodia',
	2164	'CM': 'Cameroon',
	2165	'CA': 'Canada',
	2166	'CV': 'Cape Verde',
	2167	'KY': 'Cayman Islands',
	2168	'CF': 'Central African Republic',
	2169	'TD': 'Chad',
	2170	'CL': 'Chile',
	2171	'CN': 'China',
	2172	'CX': 'Christmas Island',
	2173	'CC': 'Cocos (Keeling) Islands',
	2174	'CO': 'Colombia',
	2175	'KM': 'Comoros',
	2176	'CG': 'Congo',
	2177	'CD': 'Congo, the Democratic Republic of the',
	2178	'CK': 'Cook Islands',
	2179	'CR': 'Costa Rica',
	2180	'CI': 'Côte d\'Ivoire',
	2181	'HR': 'Croatia',
	2182	'CU': 'Cuba',
	2183	'CW': 'Curaçao',
	2184	'CY': 'Cyprus',
	2185	'CZ': 'Czech Republic',
	2186	'DK': 'Denmark',
	2187	'DJ': 'Djibouti',
	2188	'DM': 'Dominica',
	2189	'DO': 'Dominican Republic',
	2190	'EC': 'Ecuador',
	2191	'EG': 'Egypt',
	2192	'SV': 'El Salvador',
	2193	'GQ': 'Equatorial Guinea',
	2194	'ER': 'Eritrea',
	2195	'EE': 'Estonia',
	2196	'ET': 'Ethiopia',
	2197	'FK': 'Falkland Islands (Malvinas)',
	2198	'FO': 'Faroe Islands',
	2199	'FJ': 'Fiji',
	2200	'FI': 'Finland',
	2201	'FR': 'France',
	2202	'GF': 'French Guiana',
	2203	'PF': 'French Polynesia',
	2204	'TF': 'French Southern Territories',
	2205	'GA': 'Gabon',
	2206	'GM': 'Gambia',
	2207	'GE': 'Georgia',
	2208	'DE': 'Germany',
	2209	'GH': 'Ghana',
	2210	'GI': 'Gibraltar',
	2211	'GR': 'Greece',
	2212	'GL': 'Greenland',
	2213	'GD': 'Grenada',
	2214	'GP': 'Guadeloupe',
	2215	'GU': 'Guam',
	2216	'GT': 'Guatemala',
	2217	'GG': 'Guernsey',
	2218	'GN': 'Guinea',
	2219	'GW': 'Guinea-Bissau',
	2220	'GY': 'Guyana',
	2221	'HT': 'Haiti',
	2222	'HM': 'Heard Island and McDonald Islands',
	2223	'VA': 'Holy See (Vatican City State)',
	2224	'HN': 'Honduras',
	2225	'HK': 'Hong Kong',
	2226	'HU': 'Hungary',
	2227	'IS': 'Iceland',
	2228	'IN': 'India',
	2229	'ID': 'Indonesia',
	2230	'IR': 'Iran, Islamic Republic of',
	2231	'IQ': 'Iraq',
	2232	'IE': 'Ireland',
	2233	'IM': 'Isle of Man',
	2234	'IL': 'Israel',
	2235	'IT': 'Italy',
	2236	'JM': 'Jamaica',
	2237	'JP': 'Japan',
	2238	'JE': 'Jersey',
	2239	'JO': 'Jordan',
	2240	'KZ': 'Kazakhstan',
	2241	'KE': 'Kenya',
	2242	'KI': 'Kiribati',
	2243	'KP': 'Korea, Democratic People\'s Republic of',
	2244	'KR': 'Korea, Republic of',
	2245	'KW': 'Kuwait',
	2246	'KG': 'Kyrgyzstan',
	2247	'LA': 'Lao People\'s Democratic Republic',
	2248	'LV': 'Latvia',
	2249	'LB': 'Lebanon',
	2250	'LS': 'Lesotho',
	2251	'LR': 'Liberia',
	2252	'LY': 'Libya',
	2253	'LI': 'Liechtenstein',
	2254	'LT': 'Lithuania',
	2255	'LU': 'Luxembourg',
	2256	'MO': 'Macao',
	2257	'MK': 'Macedonia, the Former Yugoslav Republic of',
	2258	'MG': 'Madagascar',
	2259	'MW': 'Malawi',
	2260	'MY': 'Malaysia',
	2261	'MV': 'Maldives',
	2262	'ML': 'Mali',
	2263	'MT': 'Malta',
	2264	'MH': 'Marshall Islands',
	2265	'MQ': 'Martinique',
	2266	'MR': 'Mauritania',
	2267	'MU': 'Mauritius',
	2268	'YT': 'Mayotte',
	2269	'MX': 'Mexico',
	2270	'FM': 'Micronesia, Federated States of',
	2271	'MD': 'Moldova, Republic of',
	2272	'MC': 'Monaco',
	2273	'MN': 'Mongolia',
	2274	'ME': 'Montenegro',
	2275	'MS': 'Montserrat',
	2276	'MA': 'Morocco',
	2277	'MZ': 'Mozambique',
	2278	'MM': 'Myanmar',
	2279	'NA': 'Namibia',
	2280	'NR': 'Nauru',
	2281	'NP': 'Nepal',
	2282	'NL': 'Netherlands',
	2283	'NC': 'New Caledonia',
	2284	'NZ': 'New Zealand',
	2285	'NI': 'Nicaragua',
	2286	'NE': 'Niger',
	2287	'NG': 'Nigeria',
	2288	'NU': 'Niue',
	2289	'NF': 'Norfolk Island',
	2290	'MP': 'Northern Mariana Islands',
	2291	'NO': 'Norway',
	2292	'OM': 'Oman',
	2293	'PK': 'Pakistan',
	2294	'PW': 'Palau',
	2295	'PS': 'Palestine, State of',
	2296	'PA': 'Panama',
	2297	'PG': 'Papua New Guinea',
	2298	'PY': 'Paraguay',
	2299	'PE': 'Peru',
	2300	'PH': 'Philippines',
	2301	'PN': 'Pitcairn',
	2302	'PL': 'Poland',
	2303	'PT': 'Portugal',
	2304	'PR': 'Puerto Rico',
	2305	'QA': 'Qatar',
	2306	'RE': 'Réunion',
	2307	'RO': 'Romania',
	2308	'RU': 'Russian Federation',
	2309	'RW': 'Rwanda',
	2310	'BL': 'Saint Barthélemy',
	2311	'SH': 'Saint Helena, Ascension and Tristan da Cunha',
	2312	'KN': 'Saint Kitts and Nevis',
	2313	'LC': 'Saint Lucia',
	2314	'MF': 'Saint Martin (French part)',
	2315	'PM': 'Saint Pierre and Miquelon',
	2316	'VC': 'Saint Vincent and the Grenadines',
	2317	'WS': 'Samoa',
	2318	'SM': 'San Marino',
	2319	'ST': 'Sao Tome and Principe',
	2320	'SA': 'Saudi Arabia',
	2321	'SN': 'Senegal',
	2322	'RS': 'Serbia',
	2323	'SC': 'Seychelles',
	2324	'SL': 'Sierra Leone',
	2325	'SG': 'Singapore',
	2326	'SX': 'Sint Maarten (Dutch part)',
	2327	'SK': 'Slovakia',
	2328	'SI': 'Slovenia',
	2329	'SB': 'Solomon Islands',
	2330	'SO': 'Somalia',
	2331	'ZA': 'South Africa',
	2332	'GS': 'South Georgia and the South Sandwich Islands',
	2333	'SS': 'South Sudan',
	2334	'ES': 'Spain',
	2335	'LK': 'Sri Lanka',
	2336	'SD': 'Sudan',
	2337	'SR': 'Suriname',
	2338	'SJ': 'Svalbard and Jan Mayen',
	2339	'SZ': 'Swaziland',
	2340	'SE': 'Sweden',
	2341	'CH': 'Switzerland',
	2342	'SY': 'Syrian Arab Republic',
	2343	'TW': 'Taiwan, Province of China',
	2344	'TJ': 'Tajikistan',
	2345	'TZ': 'Tanzania, United Republic of',
	2346	'TH': 'Thailand',
	2347	'TL': 'Timor-Leste',
	2348	'TG': 'Togo',
	2349	'TK': 'Tokelau',
	2350	'TO': 'Tonga',
	2351	'TT': 'Trinidad and Tobago',
	2352	'TN': 'Tunisia',
	2353	'TR': 'Turkey',
	2354	'TM': 'Turkmenistan',
	2355	'TC': 'Turks and Caicos Islands',
	2356	'TV': 'Tuvalu',
	2357	'UG': 'Uganda',
	2358	'UA': 'Ukraine',
	2359	'AE': 'United Arab Emirates',
	2360	'GB': 'United Kingdom',
	2361	'US': 'United States',
	2362	'UM': 'United States Minor Outlying Islands',
	2363	'UY': 'Uruguay',
	2364	'UZ': 'Uzbekistan',
	2365	'VU': 'Vanuatu',
	2366	'VE': 'Venezuela, Bolivarian Republic of',
	2367	'VN': 'Viet Nam',
	2368	'VG': 'Virgin Islands, British',
	2369	'VI': 'Virgin Islands, U.S.',
	2370	'WF': 'Wallis and Futuna',
	2371	'EH': 'Western Sahara',
	2372	'YE': 'Yemen',
	2373	'ZM': 'Zambia',
	2374	'ZW': 'Zimbabwe',
	2375	}
	2376
	2377	@classmethod
	2378	def short2full(cls, code):
	2379	"""Convert an ISO 3166-2 country code to the corresponding full name"""
	2380	return cls._country_map.get(code.upper())
	2381
	2382
	2383	class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
	2384	def __init__(self, proxies=None):
	2385	# Set default handlers
	2386	for type in ('http', 'https'):
	2387	setattr(self, '%s_open' % type,
	2388	lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
	2389	meth(r, proxy, type))
	2390	return compat_urllib_request.ProxyHandler.__init__(self, proxies)
	2391
	2392	def proxy_open(self, req, proxy, type):
	2393	req_proxy = req.headers.get('Ytdl-request-proxy')
	2394	if req_proxy is not None:
	2395	proxy = req_proxy
	2396	del req.headers['Ytdl-request-proxy']
	2397
	2398	if proxy == '__noproxy__':
	2399	return None # No Proxy
	2400	return compat_urllib_request.ProxyHandler.proxy_open(
	2401	self, req, proxy, type)