jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python
	2	# -*- coding: utf-8 -*-
	3
	4	import datetime
	5	import email.utils
	6	import errno
	7	import gzip
	8	import io
	9	import json
	10	import locale
	11	import math
	12	import os
	13	import pipes
	14	import platform
	15	import re
	16	import ssl
	17	import socket
	18	import sys
	19	import traceback
	20	import zlib
	21
	22	try:
	23	import urllib.request as compat_urllib_request
	24	except ImportError: # Python 2
	25	import urllib2 as compat_urllib_request
	26
	27	try:
	28	import urllib.error as compat_urllib_error
	29	except ImportError: # Python 2
	30	import urllib2 as compat_urllib_error
	31
	32	try:
	33	import urllib.parse as compat_urllib_parse
	34	except ImportError: # Python 2
	35	import urllib as compat_urllib_parse
	36
	37	try:
	38	from urllib.parse import urlparse as compat_urllib_parse_urlparse
	39	except ImportError: # Python 2
	40	from urlparse import urlparse as compat_urllib_parse_urlparse
	41
	42	try:
	43	import urllib.parse as compat_urlparse
	44	except ImportError: # Python 2
	45	import urlparse as compat_urlparse
	46
	47	try:
	48	import http.cookiejar as compat_cookiejar
	49	except ImportError: # Python 2
	50	import cookielib as compat_cookiejar
	51
	52	try:
	53	import html.entities as compat_html_entities
	54	except ImportError: # Python 2
	55	import htmlentitydefs as compat_html_entities
	56
	57	try:
	58	import html.parser as compat_html_parser
	59	except ImportError: # Python 2
	60	import HTMLParser as compat_html_parser
	61
	62	try:
	63	import http.client as compat_http_client
	64	except ImportError: # Python 2
	65	import httplib as compat_http_client
	66
	67	try:
	68	from urllib.error import HTTPError as compat_HTTPError
	69	except ImportError: # Python 2
	70	from urllib2 import HTTPError as compat_HTTPError
	71
	72	try:
	73	from urllib.request import urlretrieve as compat_urlretrieve
	74	except ImportError: # Python 2
	75	from urllib import urlretrieve as compat_urlretrieve
	76
	77
	78	try:
	79	from subprocess import DEVNULL
	80	compat_subprocess_get_DEVNULL = lambda: DEVNULL
	81	except ImportError:
	82	compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
	83
# compat_parse_qs: parse a query string into a dict mapping each field name to
# the list of its values. Python 3's urllib.parse.parse_qs is used directly
# when it can be imported; otherwise the fallback below is defined.
try:
    from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
    # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
    # Python 2's version is apparently totally broken
    def _unquote(string, encoding='utf-8', errors='replace'):
        """Replace %xx escapes in `string` by the characters they encode.

        Consecutive percent-escapes are collected as raw bytes and decoded
        together with `encoding`/`errors` (None selects 'utf-8'/'replace'),
        so multi-byte sequences are decoded as a unit. A '%' followed by
        nothing (an empty segment) is kept as a literal '%'.
        """
        if string == '':
            return string
        res = string.split('%')
        # No '%' at all: nothing to unquote
        if len(res) == 1:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'replace'
        # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
        pct_sequence = b''
        string = res[0]
        for item in res[1:]:
            try:
                if not item:
                    raise ValueError
                # NOTE(review): relies on Python 2's str.decode('hex'); whether
                # malformed hex digits surface as ValueError here is not
                # visible from this code - confirm.
                pct_sequence += item[:2].decode('hex')
                rest = item[2:]
                if not rest:
                    # This segment was just a single percent-encoded character.
                    # May be part of a sequence of code units, so delay decoding.
                    # (Stored in pct_sequence).
                    continue
            except ValueError:
                rest = '%' + item
            # Encountered non-percent-encoded characters. Flush the current
            # pct_sequence.
            string += pct_sequence.decode(encoding, errors) + rest
            pct_sequence = b''
        if pct_sequence:
            # Flush the final pct_sequence
            string += pct_sequence.decode(encoding, errors)
        return string

    def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
                   encoding='utf-8', errors='replace'):
        """Parse a query string into a list of (name, value) pairs.

        Fields are separated by '&' or ';'. With strict_parsing, a field
        without '=' raises ValueError; otherwise such a field is skipped
        unless keep_blank_values is set, in which case it gets an empty value.
        Pairs with an empty value are dropped unless keep_blank_values is set.
        """
        qs, _coerce_result = qs, unicode
        pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
        r = []
        for name_value in pairs:
            if not name_value and not strict_parsing:
                continue
            nv = name_value.split('=', 1)
            if len(nv) != 2:
                if strict_parsing:
                    raise ValueError("bad query field: %r" % (name_value,))
                # Handle case of a control-name with no equal sign
                if keep_blank_values:
                    nv.append('')
                else:
                    continue
            if len(nv[1]) or keep_blank_values:
                # '+' encodes a space in query strings; decode it before
                # resolving the percent-escapes.
                name = nv[0].replace('+', ' ')
                name = _unquote(name, encoding=encoding, errors=errors)
                name = _coerce_result(name)
                value = nv[1].replace('+', ' ')
                value = _unquote(value, encoding=encoding, errors=errors)
                value = _coerce_result(value)
                r.append((name, value))
        return r

    def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
                        encoding='utf-8', errors='replace'):
        """Parse a query string into a dict of name -> list of values.

        Values of a repeated name are appended in order of appearance.
        All arguments are forwarded to _parse_qsl.
        """
        parsed_result = {}
        pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
                           encoding=encoding, errors=errors)
        for name, value in pairs:
            if name in parsed_result:
                parsed_result[name].append(value)
            else:
                parsed_result[name] = [value]
        return parsed_result
	162
# compat_str: the text string type - `unicode` where that builtin exists
# (Python 2), otherwise `str`.
try:
    compat_str = unicode # Python 2
except NameError:
    compat_str = str

# compat_chr: converts a code point to a one-character text string -
# `unichr` where that builtin exists (Python 2), otherwise `chr`.
try:
    compat_chr = unichr # Python 2
except NameError:
    compat_chr = chr
	172
	173	def compat_ord(c):
	174	if type(c) is int: return c
	175	else: return ord(c)
	176
# The type of compiled regular expression objects, for use in isinstance()
# checks. It is taken from an actual compiled pattern because
# this is not clearly defined otherwise.
compiled_regex_type = type(re.compile(''))
	179
	180	std_headers = {
	181	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
	182	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	183	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	184	'Accept-Encoding': 'gzip, deflate',
	185	'Accept-Language': 'en-us,en;q=0.5',
	186	}
	187
	188	def preferredencoding():
	189	"""Get preferred encoding.
	190
	191	Returns the best encoding scheme for the system, based on
	192	locale.getpreferredencoding() and some further tweaks.
	193	"""
	194	try:
	195	pref = locale.getpreferredencoding()
	196	u'TEST'.encode(pref)
	197	except:
	198	pref = 'UTF-8'
	199
	200	return pref
	201
	202	if sys.version_info < (3,0):
	203	def compat_print(s):
	204	print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
	205	else:
	206	def compat_print(s):
	207	assert type(s) == type(u'')
	208	print(s)
	209
	210	# In Python 2.x, json.dump expects a bytestream.
	211	# In Python 3.x, it writes to a character stream
	212	if sys.version_info < (3,0):
	213	def write_json_file(obj, fn):
	214	with open(fn, 'wb') as f:
	215	json.dump(obj, f)
	216	else:
	217	def write_json_file(obj, fn):
	218	with open(fn, 'w', encoding='utf-8') as f:
	219	json.dump(obj, f)
	220
	221	if sys.version_info >= (2,7):
	222	def find_xpath_attr(node, xpath, key, val):
	223	""" Find the xpath xpath[@key=val] """
	224	assert re.match(r'^[a-zA-Z]+$', key)
	225	assert re.match(r'^[a-zA-Z0-9@\s]*$', val)
	226	expr = xpath + u"[@%s='%s']" % (key, val)
	227	return node.find(expr)
	228	else:
	229	def find_xpath_attr(node, xpath, key, val):
	230	for f in node.findall(xpath):
	231	if f.attrib.get(key) == val:
	232	return f
	233	return None
	234
	235	# On python2.6 the xml.etree.ElementTree.Element methods don't support
	236	# the namespace parameter
	237	def xpath_with_ns(path, ns_map):
	238	components = [c.split(':') for c in path.split('/')]
	239	replaced = []
	240	for c in components:
	241	if len(c) == 1:
	242	replaced.append(c[0])
	243	else:
	244	ns, tag = c
	245	replaced.append('{%s}%s' % (ns_map[ns], tag))
	246	return '/'.join(replaced)
	247
	248	def htmlentity_transform(matchobj):
	249	"""Transforms an HTML entity to a character.
	250
	251	This function receives a match object and is intended to be used with
	252	the re.sub() function.
	253	"""
	254	entity = matchobj.group(1)
	255
	256	# Known non-numeric HTML entity
	257	if entity in compat_html_entities.name2codepoint:
	258	return compat_chr(compat_html_entities.name2codepoint[entity])
	259
	260	mobj = re.match(u'(?u)#(x?\\d+)', entity)
	261	if mobj is not None:
	262	numstr = mobj.group(1)
	263	if numstr.startswith(u'x'):
	264	base = 16
	265	numstr = u'0%s' % numstr
	266	else:
	267	base = 10
	268	return compat_chr(int(numstr, base))
	269
	270	# Unknown entity in name, return its literal representation
	271	return (u'&%s;' % entity)
	272
	273	compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_](?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>](?:\s=+\s(?:'[^']'\|"[^"]"\|(?!['"])[^>\s]))?\s))?\s""", re.VERBOSE) # backport bugfix
	274	class BaseHTMLParser(compat_html_parser.HTMLParser):
	275	def __init(self):
	276	compat_html_parser.HTMLParser.__init__(self)
	277	self.html = None
	278
	279	def loads(self, html):
	280	self.html = html
	281	self.feed(html)
	282	self.close()
	283
class AttrParser(BaseHTMLParser):
    """Modified HTMLParser that isolates a tag with the specified attribute"""
    def __init__(self, attribute, value):
        # Attribute name/value identifying the wanted tag
        self.attribute = attribute
        self.value = value
        # Becomes [tag, start position, end position] while parsing; the
        # positions are the tuples returned by getpos()
        self.result = None
        # True while the parser is inside the wanted tag
        self.started = False
        # Number of currently open tags per tag name, counted from the
        # wanted tag onwards
        self.depth = {}
        # True between the wanted start tag and the next parser event
        self.watch_startpos = False
        self.error_count = 0
        BaseHTMLParser.__init__(self)

    def error(self, message):
        """Recover from a parse error by dropping input and resuming.

        Gives up (raises HTMLParseError) after more than 10 errors, or when
        the error occurs inside the wanted tag.
        """
        if self.error_count > 10 or self.started:
            raise compat_html_parser.HTMLParseError(message, self.getpos())
        self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
        self.error_count += 1
        self.goahead(1)

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if self.started:
            # A nested tag is also the "next event" after the wanted tag
            self.find_startpos(None)
        if self.attribute in attrs and attrs[self.attribute] == self.value:
            self.result = [tag]
            self.started = True
            self.watch_startpos = True
        if self.started:
            if not tag in self.depth: self.depth[tag] = 0
            self.depth[tag] += 1

    def handle_endtag(self, tag):
        if self.started:
            if tag in self.depth: self.depth[tag] -= 1
            # The wanted element ends once every tag of its name opened
            # since the start has been closed again
            if self.depth[self.result[0]] == 0:
                self.started = False
                self.result.append(self.getpos())

    def find_startpos(self, x):
        """Needed to put the start position of the result (self.result[1])
        after the opening tag with the requested id"""
        if self.watch_startpos:
            self.watch_startpos = False
            self.result.append(self.getpos())
    # Every other parser event records the start position the same way
    handle_entityref = handle_charref = handle_data = handle_comment = \
    handle_decl = handle_pi = unknown_decl = find_startpos

    def get_result(self):
        """Return the stripped text between the recorded start and end
        positions, or None if the tag was not found or never closed."""
        if self.result is None:
            return None
        if len(self.result) != 3:
            return None
        lines = self.html.split('\n')
        # Positions are (line, column); the "-1" treats the line as 1-based
        lines = lines[self.result[1][0]-1:self.result[2][0]]
        lines[0] = lines[0][self.result[1][1]:]
        if len(lines) == 1:
            # Start and end are on the same line: the end column has to be
            # shifted by the characters already cut off at the front
            lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
        lines[-1] = lines[-1][:self.result[2][1]]
        return '\n'.join(lines).strip()
# Hack for https://github.com/rg3/youtube-dl/issues/662
# On Python older than 2.7.3, skip over the literal "</scr'+'ipt>" instead of
# handing it to the regular end tag parser.
if sys.version_info < (2, 7, 3):
    AttrParser.parse_endtag = (lambda self, i:
        i + len("</scr'+'ipt>")
        if self.rawdata[i:].startswith("</scr'+'ipt>")
        else compat_html_parser.HTMLParser.parse_endtag(self, i))
	349
	350	def get_element_by_id(id, html):
	351	"""Return the content of the tag with the specified ID in the passed HTML document"""
	352	return get_element_by_attribute("id", id, html)
	353
	354	def get_element_by_attribute(attribute, value, html):
	355	"""Return the content of the tag with the specified attribute in the passed HTML document"""
	356	parser = AttrParser(attribute, value)
	357	try:
	358	parser.loads(html)
	359	except compat_html_parser.HTMLParseError:
	360	pass
	361	return parser.get_result()
	362
	363	class MetaParser(BaseHTMLParser):
	364	"""
	365	Modified HTMLParser that isolates a meta tag with the specified name
	366	attribute.
	367	"""
	368	def __init__(self, name):
	369	BaseHTMLParser.__init__(self)
	370	self.name = name
	371	self.content = None
	372	self.result = None
	373
	374	def handle_starttag(self, tag, attrs):
	375	if tag != 'meta':
	376	return
	377	attrs = dict(attrs)
	378	if attrs.get('name') == self.name:
	379	self.result = attrs.get('content')
	380
	381	def get_result(self):
	382	return self.result
	383
	384	def get_meta_content(name, html):
	385	"""
	386	Return the content attribute from the meta tag with the given name attribute.
	387	"""
	388	parser = MetaParser(name)
	389	try:
	390	parser.loads(html)
	391	except compat_html_parser.HTMLParseError:
	392	pass
	393	return parser.get_result()
	394
	395
	396	def clean_html(html):
	397	"""Clean an HTML snippet into a readable string"""
	398	# Newline vs <br />
	399	html = html.replace('\n', ' ')
	400	html = re.sub(r'\s<\sbr\s/?\s>\s*', '\n', html)
	401	html = re.sub(r'<\s/\sp\s>\s<\sp[^>]>', '\n', html)
	402	# Strip html tags
	403	html = re.sub('<.*?>', '', html)
	404	# Replace html entities
	405	html = unescapeHTML(html)
	406	return html.strip()
	407
	408
	409	def sanitize_open(filename, open_mode):
	410	"""Try to open the given filename, and slightly tweak it if this fails.
	411
	412	Attempts to open the given filename. If this fails, it tries to change
	413	the filename slightly, step by step, until it's either able to open it
	414	or it fails and raises a final exception, like the standard open()
	415	function.
	416
	417	It returns the tuple (stream, definitive_file_name).
	418	"""
	419	try:
	420	if filename == u'-':
	421	if sys.platform == 'win32':
	422	import msvcrt
	423	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	424	return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
	425	stream = open(encodeFilename(filename), open_mode)
	426	return (stream, filename)
	427	except (IOError, OSError) as err:
	428	if err.errno in (errno.EACCES,):
	429	raise
	430
	431	# In case of error, try to remove win32 forbidden chars
	432	alt_filename = os.path.join(
	433	re.sub(u'[/<>:"\\\|\\\\?\\*]', u'#', path_part)
	434	for path_part in os.path.split(filename)
	435	)
	436	if alt_filename == filename:
	437	raise
	438	else:
	439	# An exception here should be caught in the caller
	440	stream = open(encodeFilename(filename), open_mode)
	441	return (stream, alt_filename)
	442
	443
	444	def timeconvert(timestr):
	445	"""Convert RFC 2822 defined time string into system timestamp"""
	446	timestamp = None
	447	timetuple = email.utils.parsedate_tz(timestr)
	448	if timetuple is not None:
	449	timestamp = email.utils.mktime_tz(timetuple)
	450	return timestamp
	451
	452	def sanitize_filename(s, restricted=False, is_id=False):
	453	"""Sanitizes a string so it could be used as part of a filename.
	454	If restricted is set, use a stricter subset of allowed characters.
	455	Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
	456	"""
	457	def replace_insane(char):
	458	if char == '?' or ord(char) < 32 or ord(char) == 127:
	459	return ''
	460	elif char == '"':
	461	return '' if restricted else '\''
	462	elif char == ':':
	463	return '_-' if restricted else ' -'
	464	elif char in '\\/\|*<>':
	465	return '_'
	466	if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
	467	return '_'
	468	if restricted and ord(char) > 127:
	469	return '_'
	470	return char
	471
	472	result = u''.join(map(replace_insane, s))
	473	if not is_id:
	474	while '__' in result:
	475	result = result.replace('__', '_')
	476	result = result.strip('_')
	477	# Common case of "Foreign band name - English song title"
	478	if restricted and result.startswith('-_'):
	479	result = result[2:]
	480	if not result:
	481	result = '_'
	482	return result
	483
	484	def orderedSet(iterable):
	485	""" Remove all duplicates from the input iterable """
	486	res = []
	487	for el in iterable:
	488	if el not in res:
	489	res.append(el)
	490	return res
	491
	492	def unescapeHTML(s):
	493	"""
	494	@param s a string
	495	"""
	496	assert type(s) == type(u'')
	497
	498	result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
	499	return result
	500
	501	def encodeFilename(s):
	502	"""
	503	@param s The name of the file
	504	"""
	505
	506	assert type(s) == type(u'')
	507
	508	# Python 3 has a Unicode API
	509	if sys.version_info >= (3, 0):
	510	return s
	511
	512	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	513	# Pass u'' directly to use Unicode APIs on Windows 2000 and up
	514	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	515	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	516	return s
	517	else:
	518	encoding = sys.getfilesystemencoding()
	519	if encoding is None:
	520	encoding = 'utf-8'
	521	return s.encode(encoding, 'ignore')
	522
	523	def decodeOption(optval):
	524	if optval is None:
	525	return optval
	526	if isinstance(optval, bytes):
	527	optval = optval.decode(preferredencoding())
	528
	529	assert isinstance(optval, compat_str)
	530	return optval
	531
	532	def formatSeconds(secs):
	533	if secs > 3600:
	534	return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
	535	elif secs > 60:
	536	return '%d:%02d' % (secs // 60, secs % 60)
	537	else:
	538	return '%d' % secs
	539
	540	def make_HTTPS_handler(opts_no_check_certificate):
	541	if sys.version_info < (3, 2):
	542	import httplib
	543
	544	class HTTPSConnectionV3(httplib.HTTPSConnection):
	545	def __init__(self, args, *kwargs):
	546	httplib.HTTPSConnection.__init__(self, args, *kwargs)
	547
	548	def connect(self):
	549	sock = socket.create_connection((self.host, self.port), self.timeout)
	550	if self._tunnel_host:
	551	self.sock = sock
	552	self._tunnel()
	553	try:
	554	self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv3)
	555	except ssl.SSLError:
	556	self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
	557
	558	class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
	559	def https_open(self, req):
	560	return self.do_open(HTTPSConnectionV3, req)
	561	return HTTPSHandlerV3()
	562	else:
	563	context = ssl.SSLContext(ssl.PROTOCOL_SSLv3)
	564	context.set_default_verify_paths()
	565
	566	context.verify_mode = (ssl.CERT_NONE
	567	if opts_no_check_certificate
	568	else ssl.CERT_REQUIRED)
	569	return compat_urllib_request.HTTPSHandler(context=context)
	570
	571	class ExtractorError(Exception):
	572	"""Error during info extraction."""
	573	def __init__(self, msg, tb=None, expected=False, cause=None):
	574	""" tb, if given, is the original traceback (so that it can be printed out).
	575	If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
	576	"""
	577
	578	if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
	579	expected = True
	580	if not expected:
	581	msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type youtube-dl -U to update.'
	582	super(ExtractorError, self).__init__(msg)
	583
	584	self.traceback = tb
	585	self.exc_info = sys.exc_info() # preserve original exception
	586	self.cause = cause
	587
	588	def format_traceback(self):
	589	if self.traceback is None:
	590	return None
	591	return u''.join(traceback.format_tb(self.traceback))
	592
	593
	594	class RegexNotFoundError(ExtractorError):
	595	"""Error when a regex didn't match"""
	596	pass
	597
	598
	599	class DownloadError(Exception):
	600	"""Download Error exception.
	601
	602	This exception may be thrown by FileDownloader objects if they are not
	603	configured to continue on errors. They will contain the appropriate
	604	error message.
	605	"""
	606	def __init__(self, msg, exc_info=None):
	607	""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
	608	super(DownloadError, self).__init__(msg)
	609	self.exc_info = exc_info
	610
	611
	612	class SameFileError(Exception):
	613	"""Same File exception.
	614
	615	This exception will be thrown by FileDownloader objects if they detect
	616	multiple files would have to be downloaded to the same file on disk.
	617	"""
	618	pass
	619
	620
	621	class PostProcessingError(Exception):
	622	"""Post Processing exception.
	623
	624	This exception may be raised by PostProcessor's .run() method to
	625	indicate an error in the postprocessing task.
	626	"""
	627	def __init__(self, msg):
	628	self.msg = msg
	629
	630	class MaxDownloadsReached(Exception):
	631	""" --max-downloads limit has been reached. """
	632	pass
	633
	634
	635	class UnavailableVideoError(Exception):
	636	"""Unavailable Format exception.
	637
	638	This exception will be thrown when a video is requested
	639	in a format that is not available for that video.
	640	"""
	641	pass
	642
	643
	644	class ContentTooShortError(Exception):
	645	"""Content Too Short exception.
	646
	647	This exception may be raised by FileDownloader objects when a file they
	648	download is too small for what the server announced first, indicating
	649	the connection was probably interrupted.
	650	"""
	651	# Both in bytes
	652	downloaded = None
	653	expected = None
	654
	655	def __init__(self, downloaded, expected):
	656	self.downloaded = downloaded
	657	self.expected = expected
	658
	659	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	660	"""Handler for HTTP requests and responses.
	661
	662	This class, when installed with an OpenerDirector, automatically adds
	663	the standard headers to every HTTP request and handles gzipped and
	664	deflated responses from web servers. If compression is to be avoided in
	665	a particular request, the original request in the program code only has
	666	to include the HTTP header "Youtubedl-No-Compression", which will be
	667	removed before making the real request.
	668
	669	Part of this code was copied from:
	670
	671	http://techknack.net/python-urllib2-handlers/
	672
	673	Andrew Rowls, the author of that code, agreed to release it to the
	674	public domain.
	675	"""
	676
	677	@staticmethod
	678	def deflate(data):
	679	try:
	680	return zlib.decompress(data, -zlib.MAX_WBITS)
	681	except zlib.error:
	682	return zlib.decompress(data)
	683
	684	@staticmethod
	685	def addinfourl_wrapper(stream, headers, url, code):
	686	if hasattr(compat_urllib_request.addinfourl, 'getcode'):
	687	return compat_urllib_request.addinfourl(stream, headers, url, code)
	688	ret = compat_urllib_request.addinfourl(stream, headers, url)
	689	ret.code = code
	690	return ret
	691
	692	def http_request(self, req):
	693	for h,v in std_headers.items():
	694	if h in req.headers:
	695	del req.headers[h]
	696	req.add_header(h, v)
	697	if 'Youtubedl-no-compression' in req.headers:
	698	if 'Accept-encoding' in req.headers:
	699	del req.headers['Accept-encoding']
	700	del req.headers['Youtubedl-no-compression']
	701	if 'Youtubedl-user-agent' in req.headers:
	702	if 'User-agent' in req.headers:
	703	del req.headers['User-agent']
	704	req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
	705	del req.headers['Youtubedl-user-agent']
	706	return req
	707
	708	def http_response(self, req, resp):
	709	old_resp = resp
	710	# gzip
	711	if resp.headers.get('Content-encoding', '') == 'gzip':
	712	content = resp.read()
	713	gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
	714	try:
	715	uncompressed = io.BytesIO(gz.read())
	716	except IOError as original_ioerror:
	717	# There may be junk add the end of the file
	718	# See http://stackoverflow.com/q/4928560/35070 for details
	719	for i in range(1, 1024):
	720	try:
	721	gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
	722	uncompressed = io.BytesIO(gz.read())
	723	except IOError:
	724	continue
	725	break
	726	else:
	727	raise original_ioerror
	728	resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
	729	resp.msg = old_resp.msg
	730	# deflate
	731	if resp.headers.get('Content-encoding', '') == 'deflate':
	732	gz = io.BytesIO(self.deflate(resp.read()))
	733	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	734	resp.msg = old_resp.msg
	735	return resp
	736
	737	https_request = http_request
	738	https_response = http_response
	739
	740	def unified_strdate(date_str):
	741	"""Return a string with the date in the format YYYYMMDD"""
	742	upload_date = None
	743	#Replace commas
	744	date_str = date_str.replace(',',' ')
	745	# %z (UTC offset) is only supported in python>=3.2
	746	date_str = re.sub(r' (\+\|-)[\d]*$', '', date_str)
	747	format_expressions = [
	748	'%d %B %Y',
	749	'%B %d %Y',
	750	'%b %d %Y',
	751	'%Y-%m-%d',
	752	'%d/%m/%Y',
	753	'%Y/%m/%d %H:%M:%S',
	754	'%d.%m.%Y %H:%M',
	755	'%Y-%m-%dT%H:%M:%SZ',
	756	'%Y-%m-%dT%H:%M:%S.%fZ',
	757	'%Y-%m-%dT%H:%M:%S.%f0Z',
	758	'%Y-%m-%dT%H:%M:%S',
	759	]
	760	for expression in format_expressions:
	761	try:
	762	upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
	763	except:
	764	pass
	765	return upload_date
	766
	767	def determine_ext(url, default_ext=u'unknown_video'):
	768	guess = url.partition(u'?')[0].rpartition(u'.')[2]
	769	if re.match(r'^[A-Za-z0-9]+$', guess):
	770	return guess
	771	else:
	772	return default_ext
	773
	774	def subtitles_filename(filename, sub_lang, sub_format):
	775	return filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
	776
	777	def date_from_str(date_str):
	778	"""
	779	Return a datetime object from a string in the format YYYYMMDD or
	780	(now\|today)[+-][0-9](day\|week\|month\|year)(s)?"""
	781	today = datetime.date.today()
	782	if date_str == 'now'or date_str == 'today':
	783	return today
	784	match = re.match('(now\|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day\|week\|month\|year)(s)?', date_str)
	785	if match is not None:
	786	sign = match.group('sign')
	787	time = int(match.group('time'))
	788	if sign == '-':
	789	time = -time
	790	unit = match.group('unit')
	791	#A bad aproximation?
	792	if unit == 'month':
	793	unit = 'day'
	794	time *= 30
	795	elif unit == 'year':
	796	unit = 'day'
	797	time *= 365
	798	unit += 's'
	799	delta = datetime.timedelta(**{unit: time})
	800	return today + delta
	801	return datetime.datetime.strptime(date_str, "%Y%m%d").date()
	802
	803	class DateRange(object):
	804	"""Represents a time interval between two dates"""
	805	def __init__(self, start=None, end=None):
	806	"""start and end must be strings in the format accepted by date"""
	807	if start is not None:
	808	self.start = date_from_str(start)
	809	else:
	810	self.start = datetime.datetime.min.date()
	811	if end is not None:
	812	self.end = date_from_str(end)
	813	else:
	814	self.end = datetime.datetime.max.date()
	815	if self.start > self.end:
	816	raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
	817	@classmethod
	818	def day(cls, day):
	819	"""Returns a range that only contains the given day"""
	820	return cls(day,day)
	821	def __contains__(self, date):
	822	"""Check if the date is in the range"""
	823	if not isinstance(date, datetime.date):
	824	date = date_from_str(date)
	825	return self.start <= date <= self.end
	826	def __str__(self):
	827	return '%s - %s' % ( self.start.isoformat(), self.end.isoformat())
	828
	829
	830	def platform_name():
	831	""" Returns the platform name as a compat_str """
	832	res = platform.platform()
	833	if isinstance(res, bytes):
	834	res = res.decode(preferredencoding())
	835
	836	assert isinstance(res, compat_str)
	837	return res
	838
	839
	840	def write_string(s, out=None):
	841	if out is None:
	842	out = sys.stderr
	843	assert type(s) == type(u'')
	844
	845	if ('b' in getattr(out, 'mode', '') or
	846	sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
	847	s = s.encode(preferredencoding(), 'ignore')
	848	out.write(s)
	849	out.flush()
	850
	851
	852	def bytes_to_intlist(bs):
	853	if not bs:
	854	return []
	855	if isinstance(bs[0], int): # Python 3
	856	return list(bs)
	857	else:
	858	return [ord(c) for c in bs]
	859
	860
	861	def intlist_to_bytes(xs):
	862	if not xs:
	863	return b''
	864	if isinstance(chr(0), bytes): # Python 2
	865	return ''.join([chr(x) for x in xs])
	866	else:
	867	return bytes(xs)
	868
	869
	870	def get_cachedir(params={}):
	871	cache_root = os.environ.get('XDG_CACHE_HOME',
	872	os.path.expanduser('~/.cache'))
	873	return params.get('cachedir', os.path.join(cache_root, 'youtube-dl'))
	874
	875
# Cross-platform file locking
# Both branches define the same two helpers:
#   _lock_file(f, exclusive)  - lock the open file f (exclusive or shared)
#   _unlock_file(f)           - release a lock taken by _lock_file
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # ctypes declaration of the structure passed as the last argument of
    # LockFileEx/UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high part of the number of bytes to lock; together with the zero
    # offset used below this is presumably meant to cover the whole file
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        """Lock the open file f; raises OSError if LockFileEx fails."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object because _unlock_file needs it again
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # NOTE(review): flag 0x2 should be LOCKFILE_EXCLUSIVE_LOCK in the
        # Win32 API - confirm against the LockFileEx documentation
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Unlock a file locked with _lock_file; raises OSError if
        UnlockFileEx fails."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    import fcntl

    def _lock_file(f, exclusive):
        """Lock the open file f with fcntl.lockf (LOCK_EX or LOCK_SH)."""
        fcntl.lockf(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

    def _unlock_file(f):
        """Release the lock on f with fcntl.lockf (LOCK_UN)."""
        fcntl.lockf(f, fcntl.LOCK_UN)
	939
	940
	941	class locked_file(object):
	942	def __init__(self, filename, mode, encoding=None):
	943	assert mode in ['r', 'a', 'w']
	944	self.f = io.open(filename, mode, encoding=encoding)
	945	self.mode = mode
	946
	947	def __enter__(self):
	948	exclusive = self.mode != 'r'
	949	try:
	950	_lock_file(self.f, exclusive)
	951	except IOError:
	952	self.f.close()
	953	raise
	954	return self
	955
	956	def __exit__(self, etype, value, traceback):
	957	try:
	958	_unlock_file(self.f)
	959	finally:
	960	self.f.close()
	961
	962	def __iter__(self):
	963	return iter(self.f)
	964
	965	def write(self, *args):
	966	return self.f.write(*args)
	967
	968	def read(self, *args):
	969	return self.f.read(*args)
	970
	971
	972	def shell_quote(args):
	973	quoted_args = []
	974	encoding = sys.getfilesystemencoding()
	975	if encoding is None:
	976	encoding = 'utf-8'
	977	for a in args:
	978	if isinstance(a, bytes):
	979	# We may get a filename encoded with 'encodeFilename'
	980	a = a.decode(encoding)
	981	quoted_args.append(pipes.quote(a))
	982	return u' '.join(quoted_args)
	983
	984
	985	def takewhile_inclusive(pred, seq):
	986	""" Like itertools.takewhile, but include the latest evaluated element
	987	(the first element so that Not pred(e)) """
	988	for e in seq:
	989	yield e
	990	if not pred(e):
	991	return
	992
	993
	994	def smuggle_url(url, data):
	995	""" Pass additional data in a URL for internal use. """
	996
	997	sdata = compat_urllib_parse.urlencode(
	998	{u'__youtubedl_smuggle': json.dumps(data)})
	999	return url + u'#' + sdata
	1000
	1001
	1002	def unsmuggle_url(smug_url):
	1003	if not '#__youtubedl_smuggle' in smug_url:
	1004	return smug_url, None
	1005	url, _, sdata = smug_url.rpartition(u'#')
	1006	jsond = compat_parse_qs(sdata)[u'__youtubedl_smuggle'][0]
	1007	data = json.loads(jsond)
	1008	return url, data
	1009
	1010
	1011	def format_bytes(bytes):
	1012	if bytes is None:
	1013	return u'N/A'
	1014	if type(bytes) is str:
	1015	bytes = float(bytes)
	1016	if bytes == 0.0:
	1017	exponent = 0
	1018	else:
	1019	exponent = int(math.log(bytes, 1024.0))
	1020	suffix = [u'B', u'KiB', u'MiB', u'GiB', u'TiB', u'PiB', u'EiB', u'ZiB', u'YiB'][exponent]
	1021	converted = float(bytes) / float(1024 ** exponent)
	1022	return u'%.2f%s' % (converted, suffix)