1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import unicode_literals
5
6import calendar
7import codecs
8import contextlib
9import ctypes
10import datetime
11import email.utils
12import errno
13import gzip
14import itertools
15import io
16import json
17import locale
18import math
19import os
20import pipes
21import platform
22import re
23import ssl
24import socket
25import struct
26import subprocess
27import sys
28import tempfile
29import traceback
30import xml.etree.ElementTree
31import zlib
32
33from .compat import (
34 compat_chr,
35 compat_getenv,
36 compat_html_entities,
37 compat_parse_qs,
38 compat_str,
39 compat_urllib_error,
40 compat_urllib_parse,
41 compat_urllib_parse_urlparse,
42 compat_urllib_request,
43 compat_urlparse,
44 shlex_quote,
45)
46
47
48# This is not clearly defined otherwise
49compiled_regex_type = type(re.compile(''))
50
51std_headers = {
52 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0 (Chrome)',
53 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
54 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
55 'Accept-Encoding': 'gzip, deflate',
56 'Accept-Language': 'en-us,en;q=0.5',
57}
58
59
60def preferredencoding():
61 """Get preferred encoding.
62
63 Returns the best encoding scheme for the system, based on
64 locale.getpreferredencoding() and some further tweaks.
65 """
66 try:
67 pref = locale.getpreferredencoding()
68 'TEST'.encode(pref)
69 except:
70 pref = 'UTF-8'
71
72 return pref
73
74
75def write_json_file(obj, fn):
76 """ Encode obj as JSON and write it to fn, atomically if possible """
77
78 fn = encodeFilename(fn)
79 if sys.version_info < (3, 0) and sys.platform != 'win32':
80 encoding = get_filesystem_encoding()
81 # os.path.basename returns a bytes object, but NamedTemporaryFile
82 # will fail if the filename contains non ascii characters unless we
83 # use a unicode object
84 path_basename = lambda f: os.path.basename(f).decode(encoding)
85 # the same for os.path.dirname
86 path_dirname = lambda f: os.path.dirname(f).decode(encoding)
87 else:
88 path_basename = os.path.basename
89 path_dirname = os.path.dirname
90
91 args = {
92 'suffix': '.tmp',
93 'prefix': path_basename(fn) + '.',
94 'dir': path_dirname(fn),
95 'delete': False,
96 }
97
98 # In Python 2.x, json.dump expects a bytestream.
99 # In Python 3.x, it writes to a character stream
100 if sys.version_info < (3, 0):
101 args['mode'] = 'wb'
102 else:
103 args.update({
104 'mode': 'w',
105 'encoding': 'utf-8',
106 })
107
108 tf = tempfile.NamedTemporaryFile(**args)
109
110 try:
111 with tf:
112 json.dump(obj, tf)
113 if sys.platform == 'win32':
114 # Need to remove existing file on Windows, else os.rename raises
115 # WindowsError or FileExistsError.
116 try:
117 os.unlink(fn)
118 except OSError:
119 pass
120 os.rename(tf.name, fn)
121 except:
122 try:
123 os.remove(tf.name)
124 except OSError:
125 pass
126 raise
127
128
129if sys.version_info >= (2, 7):
130 def find_xpath_attr(node, xpath, key, val):
131 """ Find the xpath xpath[@key=val] """
132 assert re.match(r'^[a-zA-Z-]+$', key)
133 assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
134 expr = xpath + "[@%s='%s']" % (key, val)
135 return node.find(expr)
136else:
137 def find_xpath_attr(node, xpath, key, val):
138 # Here comes the crazy part: In 2.6, if the xpath is a unicode,
139 # .//node does not match if a node is a direct child of . !
140 if isinstance(xpath, unicode):
141 xpath = xpath.encode('ascii')
142
143 for f in node.findall(xpath):
144 if f.attrib.get(key) == val:
145 return f
146 return None
147
148# On python2.6 the xml.etree.ElementTree.Element methods don't support
149# the namespace parameter
150
151
152def xpath_with_ns(path, ns_map):
153 components = [c.split(':') for c in path.split('/')]
154 replaced = []
155 for c in components:
156 if len(c) == 1:
157 replaced.append(c[0])
158 else:
159 ns, tag = c
160 replaced.append('{%s}%s' % (ns_map[ns], tag))
161 return '/'.join(replaced)
162
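# Example (illustrative; the prefix and namespace URL below are hypothetical,
# the transformation is the one performed by xpath_with_ns above):
#
#     xpath_with_ns('media:song/media:author', {'media': 'http://example.com/ns'})
#     == '{http://example.com/ns}song/{http://example.com/ns}author'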
163
164def xpath_text(node, xpath, name=None, fatal=False):
165 if sys.version_info < (2, 7): # Crazy 2.6
166 xpath = xpath.encode('ascii')
167
168 n = node.find(xpath)
169 if n is None or n.text is None:
170 if fatal:
171 name = xpath if name is None else name
172 raise ExtractorError('Could not find XML element %s' % name)
173 else:
174 return None
175 return n.text
176
177
178def get_element_by_id(id, html):
179 """Return the content of the tag with the specified ID in the passed HTML document"""
180 return get_element_by_attribute("id", id, html)
181
182
183def get_element_by_attribute(attribute, value, html):
184 """Return the content of the tag with the specified attribute in the passed HTML document"""
185
186 m = re.search(r'''(?xs)
187 <([a-zA-Z0-9:._-]+)
188 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
189 \s+%s=['"]?%s['"]?
190 (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]+|="[^"]+"|='[^']+'))*?
191 \s*>
192 (?P<content>.*?)
193 </\1>
194 ''' % (re.escape(attribute), re.escape(value)), html)
195
196 if not m:
197 return None
198 res = m.group('content')
199
200 if res.startswith('"') or res.startswith("'"):
201 res = res[1:-1]
202
203 return unescapeHTML(res)
204
205
206def clean_html(html):
207 """Clean an HTML snippet into a readable string"""
208
209 if html is None: # Convenience for sanitizing descriptions etc.
210 return html
211
212 # Newline vs <br />
213 html = html.replace('\n', ' ')
214 html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
215 html = re.sub(r'<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
216 # Strip html tags
217 html = re.sub('<.*?>', '', html)
218 # Replace html entities
219 html = unescapeHTML(html)
220 return html.strip()
221
222
223def sanitize_open(filename, open_mode):
224 """Try to open the given filename, and slightly tweak it if this fails.
225
226 Attempts to open the given filename. If this fails, it tries to change
227 the filename slightly, step by step, until it's either able to open it
228 or it fails and raises a final exception, like the standard open()
229 function.
230
231 It returns the tuple (stream, definitive_file_name).
232 """
233 try:
234 if filename == '-':
235 if sys.platform == 'win32':
236 import msvcrt
237 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
238 return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
239 stream = open(encodeFilename(filename), open_mode)
240 return (stream, filename)
241 except (IOError, OSError) as err:
242 if err.errno in (errno.EACCES,):
243 raise
244
245 # In case of error, try to remove win32 forbidden chars
246 alt_filename = os.path.join(*(
247 re.sub('[/<>:"\\|\\\\?\\*]', '#', path_part)
248 for path_part in os.path.split(filename)
249 ))
250 if alt_filename == filename:
251 raise
252 else:
253 # An exception here should be caught in the caller
254 stream = open(encodeFilename(filename), open_mode)
255 return (stream, alt_filename)
256
257
258def timeconvert(timestr):
259 """Convert RFC 2822 defined time string into system timestamp"""
260 timestamp = None
261 timetuple = email.utils.parsedate_tz(timestr)
262 if timetuple is not None:
263 timestamp = email.utils.mktime_tz(timetuple)
264 return timestamp
265
266
267def sanitize_filename(s, restricted=False, is_id=False):
268 """Sanitizes a string so it can be used as part of a filename.
269 If restricted is set, use a stricter subset of allowed characters.
270 Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
271 """
272 def replace_insane(char):
273 if char == '?' or ord(char) < 32 or ord(char) == 127:
274 return ''
275 elif char == '"':
276 return '' if restricted else '\''
277 elif char == ':':
278 return '_-' if restricted else ' -'
279 elif char in '\\/|*<>':
280 return '_'
281 if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
282 return '_'
283 if restricted and ord(char) > 127:
284 return '_'
285 return char
286
287 result = ''.join(map(replace_insane, s))
288 if not is_id:
289 while '__' in result:
290 result = result.replace('__', '_')
291 result = result.strip('_')
292 # Common case of "Foreign band name - English song title"
293 if restricted and result.startswith('-_'):
294 result = result[2:]
295 if not result:
296 result = '_'
297 return result
298
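# Examples (illustrative; the inputs are made up, the outputs follow the
# replace_insane() rules implemented above):
#
#     sanitize_filename('A/B: C?')                  == 'A_B - C'
#     sanitize_filename('A/B: C?', restricted=True) == 'A_B_-_C'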
299
300def orderedSet(iterable):
301 """ Remove all duplicates from the input iterable """
302 res = []
303 for el in iterable:
304 if el not in res:
305 res.append(el)
306 return res
307
308
309def _htmlentity_transform(entity):
310 """Transforms an HTML entity to a character."""
311 # Known non-numeric HTML entity
312 if entity in compat_html_entities.name2codepoint:
313 return compat_chr(compat_html_entities.name2codepoint[entity])
314
315 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
316 if mobj is not None:
317 numstr = mobj.group(1)
318 if numstr.startswith('x'):
319 base = 16
320 numstr = '0%s' % numstr
321 else:
322 base = 10
323 return compat_chr(int(numstr, base))
324
325 # Unknown entity in name, return its literal representation
326 return ('&%s;' % entity)
327
328
329def unescapeHTML(s):
330 if s is None:
331 return None
332 assert type(s) == compat_str
333
334 return re.sub(
335 r'&([^;]+);', lambda m: _htmlentity_transform(m.group(1)), s)
336
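# Examples (illustrative, based on the entity handling above):
#
#     unescapeHTML('&amp;')    == '&'
#     unescapeHTML('&eacute;') == 'é'
#     unescapeHTML('&#34;')    == '"'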
337
338def encodeFilename(s, for_subprocess=False):
339 """
340 @param s The name of the file
341 """
342
343 assert type(s) == compat_str
344
345 # Python 3 has a Unicode API
346 if sys.version_info >= (3, 0):
347 return s
348
349 if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
350 # Pass '' directly to use Unicode APIs on Windows 2000 and up
351 # (Detecting Windows NT 4 is tricky because 'major >= 4' would
352 # match Windows 9x series as well. Besides, NT 4 is obsolete.)
353 if not for_subprocess:
354 return s
355 else:
356 # For subprocess calls, encode with locale encoding
357 # Refer to http://stackoverflow.com/a/9951851/35070
358 encoding = preferredencoding()
359 else:
360 encoding = sys.getfilesystemencoding()
361 if encoding is None:
362 encoding = 'utf-8'
363 return s.encode(encoding, 'ignore')
364
365
366def encodeArgument(s):
367 if not isinstance(s, compat_str):
368 # Legacy code that uses byte strings
369 # Uncomment the following line after fixing all post processors
370 # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
371 s = s.decode('ascii')
372 return encodeFilename(s, True)
373
374
375def decodeOption(optval):
376 if optval is None:
377 return optval
378 if isinstance(optval, bytes):
379 optval = optval.decode(preferredencoding())
380
381 assert isinstance(optval, compat_str)
382 return optval
383
384
385def formatSeconds(secs):
386 if secs > 3600:
387 return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
388 elif secs > 60:
389 return '%d:%02d' % (secs // 60, secs % 60)
390 else:
391 return '%d' % secs
392
393
394def make_HTTPS_handler(opts_no_check_certificate, **kwargs):
395 if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
396 context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
397 if opts_no_check_certificate:
398 context.verify_mode = ssl.CERT_NONE
399 try:
400 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
401 except TypeError:
402 # Python 2.7.8
403 # (create_default_context present but HTTPSHandler has no context=)
404 pass
405
406 if sys.version_info < (3, 2):
407 import httplib
408
409 class HTTPSConnectionV3(httplib.HTTPSConnection):
410 def __init__(self, *args, **kwargs):
411 httplib.HTTPSConnection.__init__(self, *args, **kwargs)
412
413 def connect(self):
414 sock = socket.create_connection((self.host, self.port), self.timeout)
415 if getattr(self, '_tunnel_host', False):
416 self.sock = sock
417 self._tunnel()
418 try:
419 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_TLSv1)
420 except ssl.SSLError:
421 self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, ssl_version=ssl.PROTOCOL_SSLv23)
422
423 class HTTPSHandlerV3(compat_urllib_request.HTTPSHandler):
424 def https_open(self, req):
425 return self.do_open(HTTPSConnectionV3, req)
426 return HTTPSHandlerV3(**kwargs)
427 else: # Python < 3.4
428 context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
429 context.verify_mode = (ssl.CERT_NONE
430 if opts_no_check_certificate
431 else ssl.CERT_REQUIRED)
432 context.set_default_verify_paths()
433 return compat_urllib_request.HTTPSHandler(context=context, **kwargs)
434
435
436class ExtractorError(Exception):
437 """Error during info extraction."""
438
439 def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
440 """ tb, if given, is the original traceback (so that it can be printed out).
441 If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
442 """
443
444 if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
445 expected = True
446 if video_id is not None:
447 msg = video_id + ': ' + msg
448 if cause:
449 msg += ' (caused by %r)' % cause
450 if not expected:
451 if ytdl_is_updateable():
452 update_cmd = 'type youtube-dl -U to update'
453 else:
454 update_cmd = 'see https://yt-dl.org/update on how to update'
455 msg += '; please report this issue on https://yt-dl.org/bug .'
456 msg += ' Make sure you are using the latest version; %s.' % update_cmd
457 msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
458 super(ExtractorError, self).__init__(msg)
459
460 self.traceback = tb
461 self.exc_info = sys.exc_info() # preserve original exception
462 self.cause = cause
463 self.video_id = video_id
464
465 def format_traceback(self):
466 if self.traceback is None:
467 return None
468 return ''.join(traceback.format_tb(self.traceback))
469
470
471class UnsupportedError(ExtractorError):
472 def __init__(self, url):
473 super(UnsupportedError, self).__init__(
474 'Unsupported URL: %s' % url, expected=True)
475 self.url = url
476
477
478class RegexNotFoundError(ExtractorError):
479 """Error when a regex didn't match"""
480 pass
481
482
483class DownloadError(Exception):
484 """Download Error exception.
485
486 This exception may be thrown by FileDownloader objects if they are not
487 configured to continue on errors. They will contain the appropriate
488 error message.
489 """
490
491 def __init__(self, msg, exc_info=None):
492 """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
493 super(DownloadError, self).__init__(msg)
494 self.exc_info = exc_info
495
496
497class SameFileError(Exception):
498 """Same File exception.
499
500 This exception will be thrown by FileDownloader objects if they detect
501 multiple files would have to be downloaded to the same file on disk.
502 """
503 pass
504
505
506class PostProcessingError(Exception):
507 """Post Processing exception.
508
509 This exception may be raised by PostProcessor's .run() method to
510 indicate an error in the postprocessing task.
511 """
512
513 def __init__(self, msg):
514 self.msg = msg
515
516
517class MaxDownloadsReached(Exception):
518 """ --max-downloads limit has been reached. """
519 pass
520
521
522class UnavailableVideoError(Exception):
523 """Unavailable Format exception.
524
525 This exception will be thrown when a video is requested
526 in a format that is not available for that video.
527 """
528 pass
529
530
531class ContentTooShortError(Exception):
532 """Content Too Short exception.
533
534 This exception may be raised by FileDownloader objects when a file they
535 download is too small for what the server announced first, indicating
536 the connection was probably interrupted.
537 """
538 # Both in bytes
539 downloaded = None
540 expected = None
541
542 def __init__(self, downloaded, expected):
543 self.downloaded = downloaded
544 self.expected = expected
545
546
547class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
548 """Handler for HTTP requests and responses.
549
550 This class, when installed with an OpenerDirector, automatically adds
551 the standard headers to every HTTP request and handles gzipped and
552 deflated responses from web servers. If compression is to be avoided in
553 a particular request, the original request in the program code only has
554 to include the HTTP header "Youtubedl-No-Compression", which will be
555 removed before making the real request.
556
557 Part of this code was copied from:
558
559 http://techknack.net/python-urllib2-handlers/
560
561 Andrew Rowls, the author of that code, agreed to release it to the
562 public domain.
563 """
564
565 @staticmethod
566 def deflate(data):
567 try:
568 return zlib.decompress(data, -zlib.MAX_WBITS)
569 except zlib.error:
570 return zlib.decompress(data)
571
572 @staticmethod
573 def addinfourl_wrapper(stream, headers, url, code):
574 if hasattr(compat_urllib_request.addinfourl, 'getcode'):
575 return compat_urllib_request.addinfourl(stream, headers, url, code)
576 ret = compat_urllib_request.addinfourl(stream, headers, url)
577 ret.code = code
578 return ret
579
580 def http_request(self, req):
581 for h, v in std_headers.items():
582 if h not in req.headers:
583 req.add_header(h, v)
584 if 'Youtubedl-no-compression' in req.headers:
585 if 'Accept-encoding' in req.headers:
586 del req.headers['Accept-encoding']
587 del req.headers['Youtubedl-no-compression']
588 if 'Youtubedl-user-agent' in req.headers:
589 if 'User-agent' in req.headers:
590 del req.headers['User-agent']
591 req.headers['User-agent'] = req.headers['Youtubedl-user-agent']
592 del req.headers['Youtubedl-user-agent']
593
594 if sys.version_info < (2, 7) and '#' in req.get_full_url():
595 # Python 2.6 is brain-dead when it comes to fragments
596 req._Request__original = req._Request__original.partition('#')[0]
597 req._Request__r_type = req._Request__r_type.partition('#')[0]
598
599 return req
600
601 def http_response(self, req, resp):
602 old_resp = resp
603 # gzip
604 if resp.headers.get('Content-encoding', '') == 'gzip':
605 content = resp.read()
606 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
607 try:
608 uncompressed = io.BytesIO(gz.read())
609 except IOError as original_ioerror:
610 # There may be junk at the end of the file
611 # See http://stackoverflow.com/q/4928560/35070 for details
612 for i in range(1, 1024):
613 try:
614 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
615 uncompressed = io.BytesIO(gz.read())
616 except IOError:
617 continue
618 break
619 else:
620 raise original_ioerror
621 resp = self.addinfourl_wrapper(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
622 resp.msg = old_resp.msg
623 # deflate
624 if resp.headers.get('Content-encoding', '') == 'deflate':
625 gz = io.BytesIO(self.deflate(resp.read()))
626 resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
627 resp.msg = old_resp.msg
628 return resp
629
630 https_request = http_request
631 https_response = http_response
632
633
634def parse_iso8601(date_str, delimiter='T'):
635 """ Return a UNIX timestamp from the given date """
636
637 if date_str is None:
638 return None
639
640 m = re.search(
641 r'(\.[0-9]+)?(?:Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
642 date_str)
643 if not m:
644 timezone = datetime.timedelta()
645 else:
646 date_str = date_str[:-len(m.group(0))]
647 if not m.group('sign'):
648 timezone = datetime.timedelta()
649 else:
650 sign = 1 if m.group('sign') == '+' else -1
651 timezone = datetime.timedelta(
652 hours=sign * int(m.group('hours')),
653 minutes=sign * int(m.group('minutes')))
654 date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
655 dt = datetime.datetime.strptime(date_str, date_format) - timezone
656 return calendar.timegm(dt.timetuple())
657
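# Examples (illustrative; the returned values are seconds since the Unix epoch):
#
#     parse_iso8601('2014-12-05T12:30:00Z')      == 1417782600
#     parse_iso8601('2014-12-05T12:30:00+02:00') == 1417775400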
658
659def unified_strdate(date_str, day_first=True):
660 """Return a string with the date in the format YYYYMMDD"""
661
662 if date_str is None:
663 return None
664 upload_date = None
665 # Replace commas
666 date_str = date_str.replace(',', ' ')
667 # %z (UTC offset) is only supported in python>=3.2
668 date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
669 # Remove AM/PM + timezone
670 date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
671
672 format_expressions = [
673 '%d %B %Y',
674 '%d %b %Y',
675 '%B %d %Y',
676 '%b %d %Y',
677 '%b %dst %Y %I:%M%p',
678 '%b %dnd %Y %I:%M%p',
679 '%b %dth %Y %I:%M%p',
680 '%Y-%m-%d',
681 '%Y/%m/%d',
682 '%Y/%m/%d %H:%M:%S',
683 '%Y-%m-%d %H:%M:%S',
684 '%Y-%m-%d %H:%M:%S.%f',
685 '%d.%m.%Y %H:%M',
686 '%d.%m.%Y %H.%M',
687 '%Y-%m-%dT%H:%M:%SZ',
688 '%Y-%m-%dT%H:%M:%S.%fZ',
689 '%Y-%m-%dT%H:%M:%S.%f0Z',
690 '%Y-%m-%dT%H:%M:%S',
691 '%Y-%m-%dT%H:%M:%S.%f',
692 '%Y-%m-%dT%H:%M',
693 ]
694 if day_first:
695 format_expressions.extend([
696 '%d.%m.%Y',
697 '%d/%m/%Y',
698 '%d/%m/%y',
699 '%d/%m/%Y %H:%M:%S',
700 ])
701 else:
702 format_expressions.extend([
703 '%m.%d.%Y',
704 '%m/%d/%Y',
705 '%m/%d/%y',
706 '%m/%d/%Y %H:%M:%S',
707 ])
708 for expression in format_expressions:
709 try:
710 upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
711 except ValueError:
712 pass
713 if upload_date is None:
714 timetuple = email.utils.parsedate_tz(date_str)
715 if timetuple:
716 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
717 return upload_date
718
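# Examples (illustrative):
#
#     unified_strdate('December 21, 2010') == '20101221'
#     unified_strdate('1968-12-10')        == '19681210'
#     unified_strdate('8/7/2009')          == '20090708'  # day_first=True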
719
720def determine_ext(url, default_ext='unknown_video'):
721 if url is None:
722 return default_ext
723 guess = url.partition('?')[0].rpartition('.')[2]
724 if re.match(r'^[A-Za-z0-9]+$', guess):
725 return guess
726 else:
727 return default_ext
728
729
730def subtitles_filename(filename, sub_lang, sub_format):
731 return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
732
733
734def date_from_str(date_str):
735 """
736 Return a datetime object from a string in the format YYYYMMDD or
737 (now|today)[+-][0-9](day|week|month|year)(s)?"""
738 today = datetime.date.today()
739 if date_str in ('now', 'today'):
740 return today
741 if date_str == 'yesterday':
742 return today - datetime.timedelta(days=1)
743 match = re.match('(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
744 if match is not None:
745 sign = match.group('sign')
746 time = int(match.group('time'))
747 if sign == '-':
748 time = -time
749 unit = match.group('unit')
750 # A rough approximation
751 if unit == 'month':
752 unit = 'day'
753 time *= 30
754 elif unit == 'year':
755 unit = 'day'
756 time *= 365
757 unit += 's'
758 delta = datetime.timedelta(**{unit: time})
759 return today + delta
760 return datetime.datetime.strptime(date_str, "%Y%m%d").date()
761
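# Examples (illustrative):
#
#     date_from_str('20141205')  == datetime.date(2014, 12, 5)
#     date_from_str('now-1week') == datetime.date.today() - datetime.timedelta(days=7)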
762
763def hyphenate_date(date_str):
764 """
765 Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
766 match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
767 if match is not None:
768 return '-'.join(match.groups())
769 else:
770 return date_str
771
772
773class DateRange(object):
774 """Represents a time interval between two dates"""
775
776 def __init__(self, start=None, end=None):
777 """start and end must be strings in the format accepted by date"""
778 if start is not None:
779 self.start = date_from_str(start)
780 else:
781 self.start = datetime.datetime.min.date()
782 if end is not None:
783 self.end = date_from_str(end)
784 else:
785 self.end = datetime.datetime.max.date()
786 if self.start > self.end:
787 raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
788
789 @classmethod
790 def day(cls, day):
791 """Returns a range that only contains the given day"""
792 return cls(day, day)
793
794 def __contains__(self, date):
795 """Check if the date is in the range"""
796 if not isinstance(date, datetime.date):
797 date = date_from_str(date)
798 return self.start <= date <= self.end
799
800 def __str__(self):
801 return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
802
803
804def platform_name():
805 """ Returns the platform name as a compat_str """
806 res = platform.platform()
807 if isinstance(res, bytes):
808 res = res.decode(preferredencoding())
809
810 assert isinstance(res, compat_str)
811 return res
812
813
814def _windows_write_string(s, out):
815 """ Returns True if the string was written using special methods,
816 False if it has yet to be written out."""
817 # Adapted from http://stackoverflow.com/a/3259271/35070
818
819 import ctypes
820 import ctypes.wintypes
821
822 WIN_OUTPUT_IDS = {
823 1: -11,
824 2: -12,
825 }
826
827 try:
828 fileno = out.fileno()
829 except AttributeError:
830 # If the output stream doesn't have a fileno, it's virtual
831 return False
832 if fileno not in WIN_OUTPUT_IDS:
833 return False
834
835 GetStdHandle = ctypes.WINFUNCTYPE(
836 ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
837 (b"GetStdHandle", ctypes.windll.kernel32))
838 h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
839
840 WriteConsoleW = ctypes.WINFUNCTYPE(
841 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
842 ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
843 ctypes.wintypes.LPVOID)((b"WriteConsoleW", ctypes.windll.kernel32))
844 written = ctypes.wintypes.DWORD(0)
845
846 GetFileType = ctypes.WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)((b"GetFileType", ctypes.windll.kernel32))
847 FILE_TYPE_CHAR = 0x0002
848 FILE_TYPE_REMOTE = 0x8000
849 GetConsoleMode = ctypes.WINFUNCTYPE(
850 ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
851 ctypes.POINTER(ctypes.wintypes.DWORD))(
852 (b"GetConsoleMode", ctypes.windll.kernel32))
853 INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
854
855 def not_a_console(handle):
856 if handle == INVALID_HANDLE_VALUE or handle is None:
857 return True
858 return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
859 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
860
861 if not_a_console(h):
862 return False
863
864 def next_nonbmp_pos(s):
865 try:
866 return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
867 except StopIteration:
868 return len(s)
869
870 while s:
871 count = min(next_nonbmp_pos(s), 1024)
872
873 ret = WriteConsoleW(
874 h, s, count if count else 2, ctypes.byref(written), None)
875 if ret == 0:
876 raise OSError('Failed to write string')
877 if not count: # We just wrote a non-BMP character
878 assert written.value == 2
879 s = s[1:]
880 else:
881 assert written.value > 0
882 s = s[written.value:]
883 return True
884
885
886def write_string(s, out=None, encoding=None):
887 if out is None:
888 out = sys.stderr
889 assert type(s) == compat_str
890
891 if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
892 if _windows_write_string(s, out):
893 return
894
895 if ('b' in getattr(out, 'mode', '') or
896 sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
897 byt = s.encode(encoding or preferredencoding(), 'ignore')
898 out.write(byt)
899 elif hasattr(out, 'buffer'):
900 enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
901 byt = s.encode(enc, 'ignore')
902 out.buffer.write(byt)
903 else:
904 out.write(s)
905 out.flush()
906
907
908def bytes_to_intlist(bs):
909 if not bs:
910 return []
911 if isinstance(bs[0], int): # Python 3
912 return list(bs)
913 else:
914 return [ord(c) for c in bs]
915
916
917def intlist_to_bytes(xs):
918 if not xs:
919 return b''
920 return struct_pack('%dB' % len(xs), *xs)
921
922
923# Cross-platform file locking
924if sys.platform == 'win32':
925 import ctypes.wintypes
926 import msvcrt
927
928 class OVERLAPPED(ctypes.Structure):
929 _fields_ = [
930 ('Internal', ctypes.wintypes.LPVOID),
931 ('InternalHigh', ctypes.wintypes.LPVOID),
932 ('Offset', ctypes.wintypes.DWORD),
933 ('OffsetHigh', ctypes.wintypes.DWORD),
934 ('hEvent', ctypes.wintypes.HANDLE),
935 ]
936
937 kernel32 = ctypes.windll.kernel32
938 LockFileEx = kernel32.LockFileEx
939 LockFileEx.argtypes = [
940 ctypes.wintypes.HANDLE, # hFile
941 ctypes.wintypes.DWORD, # dwFlags
942 ctypes.wintypes.DWORD, # dwReserved
943 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
944 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
945 ctypes.POINTER(OVERLAPPED) # Overlapped
946 ]
947 LockFileEx.restype = ctypes.wintypes.BOOL
948 UnlockFileEx = kernel32.UnlockFileEx
949 UnlockFileEx.argtypes = [
950 ctypes.wintypes.HANDLE, # hFile
951 ctypes.wintypes.DWORD, # dwReserved
952 ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
953 ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
954 ctypes.POINTER(OVERLAPPED) # Overlapped
955 ]
956 UnlockFileEx.restype = ctypes.wintypes.BOOL
957 whole_low = 0xffffffff
958 whole_high = 0x7fffffff
959
960 def _lock_file(f, exclusive):
961 overlapped = OVERLAPPED()
962 overlapped.Offset = 0
963 overlapped.OffsetHigh = 0
964 overlapped.hEvent = 0
965 f._lock_file_overlapped_p = ctypes.pointer(overlapped)
966 handle = msvcrt.get_osfhandle(f.fileno())
967 if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
968 whole_low, whole_high, f._lock_file_overlapped_p):
969 raise OSError('Locking file failed: %r' % ctypes.FormatError())
970
971 def _unlock_file(f):
972 assert f._lock_file_overlapped_p
973 handle = msvcrt.get_osfhandle(f.fileno())
974 if not UnlockFileEx(handle, 0,
975 whole_low, whole_high, f._lock_file_overlapped_p):
976 raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
977
978else:
979 import fcntl
980
981 def _lock_file(f, exclusive):
982 fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
983
984 def _unlock_file(f):
985 fcntl.flock(f, fcntl.LOCK_UN)
986
987
988class locked_file(object):
989 def __init__(self, filename, mode, encoding=None):
990 assert mode in ['r', 'a', 'w']
991 self.f = io.open(filename, mode, encoding=encoding)
992 self.mode = mode
993
994 def __enter__(self):
995 exclusive = self.mode != 'r'
996 try:
997 _lock_file(self.f, exclusive)
998 except IOError:
999 self.f.close()
1000 raise
1001 return self
1002
1003 def __exit__(self, etype, value, traceback):
1004 try:
1005 _unlock_file(self.f)
1006 finally:
1007 self.f.close()
1008
1009 def __iter__(self):
1010 return iter(self.f)
1011
1012 def write(self, *args):
1013 return self.f.write(*args)
1014
1015 def read(self, *args):
1016 return self.f.read(*args)
1017
1018
1019def get_filesystem_encoding():
1020 encoding = sys.getfilesystemencoding()
1021 return encoding if encoding is not None else 'utf-8'
1022
1023
1024def shell_quote(args):
1025 quoted_args = []
1026 encoding = get_filesystem_encoding()
1027 for a in args:
1028 if isinstance(a, bytes):
1029 # We may get a filename encoded with 'encodeFilename'
1030 a = a.decode(encoding)
1031 quoted_args.append(pipes.quote(a))
1032 return ' '.join(quoted_args)
1033
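# Example (illustrative):
#
#     shell_quote(['echo', 'two words']) == "echo 'two words'"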
1034
1035def takewhile_inclusive(pred, seq):
1036 """ Like itertools.takewhile, but include the latest evaluated element
1037 (the first element for which pred(e) is false) """
1038 for e in seq:
1039 yield e
1040 if not pred(e):
1041 return
1042
1043
1044def smuggle_url(url, data):
1045 """ Pass additional data in a URL for internal use. """
1046
1047 sdata = compat_urllib_parse.urlencode(
1048 {'__youtubedl_smuggle': json.dumps(data)})
1049 return url + '#' + sdata
1050
1051
1052def unsmuggle_url(smug_url, default=None):
1053 if '#__youtubedl_smuggle' not in smug_url:
1054 return smug_url, default
1055 url, _, sdata = smug_url.rpartition('#')
1056 jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
1057 data = json.loads(jsond)
1058 return url, data
1059
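# Example round trip (illustrative; the URL and payload are made up):
#
#     url = smuggle_url('http://example.com/video', {'force_videoid': '42'})
#     unsmuggle_url(url) == ('http://example.com/video', {'force_videoid': '42'})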
1060
1061def format_bytes(bytes):
1062 if bytes is None:
1063 return 'N/A'
1064 if type(bytes) is str:
1065 bytes = float(bytes)
1066 if bytes == 0.0:
1067 exponent = 0
1068 else:
1069 exponent = int(math.log(bytes, 1024.0))
1070 suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
1071 converted = float(bytes) / float(1024 ** exponent)
1072 return '%.2f%s' % (converted, suffix)
1073
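# Examples (illustrative):
#
#     format_bytes(1024)    == '1.00KiB'
#     format_bytes(2000000) == '1.91MiB'
#     format_bytes(None)    == 'N/A'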
1074
1075def parse_filesize(s):
1076 if s is None:
1077 return None
1078
1079 # The lower-case forms are of course incorrect and unofficial,
1080 # but we support them too
1081 _UNIT_TABLE = {
1082 'B': 1,
1083 'b': 1,
1084 'KiB': 1024,
1085 'KB': 1000,
1086 'kB': 1024,
1087 'Kb': 1000,
1088 'MiB': 1024 ** 2,
1089 'MB': 1000 ** 2,
1090 'mB': 1024 ** 2,
1091 'Mb': 1000 ** 2,
1092 'GiB': 1024 ** 3,
1093 'GB': 1000 ** 3,
1094 'gB': 1024 ** 3,
1095 'Gb': 1000 ** 3,
1096 'TiB': 1024 ** 4,
1097 'TB': 1000 ** 4,
1098 'tB': 1024 ** 4,
1099 'Tb': 1000 ** 4,
1100 'PiB': 1024 ** 5,
1101 'PB': 1000 ** 5,
1102 'pB': 1024 ** 5,
1103 'Pb': 1000 ** 5,
1104 'EiB': 1024 ** 6,
1105 'EB': 1000 ** 6,
1106 'eB': 1024 ** 6,
1107 'Eb': 1000 ** 6,
1108 'ZiB': 1024 ** 7,
1109 'ZB': 1000 ** 7,
1110 'zB': 1024 ** 7,
1111 'Zb': 1000 ** 7,
1112 'YiB': 1024 ** 8,
1113 'YB': 1000 ** 8,
1114 'yB': 1024 ** 8,
1115 'Yb': 1000 ** 8,
1116 }
1117
1118 units_re = '|'.join(re.escape(u) for u in _UNIT_TABLE)
1119 m = re.match(
1120 r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)' % units_re, s)
1121 if not m:
1122 return None
1123
1124 num_str = m.group('num').replace(',', '.')
1125 mult = _UNIT_TABLE[m.group('unit')]
1126 return int(float(num_str) * mult)
1127
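# Examples (illustrative; note the distinction between SI and binary units):
#
#     parse_filesize('2 MiB')   == 2097152
#     parse_filesize('5 GB')    == 5000000000
#     parse_filesize('1,24 KB') == 1240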
1128
1129def get_term_width():
1130 columns = compat_getenv('COLUMNS', None)
1131 if columns:
1132 return int(columns)
1133
1134 try:
1135 sp = subprocess.Popen(
1136 ['stty', 'size'],
1137 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
1138 out, err = sp.communicate()
1139 return int(out.split()[1])
1140 except:
1141 pass
1142 return None
1143
1144
1145def month_by_name(name):
1146 """ Return the number of a month by its English name (locale-independent) """
1147
1148 ENGLISH_NAMES = [
1149 'January', 'February', 'March', 'April', 'May', 'June',
1150 'July', 'August', 'September', 'October', 'November', 'December']
1151 try:
1152 return ENGLISH_NAMES.index(name) + 1
1153 except ValueError:
1154 return None
1155
1156
1157def fix_xml_ampersands(xml_str):
1158 """Replace all the '&' by '&amp;' in XML"""
1159 return re.sub(
1160 r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
1161 '&amp;',
1162 xml_str)
1163
1164
1165def setproctitle(title):
1166 assert isinstance(title, compat_str)
1167 try:
1168 libc = ctypes.cdll.LoadLibrary("libc.so.6")
1169 except OSError:
1170 return
1171 title_bytes = title.encode('utf-8')
1172 buf = ctypes.create_string_buffer(len(title_bytes))
1173 buf.value = title_bytes
1174 try:
1175 libc.prctl(15, buf, 0, 0, 0)
1176 except AttributeError:
1177 return # Strange libc, just skip this
1178
1179
1180def remove_start(s, start):
1181 if s.startswith(start):
1182 return s[len(start):]
1183 return s
1184
1185
1186def remove_end(s, end):
1187 if s.endswith(end):
1188 return s[:-len(end)]
1189 return s
1190
1191
1192def url_basename(url):
1193 path = compat_urlparse.urlparse(url).path
1194 return path.strip('/').split('/')[-1]
1195
1196
1197class HEADRequest(compat_urllib_request.Request):
1198 def get_method(self):
1199 return "HEAD"
1200
1201
1202def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
1203 if get_attr:
1204 if v is not None:
1205 v = getattr(v, get_attr, None)
1206 if v == '':
1207 v = None
1208 return default if v is None else (int(v) * invscale // scale)
1209
1210
1211def str_or_none(v, default=None):
1212 return default if v is None else compat_str(v)
1213
1214
1215def str_to_int(int_str):
1216 """ A more relaxed version of int_or_none """
1217 if int_str is None:
1218 return None
1219 int_str = re.sub(r'[,\.\+]', '', int_str)
1220 return int(int_str)
1221
1222
1223def float_or_none(v, scale=1, invscale=1, default=None):
1224 return default if v is None else (float(v) * invscale / scale)
1225
1226
1227def parse_duration(s):
1228 if s is None:
1229 return None
1230
1231 s = s.strip()
1232
1233 m = re.match(
1234 r'''(?ix)T?
1235 (?:
1236 (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
1237 (?P<only_hours>[0-9.]+)\s*(?:hours?)|
1238
1239 (?:
1240 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
1241 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
1242 )?
1243 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
1244 )$''', s)
1245 if not m:
1246 return None
1247 res = 0
1248 if m.group('only_mins'):
1249 return float_or_none(m.group('only_mins'), invscale=60)
1250 if m.group('only_hours'):
1251 return float_or_none(m.group('only_hours'), invscale=60 * 60)
1252 if m.group('secs'):
1253 res += int(m.group('secs'))
1254 if m.group('mins'):
1255 res += int(m.group('mins')) * 60
1256 if m.group('hours'):
1257 res += int(m.group('hours')) * 60 * 60
1258 if m.group('ms'):
1259 res += float(m.group('ms'))
1260 return res
1261
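# Examples (illustrative; results are in seconds):
#
#     parse_duration('9:54:21') == 35661
#     parse_duration('1m30s')   == 90
#     parse_duration('3 min')   == 180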
1262
1263def prepend_extension(filename, ext):
1264 name, real_ext = os.path.splitext(filename)
1265 return '{0}.{1}{2}'.format(name, ext, real_ext)
1266
1267
1268def check_executable(exe, args=[]):
1269 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1270 args can be a list of arguments for a short output (like -version) """
1271 try:
1272 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1273 except OSError:
1274 return False
1275 return exe
1276
1277
1278def get_exe_version(exe, args=['--version'],
1279 version_re=None, unrecognized='present'):
1280 """ Returns the version of the specified executable,
1281 or False if the executable is not present """
1282 try:
1283 out, _ = subprocess.Popen(
1284 [exe] + args,
1285 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
1286 except OSError:
1287 return False
1288 if isinstance(out, bytes): # Python 2.x
1289 out = out.decode('ascii', 'ignore')
1290 return detect_exe_version(out, version_re, unrecognized)
1291
1292
1293def detect_exe_version(output, version_re=None, unrecognized='present'):
1294 assert isinstance(output, compat_str)
1295 if version_re is None:
1296 version_re = r'version\s+([-0-9._a-zA-Z]+)'
1297 m = re.search(version_re, output)
1298 if m:
1299 return m.group(1)
1300 else:
1301 return unrecognized
1302
1303
1304class PagedList(object):
1305 def __len__(self):
1306 # This is only useful for tests
1307 return len(self.getslice())
1308
1309
1310class OnDemandPagedList(PagedList):
1311 def __init__(self, pagefunc, pagesize):
1312 self._pagefunc = pagefunc
1313 self._pagesize = pagesize
1314
1315 def getslice(self, start=0, end=None):
1316 res = []
1317 for pagenum in itertools.count(start // self._pagesize):
1318 firstid = pagenum * self._pagesize
1319 nextfirstid = pagenum * self._pagesize + self._pagesize
1320 if start >= nextfirstid:
1321 continue
1322
1323 page_results = list(self._pagefunc(pagenum))
1324
1325 startv = (
1326 start % self._pagesize
1327 if firstid <= start < nextfirstid
1328 else 0)
1329
1330 endv = (
1331 ((end - 1) % self._pagesize) + 1
1332 if (end is not None and firstid <= end <= nextfirstid)
1333 else None)
1334
1335 if startv != 0 or endv is not None:
1336 page_results = page_results[startv:endv]
1337 res.extend(page_results)
1338
1339 # A little optimization - if the current page is not "full", i.e. does
1340 # not contain page_size videos, then we can assume that this page
1341 # is the last one - there are no more ids on further pages -
1342 # i.e. no need to query again.
1343 if len(page_results) + startv < self._pagesize:
1344 break
1345
1346 # If we got the whole page, but the next page is not interesting,
1347 # break out early as well
1348 if end == nextfirstid:
1349 break
1350 return res
1351
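# Example (illustrative; the page function below is a stand-in that simply
# yields consecutive integers, ten per page):
#
#     pages = OnDemandPagedList(lambda n: range(n * 10, (n + 1) * 10), 10)
#     pages.getslice(5, 8) == [5, 6, 7]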
1352
1353class InAdvancePagedList(PagedList):
1354 def __init__(self, pagefunc, pagecount, pagesize):
1355 self._pagefunc = pagefunc
1356 self._pagecount = pagecount
1357 self._pagesize = pagesize
1358
1359 def getslice(self, start=0, end=None):
1360 res = []
1361 start_page = start // self._pagesize
1362 end_page = (
1363 self._pagecount if end is None else (end // self._pagesize + 1))
1364 skip_elems = start - start_page * self._pagesize
1365 only_more = None if end is None else end - start
1366 for pagenum in range(start_page, end_page):
1367 page = list(self._pagefunc(pagenum))
1368 if skip_elems:
1369 page = page[skip_elems:]
1370 skip_elems = None
1371 if only_more is not None:
1372 if len(page) < only_more:
1373 only_more -= len(page)
1374 else:
1375 page = page[:only_more]
1376 res.extend(page)
1377 break
1378 res.extend(page)
1379 return res
1380
1381
1382def uppercase_escape(s):
1383 unicode_escape = codecs.getdecoder('unicode_escape')
1384 return re.sub(
1385 r'\\U[0-9a-fA-F]{8}',
1386 lambda m: unicode_escape(m.group(0))[0],
1387 s)
1388
1389
1390def escape_rfc3986(s):
1391 """Escape non-ASCII characters as suggested by RFC 3986"""
1392 if sys.version_info < (3, 0) and isinstance(s, unicode):
1393 s = s.encode('utf-8')
1394 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
1395
1396
1397def escape_url(url):
1398 """Escape URL as suggested by RFC 3986"""
1399 url_parsed = compat_urllib_parse_urlparse(url)
1400 return url_parsed._replace(
1401 path=escape_rfc3986(url_parsed.path),
1402 params=escape_rfc3986(url_parsed.params),
1403 query=escape_rfc3986(url_parsed.query),
1404 fragment=escape_rfc3986(url_parsed.fragment)
1405 ).geturl()
1406
1407try:
1408 struct.pack('!I', 0)
1409except TypeError:
1410 # In Python 2.6 (and some 2.7 versions), struct requires a bytes argument
1411 def struct_pack(spec, *args):
1412 if isinstance(spec, compat_str):
1413 spec = spec.encode('ascii')
1414 return struct.pack(spec, *args)
1415
1416 def struct_unpack(spec, *args):
1417 if isinstance(spec, compat_str):
1418 spec = spec.encode('ascii')
1419 return struct.unpack(spec, *args)
1420else:
1421 struct_pack = struct.pack
1422 struct_unpack = struct.unpack
1423
1424
1425def read_batch_urls(batch_fd):
1426 def fixup(url):
1427 if not isinstance(url, compat_str):
1428 url = url.decode('utf-8', 'replace')
1429 BOM_UTF8 = '\xef\xbb\xbf'
1430 if url.startswith(BOM_UTF8):
1431 url = url[len(BOM_UTF8):]
1432 url = url.strip()
1433 if url.startswith(('#', ';', ']')):
1434 return False
1435 return url
1436
1437 with contextlib.closing(batch_fd) as fd:
1438 return [url for url in map(fixup, fd) if url]
1439
1440
1441def urlencode_postdata(*args, **kargs):
1442 return compat_urllib_parse.urlencode(*args, **kargs).encode('ascii')
1443
1444
1445try:
1446 etree_iter = xml.etree.ElementTree.Element.iter
1447except AttributeError: # Python <=2.6
1448 etree_iter = lambda n: n.findall('.//*')
1449
1450
1451def parse_xml(s):
1452 class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
1453 def doctype(self, name, pubid, system):
1454 pass # Ignore doctypes
1455
1456 parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
1457 kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
1458 tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
1459 # Fix up XML parser in Python 2.x
1460 if sys.version_info < (3, 0):
1461 for n in etree_iter(tree):
1462 if n.text is not None:
1463 if not isinstance(n.text, compat_str):
1464 n.text = n.text.decode('utf-8')
1465 return tree
1466
1467
1468US_RATINGS = {
1469 'G': 0,
1470 'PG': 10,
1471 'PG-13': 13,
1472 'R': 16,
1473 'NC': 18,
1474}
1475
1476
1477def parse_age_limit(s):
1478 if s is None:
1479 return None
1480 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
1481 return int(m.group('age')) if m else US_RATINGS.get(s, None)
1482
1483
1484def strip_jsonp(code):
1485 return re.sub(
1486 r'(?s)^[a-zA-Z0-9_]+\s*\(\s*(.*)\);?\s*?(?://[^\n]*)*$', r'\1', code)
1487
1488
1489def js_to_json(code):
1490 def fix_kv(m):
1491 v = m.group(0)
1492 if v in ('true', 'false', 'null'):
1493 return v
1494 if v.startswith('"'):
1495 return v
1496 if v.startswith("'"):
1497 v = v[1:-1]
1498 v = re.sub(r"\\\\|\\'|\"", lambda m: {
1499 '\\\\': '\\\\',
1500 "\\'": "'",
1501 '"': '\\"',
1502 }[m.group(0)], v)
1503 return '"%s"' % v
1504
1505 res = re.sub(r'''(?x)
1506 "(?:[^"\\]*(?:\\\\|\\")?)*"|
1507 '(?:[^'\\]*(?:\\\\|\\')?)*'|
1508 [a-zA-Z_][a-zA-Z_0-9]*
1509 ''', fix_kv, code)
1510 res = re.sub(r',(\s*\])', lambda m: m.group(1), res)
1511 return res
1512
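# Example (illustrative): turn a JavaScript object literal into valid JSON
# that json.loads() accepts:
#
#     js_to_json("{'clip':{'provider':'pseudo'}}") == '{"clip":{"provider":"pseudo"}}'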
1513
1514def qualities(quality_ids):
1515 """ Get a numeric quality value out of a list of possible values """
1516 def q(qid):
1517 try:
1518 return quality_ids.index(qid)
1519 except ValueError:
1520 return -1
1521 return q
1522
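# Example (illustrative; the format identifiers below are hypothetical):
#
#     q = qualities(['240p', '360p', '720p'])
#     q('720p')  == 2
#     q('1080p') == -1  # unknown values sort below all known ones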
1523
1524DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
1525
1526
1527def limit_length(s, length):
1528 """ Add ellipses to overly long strings """
1529 if s is None:
1530 return None
1531 ELLIPSES = '...'
1532 if len(s) > length:
1533 return s[:length - len(ELLIPSES)] + ELLIPSES
1534 return s
1535
1536
1537def version_tuple(v):
1538 return tuple(int(e) for e in re.split(r'[-.]', v))
1539
1540
1541def is_outdated_version(version, limit, assume_new=True):
1542 if not version:
1543 return not assume_new
1544 try:
1545 return version_tuple(version) < version_tuple(limit)
1546 except ValueError:
1547 return not assume_new
1548
1549
1550def ytdl_is_updateable():
1551 """ Returns True if youtube-dl can be updated with -U """
1552 from zipimport import zipimporter
1553
1554 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
1555
1556
1557def args_to_str(args):
1558 # Get a short string representation for a subprocess command
1559 return ' '.join(shlex_quote(a) for a in args)
1560
1561
1562def urlhandle_detect_ext(url_handle):
1563 try:
1564 url_handle.headers
1565 getheader = lambda h: url_handle.headers[h]
1566 except AttributeError: # Python < 3
1567 getheader = url_handle.info().getheader
1568
1569 return getheader('Content-Type').split("/")[1]
1570
1571
1572def age_restricted(content_limit, age_limit):
1573 """ Returns True iff the content should be blocked """
1574
1575 if age_limit is None: # No limit set
1576 return False
1577 if content_limit is None:
1578 return False # Content available for everyone
1579 return age_limit < content_limit