#!/usr/bin/env python
# coding: utf-8

from __future__ import unicode_literals

import base64
import binascii
import calendar
import codecs
import contextlib
import ctypes
import datetime
import email.utils
import email.header
import errno
import functools
import gzip
import io
import itertools
import json
import locale
import math
import operator
import os
import platform
import random
import re
import socket
import ssl
import subprocess
import sys
import tempfile
import traceback
import xml.etree.ElementTree
import zlib

from .compat import (
    compat_HTMLParseError,
    compat_HTMLParser,
    compat_basestring,
    compat_chr,
    compat_ctypes_WINFUNCTYPE,
    compat_etree_fromstring,
    compat_expanduser,
    compat_html_entities,
    compat_html_entities_html5,
    compat_http_client,
    compat_kwargs,
    compat_os_name,
    compat_parse_qs,
    compat_shlex_quote,
    compat_socket_create_connection,
    compat_str,
    compat_struct_pack,
    compat_struct_unpack,
    compat_urllib_error,
    compat_urllib_parse,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
    compat_urllib_parse_unquote_plus,
    compat_urllib_request,
    compat_urlparse,
    compat_xpath,
)

from .socks import (
    ProxyType,
    sockssocket,
)


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in compat_urlparse.uses_netloc:
            compat_urlparse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))

std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:59.0) Gecko/20100101 Firefox/59.0 (Chrome)',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"


def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        os.rename(tf.name, fn)
    except Exception:
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise


if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
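
# Illustrative usage (editor's sketch, not part of the original module;
# the namespace URL is made up):
#   xpath_with_ns('media:thumbnail', {'media': 'http://search.yahoo.com/mrss/'})
#   returns '{http://search.yahoo.com/mrss/}thumbnail'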


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    def _find_xpath(xpath):
        return node.find(compat_xpath(xpath))

    if isinstance(xpath, (str, compat_str)):
        n = _find_xpath(xpath)
    else:
        for xp in xpath:
            n = _find_xpath(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element %s' % name)
        else:
            return None
    return n


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError('Could not find XML element\'s text %s' % name)
        else:
            return None
    return n.text


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    n = find_xpath_attr(node, xpath, key)
    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = '%s[@%s]' % (xpath, key) if name is None else name
            raise ExtractorError('Could not find XML attribute %s' % name)
        else:
            return None
    return n.attrib[key]


def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    return get_element_by_attribute('id', id, html)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    retval = get_elements_by_class(class_name, html)
    return retval[0] if retval else None


def get_element_by_attribute(attribute, value, html, escape_value=True):
    retval = get_elements_by_attribute(attribute, value, html, escape_value)
    return retval[0] if retval else None


def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    return get_elements_by_attribute(
        'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
        html, escape_value=False)


def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""

    value = re.escape(value) if escape_value else value

    retlist = []
    for m in re.finditer(r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value), html):
        res = m.group('content')

        if res.startswith('"') or res.startswith("'"):
            res = res[1:-1]

        retlist.append(unescapeHTML(res))

    return retlist
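
# Illustrative usage of the element lookup helpers above (editor's sketch;
# the sample HTML snippet is made up):
#   get_element_by_class('title', '<span class="title main">Foo</span>')
#   returns 'Foo'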


class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)


def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    # Older Python may throw HTMLParseError in case of malformed HTML
    except compat_HTMLParseError:
        pass
    return parser.attrs


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Newline vs <br />
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
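
# Illustrative usage (editor's sketch): clean_html('<p>Foo<br/>Bar</p>')
# returns 'Foo\nBar' -- tags are stripped and <br/> becomes a newline.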


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)


def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    timestamp = None
    timetuple = email.utils.parsedate_tz(timestr)
    if timetuple is not None:
        timestamp = email.utils.mktime_tz(timetuple)
    return timestamp


def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and ord(char) > 127:
            return '_'
        return char

    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
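
# Illustrative usage (editor's sketch):
#   sanitize_filename('Foo: Bar?')                  -> 'Foo - Bar'
#   sanitize_filename('Foo: Bar?', restricted=True) -> 'Foo_-_Bar'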


def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)


def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/rg3/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return url


def sanitized_Request(url, *args, **kwargs):
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)


def expand_path(s):
    """Expand shell variables and ~"""
    return os.path.expandvars(compat_expanduser(s))


def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
    for el in iterable:
        if el not in res:
            res.append(el)
    return res


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/rg3/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert type(s) == compat_str

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
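
# Illustrative usage (editor's sketch): named and numeric entities are decoded:
#   unescapeHTML('&amp;&#x41;&#97;') -> '&Aa'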


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    # Python 3 has a Unicode API
    if sys.version_info >= (3, 0):
        return s

    # Pass '' directly to use Unicode APIs on Windows 2000 and up
    # (Detecting Windows NT 4 is tricky because 'major >= 4' would
    # match Windows 9x series as well. Besides, NT 4 is obsolete.)
    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        return s

    # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
    if sys.platform.startswith('java'):
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')


def decodeFilename(b, for_subprocess=False):

    if sys.version_info >= (3, 0):
        return b

    if not isinstance(b, bytes):
        return b

    return b.decode(get_subprocess_encoding(), 'ignore')


def encodeArgument(s):
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)


def decodeArgument(b):
    return decodeFilename(b, True)


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval


def formatSeconds(secs):
    if secs > 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs > 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs


def make_HTTPS_handler(params, **kwargs):
    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        if opts_no_check_certificate:
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE
        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)


def bug_reports_message():
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    msg = '; please report this issue on https://yt-dl.org/bug .'
    msg += ' Make sure you are using the latest version; %s.' % update_cmd
    msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
    return msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    pass


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))


class UnsupportedError(ExtractorError):
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass


class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """
    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        self.countries = countries


class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info


class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass


class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        self.msg = msg


class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass


class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass


class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected


class XAttrMetadataError(YoutubeDLError):
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT) or
                'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    pass


def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/rg3/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')
    if source_address is not None:
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = compat_socket_create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc


def handle_youtubedl_headers(headers):
    filtered_headers = headers

    if 'Youtubedl-no-compression' in filtered_headers:
        filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
        del filtered_headers['Youtubedl-no-compression']

    return filtered_headers


class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/rg3/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # Per RFC 2616 the default charset is iso-8859-1, which Python 3 respects
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response


def make_socks_conn_class(base_class, socks_proxy):
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    if url_components.scheme.lower() == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif url_components.scheme.lower() in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif url_components.scheme.lower() == 'socks4a':
        socks_type = ProxyType.SOCKS4A

    def unquote_if_non_empty(s):
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection


class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        kwargs = {}
        conn_class = self._https_conn_class

        if hasattr(self, '_context'):  # python > 2.6
            kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            kwargs['check_hostname'] = self._check_hostname

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, True),
            req, **kwargs)


class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on the next HTTP request in a row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/rg3/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response


def extract_timezone(date_str):
    m = re.search(
        r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
        date_str)
    if not m:
        timezone = datetime.timedelta()
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
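
# Illustrative usage (editor's sketch): the offset is split off the date string:
#   extract_timezone('2018-04-25T12:00:00+02:00')
#   returns (datetime.timedelta(hours=2), '2018-04-25T12:00:00')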


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    try:
        date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
        dt = datetime.datetime.strptime(date_str, date_format) - timezone
        return calendar.timegm(dt.timetuple())
    except ValueError:
        pass
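
# Illustrative usage (editor's sketch):
#   parse_iso8601('1970-01-02T00:00:00Z') -> 86400
# A timezone offset, if present, is subtracted before the conversion.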


def date_formats(day_first=True):
    return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    for expression in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            pass
    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass
    if upload_date is not None:
        return compat_str(upload_date)
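
# Illustrative usage (editor's sketch):
#   unified_strdate('25/04/2018')                  -> '20180425'
#   unified_strdate('04/25/2018', day_first=False) -> '20180425'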


def unified_timestamp(date_str, day_first=True):
    if date_str is None:
        return None

    date_str = re.sub(r'[,|]', '', date_str)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600


def determine_ext(url, default_ext='unknown_video'):
    if url is None:
        return default_ext
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    elif guess.rstrip('/') in KNOWN_EXTENSIONS:
        return guess.rstrip('/')
    else:
        return default_ext
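
# Illustrative usage (editor's sketch; the URLs are made up):
#   determine_ext('http://example.com/video.mp4')              -> 'mp4'
#   determine_ext('http://example.com/foo/bar.mp4/?download')  -> 'mp4'
#   determine_ext('http://example.com/download')               -> 'unknown_video'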


def subtitles_filename(filename, sub_lang, sub_format):
    return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format


def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is not None:
        sign = match.group('sign')
        time = int(match.group('time'))
        if sign == '-':
            time = -time
        unit = match.group('unit')
        # A bad approximation?
        if unit == 'month':
            unit = 'day'
            time *= 30
        elif unit == 'year':
            unit = 'day'
            time *= 365
        unit += 's'
        delta = datetime.timedelta(**{unit: time})
        return today + delta
    return datetime.datetime.strptime(date_str, '%Y%m%d').date()
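
# Illustrative usage (editor's sketch):
#   date_from_str('20180425')  -> datetime.date(2018, 4, 25)
#   date_from_str('now-1week') -> today's date minus seven days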


def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is not None:
        return '-'.join(match.groups())
    else:
        return date_str


class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        if start is not None:
            self.start = date_from_str(start)
        else:
            self.start = datetime.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end)
        else:
            self.end = datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())


def platform_name():
    """ Returns the platform name as a compat_str """
    res = platform.platform()
    if isinstance(res, bytes):
        res = res.decode(preferredencoding())

    assert isinstance(res, compat_str)
    return res


def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or
                GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True


def write_string(s, out=None, encoding=None):
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '') or
            sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()


def bytes_to_intlist(bs):
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3
        return list(bs)
    else:
        return [ord(c) for c in bs]


def intlist_to_bytes(xs):
    if not xs:
        return b''
    return compat_struct_pack('%dB' % len(xs), *xs)


# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)


class locked_file(object):
    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, exclusive)
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)


def get_filesystem_encoding():
    encoding = sys.getfilesystemencoding()
    return encoding if encoding is not None else 'utf-8'


def shell_quote(args):
    quoted_args = []
    encoding = get_filesystem_encoding()
    for a in args:
        if isinstance(a, bytes):
            # We may get a filename encoded with 'encodeFilename'
            a = a.decode(encoding)
        quoted_args.append(compat_shlex_quote(a))
    return ' '.join(quoted_args)


def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    url, idata = unsmuggle_url(url, {})
    data.update(idata)
    sdata = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + sdata


def unsmuggle_url(smug_url, default=None):
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, sdata = smug_url.rpartition('#')
    jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
    data = json.loads(jsond)
    return url, data
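
# Illustrative round trip (editor's sketch; the URL is made up):
#   smuggled = smuggle_url('http://example.com/video', {'referer': 'http://example.com'})
#   unsmuggle_url(smuggled)
#   returns ('http://example.com/video', {'referer': 'http://example.com'})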


def format_bytes(bytes):
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffix)
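
# Illustrative usage (editor's sketch):
#   format_bytes(1024)    -> '1.00KiB'
#   format_bytes(1536000) -> '1.46MiB'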


def lookup_unit_table(unit_table, s):
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if not m:
        return None
    num_str = m.group('num').replace(',', '.')
    mult = unit_table[m.group('unit')]
    return int(float(num_str) * mult)


def parse_filesize(s):
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
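
# Illustrative usage (editor's sketch): binary and decimal units are distinguished:
#   parse_filesize('1.5 MiB') -> 1572864
#   parse_filesize('500 KB')  -> 500000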


def parse_count(s):
    if s is None:
        return None

    s = s.strip()

    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
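
# Illustrative usage (editor's sketch):
#   parse_count('1,480') -> 1480
#   parse_count('45.5k') -> 45500
#   parse_count('1.2M')  -> 1200000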


def parse_resolution(s):
    if s is None:
        return {}

    mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'\b(\d+)[pPiI]\b', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        return {'height': int(mobj.group(1)) * 540}

    return {}
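
# Illustrative usage (editor's sketch):
#   parse_resolution('1920x1080') -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')      -> {'height': 720}
#   parse_resolution('4K')        -> {'height': 2160}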


def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    try:
        return month_names.index(name) + 1
    except ValueError:
        return None


def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations """

    try:
        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
    except ValueError:
        return None
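
# Illustrative usage (editor's sketch):
#   month_by_name('May')            -> 5
#   month_by_name('décembre', 'fr') -> 12
#   month_by_abbreviation('Sep')    -> 9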


def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)


def setproctitle(title):
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this


def remove_start(s, start):
    return s[len(start):] if s is not None and s.startswith(start) else s


def remove_end(s, end):
    return s[:-len(end)] if s is not None and s.endswith(end) else s


def remove_quotes(s):
    if s is None or len(s) < 2:
        return s
    for quote in ('"', "'", ):
        if s[0] == quote and s[-1] == quote:
            return s[1:-1]
    return s


def url_basename(url):
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').split('/')[-1]


def base_url(url):
    return re.match(r'https?://[^?#&]+/', url).group()


def urljoin(base, path):
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not isinstance(path, compat_str) or not path:
        return None
    if re.match(r'^(?:https?:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
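
# Illustrative usage (editor's sketch; the URLs are made up):
#   urljoin('http://example.com/a/', 'b/c.mp4')  -> 'http://example.com/a/b/c.mp4'
#   urljoin('http://example.com/a/', '/b/c.mp4') -> 'http://example.com/b/c.mp4'
#   urljoin(None, 'b/c.mp4')                     -> None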
1813
1814
1815class HEADRequest(compat_urllib_request.Request):
1816 def get_method(self):
1817 return 'HEAD'
1818
1819
1820class PUTRequest(compat_urllib_request.Request):
1821 def get_method(self):
1822 return 'PUT'
1823
1824
1825def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
1826 if get_attr:
1827 if v is not None:
1828 v = getattr(v, get_attr, None)
1829 if v == '':
1830 v = None
1831 if v is None:
1832 return default
1833 try:
1834 return int(v) * invscale // scale
1835 except ValueError:
1836 return default
1837
1838
1839def str_or_none(v, default=None):
1840 return default if v is None else compat_str(v)
1841
1842
1843def str_to_int(int_str):
1844 """ A more relaxed version of int_or_none """
1845 if int_str is None:
1846 return None
1847 int_str = re.sub(r'[,\.\+]', '', int_str)
1848 return int(int_str)
1849
1850
1851def float_or_none(v, scale=1, invscale=1, default=None):
1852 if v is None:
1853 return default
1854 try:
1855 return float(v) * invscale / scale
1856 except ValueError:
1857 return default
1858
1859
1860def bool_or_none(v, default=None):
1861 return v if isinstance(v, bool) else default
1862
1863
1864def strip_or_none(v):
1865 return None if v is None else v.strip()
1866
1867
1868def parse_duration(s):
1869 if not isinstance(s, compat_basestring):
1870 return None
1871
1872 s = s.strip()
1873
1874 days, hours, mins, secs, ms = [None] * 5
1875 m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
1876 if m:
1877 days, hours, mins, secs, ms = m.groups()
1878 else:
1879 m = re.match(
1880 r'''(?ix)(?:P?
1881 (?:
1882 [0-9]+\s*y(?:ears?)?\s*
1883 )?
1884 (?:
1885 [0-9]+\s*m(?:onths?)?\s*
1886 )?
1887 (?:
1888 [0-9]+\s*w(?:eeks?)?\s*
1889 )?
1890 (?:
1891 (?P<days>[0-9]+)\s*d(?:ays?)?\s*
1892 )?
1893 T)?
1894 (?:
1895 (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
1896 )?
1897 (?:
1898 (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
1899 )?
1900 (?:
1901 (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
1902 )?Z?$''', s)
1903 if m:
1904 days, hours, mins, secs, ms = m.groups()
1905 else:
1906 m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
1907 if m:
1908 hours, mins = m.groups()
1909 else:
1910 return None
1911
1912 duration = 0
1913 if secs:
1914 duration += float(secs)
1915 if mins:
1916 duration += float(mins) * 60
1917 if hours:
1918 duration += float(hours) * 60 * 60
1919 if days:
1920 duration += float(days) * 24 * 60 * 60
1921 if ms:
1922 duration += float(ms)
1923 return duration
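# Accepted shapes, as an illustrative sketch (result in seconds):
#   parse_duration('1:02:03.5')     ->  3723.5   # [[[DD:]HH:]MM:]SS[.ms]
#   parse_duration('3 min 10 sec')  ->  190.0    # free-form unit suffixes
#   parse_duration('PT1H30M')       ->  5400.0   # ISO 8601 durations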
1924
1925
1926def prepend_extension(filename, ext, expected_real_ext=None):
1927 name, real_ext = os.path.splitext(filename)
1928 return (
1929 '{0}.{1}{2}'.format(name, ext, real_ext)
1930 if not expected_real_ext or real_ext[1:] == expected_real_ext
1931 else '{0}.{1}'.format(filename, ext))
1932
1933
1934def replace_extension(filename, ext, expected_real_ext=None):
1935 name, real_ext = os.path.splitext(filename)
1936 return '{0}.{1}'.format(
1937 name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
1938 ext)
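# Sketch: prepend_extension('video.mp4', 'temp') -> 'video.temp.mp4' and
# replace_extension('video.mp4', 'mkv') -> 'video.mkv'; when expected_real_ext
# is given and does not match, the new extension is simply appended instead.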
1939
1940
1941def check_executable(exe, args=[]):
1942 """ Checks if the given binary is installed somewhere in PATH, and returns its name.
1943 args can be a list of arguments for a short output (like -version) """
1944 try:
1945 subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
1946 except OSError:
1947 return False
1948 return exe
1949
1950
1951def get_exe_version(exe, args=['--version'],
1952 version_re=None, unrecognized='present'):
1953 """ Returns the version of the specified executable,
1954 or False if the executable is not present """
1955 try:
1956 # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
1957 # SIGTTOU if youtube-dl is run in the background.
1958 # See https://github.com/rg3/youtube-dl/issues/955#issuecomment-209789656
1959 out, _ = subprocess.Popen(
1960 [encodeArgument(exe)] + args,
1961 stdin=subprocess.PIPE,
1962 stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
1963 except OSError:
1964 return False
1965 if isinstance(out, bytes): # Python 2.x
1966 out = out.decode('ascii', 'ignore')
1967 return detect_exe_version(out, version_re, unrecognized)
1968
1969
1970def detect_exe_version(output, version_re=None, unrecognized='present'):
1971 assert isinstance(output, compat_str)
1972 if version_re is None:
1973 version_re = r'version\s+([-0-9._a-zA-Z]+)'
1974 m = re.search(version_re, output)
1975 if m:
1976 return m.group(1)
1977 else:
1978 return unrecognized
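# Sketch: detect_exe_version('ffmpeg version 3.4.2 Copyright ...') -> '3.4.2';
# output with no recognizable version string yields 'present'.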
1979
1980
1981class PagedList(object):
1982 def __len__(self):
1983 # This is only useful for tests
1984 return len(self.getslice())
1985
1986
1987class OnDemandPagedList(PagedList):
1988 def __init__(self, pagefunc, pagesize, use_cache=True):
1989 self._pagefunc = pagefunc
1990 self._pagesize = pagesize
1991 self._use_cache = use_cache
1992 if use_cache:
1993 self._cache = {}
1994
1995 def getslice(self, start=0, end=None):
1996 res = []
1997 for pagenum in itertools.count(start // self._pagesize):
1998 firstid = pagenum * self._pagesize
1999 nextfirstid = pagenum * self._pagesize + self._pagesize
2000 if start >= nextfirstid:
2001 continue
2002
2003 page_results = None
2004 if self._use_cache:
2005 page_results = self._cache.get(pagenum)
2006 if page_results is None:
2007 page_results = list(self._pagefunc(pagenum))
2008 if self._use_cache:
2009 self._cache[pagenum] = page_results
2010
2011 startv = (
2012 start % self._pagesize
2013 if firstid <= start < nextfirstid
2014 else 0)
2015
2016 endv = (
2017 ((end - 1) % self._pagesize) + 1
2018 if (end is not None and firstid <= end <= nextfirstid)
2019 else None)
2020
2021 if startv != 0 or endv is not None:
2022 page_results = page_results[startv:endv]
2023 res.extend(page_results)
2024
            # A little optimization: if the current page is not "full", i.e. does
            # not contain page_size videos, we can assume it is the last one, so
            # there are no more ids on further pages and no need to query again.
2029 if len(page_results) + startv < self._pagesize:
2030 break
2031
2032 # If we got the whole page, but the next page is not interesting,
2033 # break out early as well
2034 if end == nextfirstid:
2035 break
2036 return res
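# Usage sketch (with a hypothetical page function): only the pages that
# overlap the requested slice are ever fetched, e.g. with ten items per page
#   pl = OnDemandPagedList(lambda n: ['item%d' % (n * 10 + i) for i in range(10)], 10)
#   pl.getslice(25, 28)  ->  ['item25', 'item26', 'item27']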
2037
2038
2039class InAdvancePagedList(PagedList):
2040 def __init__(self, pagefunc, pagecount, pagesize):
2041 self._pagefunc = pagefunc
2042 self._pagecount = pagecount
2043 self._pagesize = pagesize
2044
2045 def getslice(self, start=0, end=None):
2046 res = []
2047 start_page = start // self._pagesize
2048 end_page = (
2049 self._pagecount if end is None else (end // self._pagesize + 1))
2050 skip_elems = start - start_page * self._pagesize
2051 only_more = None if end is None else end - start
2052 for pagenum in range(start_page, end_page):
2053 page = list(self._pagefunc(pagenum))
2054 if skip_elems:
2055 page = page[skip_elems:]
2056 skip_elems = None
2057 if only_more is not None:
2058 if len(page) < only_more:
2059 only_more -= len(page)
2060 else:
2061 page = page[:only_more]
2062 res.extend(page)
2063 break
2064 res.extend(page)
2065 return res
2066
2067
2068def uppercase_escape(s):
2069 unicode_escape = codecs.getdecoder('unicode_escape')
2070 return re.sub(
2071 r'\\U[0-9a-fA-F]{8}',
2072 lambda m: unicode_escape(m.group(0))[0],
2073 s)
2074
2075
2076def lowercase_escape(s):
2077 unicode_escape = codecs.getdecoder('unicode_escape')
2078 return re.sub(
2079 r'\\u[0-9a-fA-F]{4}',
2080 lambda m: unicode_escape(m.group(0))[0],
2081 s)
2082
2083
2084def escape_rfc3986(s):
2085 """Escape non-ASCII characters as suggested by RFC 3986"""
2086 if sys.version_info < (3, 0) and isinstance(s, compat_str):
2087 s = s.encode('utf-8')
2088 return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
2089
2090
2091def escape_url(url):
2092 """Escape URL as suggested by RFC 3986"""
2093 url_parsed = compat_urllib_parse_urlparse(url)
2094 return url_parsed._replace(
2095 netloc=url_parsed.netloc.encode('idna').decode('ascii'),
2096 path=escape_rfc3986(url_parsed.path),
2097 params=escape_rfc3986(url_parsed.params),
2098 query=escape_rfc3986(url_parsed.query),
2099 fragment=escape_rfc3986(url_parsed.fragment)
2100 ).geturl()
2101
2102
2103def read_batch_urls(batch_fd):
2104 def fixup(url):
2105 if not isinstance(url, compat_str):
2106 url = url.decode('utf-8', 'replace')
2107 BOM_UTF8 = '\xef\xbb\xbf'
2108 if url.startswith(BOM_UTF8):
2109 url = url[len(BOM_UTF8):]
2110 url = url.strip()
2111 if url.startswith(('#', ';', ']')):
2112 return False
2113 return url
2114
2115 with contextlib.closing(batch_fd) as fd:
2116 return [url for url in map(fixup, fd) if url]
2117
2118
2119def urlencode_postdata(*args, **kargs):
2120 return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
2121
2122
2123def update_url_query(url, query):
2124 if not query:
2125 return url
2126 parsed_url = compat_urlparse.urlparse(url)
2127 qs = compat_parse_qs(parsed_url.query)
2128 qs.update(query)
2129 return compat_urlparse.urlunparse(parsed_url._replace(
2130 query=compat_urllib_parse_urlencode(qs, True)))
2131
2132
2133def update_Request(req, url=None, data=None, headers={}, query={}):
2134 req_headers = req.headers.copy()
2135 req_headers.update(headers)
2136 req_data = data or req.data
2137 req_url = update_url_query(url or req.get_full_url(), query)
2138 req_get_method = req.get_method()
2139 if req_get_method == 'HEAD':
2140 req_type = HEADRequest
2141 elif req_get_method == 'PUT':
2142 req_type = PUTRequest
2143 else:
2144 req_type = compat_urllib_request.Request
2145 new_req = req_type(
2146 req_url, data=req_data, headers=req_headers,
2147 origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
2148 if hasattr(req, 'timeout'):
2149 new_req.timeout = req.timeout
2150 return new_req
2151
2152
2153def _multipart_encode_impl(data, boundary):
2154 content_type = 'multipart/form-data; boundary=%s' % boundary
2155
2156 out = b''
2157 for k, v in data.items():
2158 out += b'--' + boundary.encode('ascii') + b'\r\n'
2159 if isinstance(k, compat_str):
2160 k = k.encode('utf-8')
2161 if isinstance(v, compat_str):
2162 v = v.encode('utf-8')
2163 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2164 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2165 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2166 if boundary.encode('ascii') in content:
2167 raise ValueError('Boundary overlaps with data')
2168 out += content
2169
2170 out += b'--' + boundary.encode('ascii') + b'--\r\n'
2171
2172 return out, content_type
2173
2174
2175def multipart_encode(data, boundary=None):
2176 '''
2177 Encode a dict to RFC 7578-compliant form-data
2178
2179 data:
2180 A dict where keys and values can be either Unicode or bytes-like
2181 objects.
    boundary:
        If specified, a Unicode object to be used as the boundary. Otherwise
        a random boundary is generated.
2185
2186 Reference: https://tools.ietf.org/html/rfc7578
2187 '''
2188 has_specified_boundary = boundary is not None
2189
2190 while True:
2191 if boundary is None:
2192 boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
2193
2194 try:
2195 out, content_type = _multipart_encode_impl(data, boundary)
2196 break
2197 except ValueError:
2198 if has_specified_boundary:
2199 raise
2200 boundary = None
2201
2202 return out, content_type
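# Sketch, assuming a fixed boundary for readability:
#   out, ctype = multipart_encode({'field': 'value'}, boundary='xxx')
# gives ctype == 'multipart/form-data; boundary=xxx' and out containing one
# 'Content-Disposition: form-data; name="field"' part followed by the closing
# '--xxx--' delimiter.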
2203
2204
2205def dict_get(d, key_or_keys, default=None, skip_false_values=True):
2206 if isinstance(key_or_keys, (list, tuple)):
2207 for key in key_or_keys:
2208 if key not in d or d[key] is None or skip_false_values and not d[key]:
2209 continue
2210 return d[key]
2211 return default
2212 return d.get(key_or_keys, default)
2213
2214
2215def try_get(src, getter, expected_type=None):
2216 if not isinstance(getter, (list, tuple)):
2217 getter = [getter]
2218 for get in getter:
2219 try:
2220 v = get(src)
2221 except (AttributeError, KeyError, TypeError, IndexError):
2222 pass
2223 else:
2224 if expected_type is None or isinstance(v, expected_type):
2225 return v
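# Sketch: both helpers make optional metadata extraction fail-safe, e.g.
#   dict_get({'a': '', 'b': 5}, ('a', 'b'))         ->  5     ('' is skipped)
#   try_get({'a': [1]}, lambda x: x['a'][0], int)   ->  1
#   try_get({'a': [1]}, lambda x: x['b'][0], int)   ->  None  (KeyError swallowed)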
2226
2227
2228def merge_dicts(*dicts):
2229 merged = {}
2230 for a_dict in dicts:
2231 for k, v in a_dict.items():
2232 if v is None:
2233 continue
2234 if (k not in merged or
2235 (isinstance(v, compat_str) and v and
2236 isinstance(merged[k], compat_str) and
2237 not merged[k])):
2238 merged[k] = v
2239 return merged
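# Sketch: earlier dicts win, except that empty strings are filled in from
# later ones and None values are ignored, e.g.
#   merge_dicts({'title': ''}, {'title': 'Demo', 'id': '42'})
#       ->  {'title': 'Demo', 'id': '42'}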
2240
2241
2242def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
2243 return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
2244
2245
2246US_RATINGS = {
2247 'G': 0,
2248 'PG': 10,
2249 'PG-13': 13,
2250 'R': 16,
2251 'NC': 18,
2252}
2253
2254
2255TV_PARENTAL_GUIDELINES = {
2256 'TV-Y': 0,
2257 'TV-Y7': 7,
2258 'TV-G': 0,
2259 'TV-PG': 0,
2260 'TV-14': 14,
2261 'TV-MA': 17,
2262}
2263
2264
2265def parse_age_limit(s):
2266 if type(s) == int:
2267 return s if 0 <= s <= 21 else None
2268 if not isinstance(s, compat_basestring):
2269 return None
2270 m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
2271 if m:
2272 return int(m.group('age'))
2273 if s in US_RATINGS:
2274 return US_RATINGS[s]
2275 return TV_PARENTAL_GUIDELINES.get(s)
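# e.g. parse_age_limit(18) -> 18, parse_age_limit('16+') -> 16,
# parse_age_limit('TV-MA') -> 17, and unknown ratings yield None (sketch).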
2276
2277
2278def strip_jsonp(code):
2279 return re.sub(
2280 r'''(?sx)^
2281 (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]+)
2282 (?:\s*&&\s*(?P=func_name))?
2283 \s*\(\s*(?P<callback_data>.*)\);?
2284 \s*?(?://[^\n]*)*$''',
2285 r'\g<callback_data>', code)
2286
2287
2288def js_to_json(code):
2289 COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
2290 SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
2291 INTEGER_TABLE = (
2292 (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
2293 (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
2294 )
2295
2296 def fix_kv(m):
2297 v = m.group(0)
2298 if v in ('true', 'false', 'null'):
2299 return v
2300 elif v.startswith('/*') or v.startswith('//') or v == ',':
2301 return ""
2302
2303 if v[0] in ("'", '"'):
2304 v = re.sub(r'(?s)\\.|"', lambda m: {
2305 '"': '\\"',
2306 "\\'": "'",
2307 '\\\n': '',
2308 '\\x': '\\u00',
2309 }.get(m.group(0), m.group(0)), v[1:-1])
2310
2311 for regex, base in INTEGER_TABLE:
2312 im = re.match(regex, v)
2313 if im:
2314 i = int(im.group(1), base)
2315 return '"%d":' % i if v.endswith(':') else '%d' % i
2316
2317 return '"%s"' % v
2318
2319 return re.sub(r'''(?sx)
2320 "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
2321 '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
2322 {comment}|,(?={skip}[\]}}])|
2323 (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
2324 \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
2325 [0-9]+(?={skip}:)
2326 '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
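# Sketch of the JavaScript-flavoured input this tolerates (single quotes,
# unquoted keys, hex literals, trailing commas):
#   js_to_json("{foo: 'bar', baz: 0x10, }")  ->  '{"foo": "bar", "baz": 16 }'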
2327
2328
2329def qualities(quality_ids):
2330 """ Get a numeric quality value out of a list of possible values """
2331 def q(qid):
2332 try:
2333 return quality_ids.index(qid)
2334 except ValueError:
2335 return -1
2336 return q
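# Sketch: q = qualities(['240p', '360p', '720p']); q('720p') -> 2 and
# q('4K') -> -1, so formats can be ranked by a site-specific ordering.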
2337
2338
2339DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
2340
2341
2342def limit_length(s, length):
2343 """ Add ellipses to overly long strings """
2344 if s is None:
2345 return None
2346 ELLIPSES = '...'
2347 if len(s) > length:
2348 return s[:length - len(ELLIPSES)] + ELLIPSES
2349 return s
2350
2351
2352def version_tuple(v):
2353 return tuple(int(e) for e in re.split(r'[-.]', v))
2354
2355
2356def is_outdated_version(version, limit, assume_new=True):
2357 if not version:
2358 return not assume_new
2359 try:
2360 return version_tuple(version) < version_tuple(limit)
2361 except ValueError:
2362 return not assume_new
2363
2364
2365def ytdl_is_updateable():
2366 """ Returns if youtube-dl can be updated with -U """
2367 from zipimport import zipimporter
2368
2369 return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
2370
2371
2372def args_to_str(args):
2373 # Get a short string representation for a subprocess command
2374 return ' '.join(compat_shlex_quote(a) for a in args)
2375
2376
2377def error_to_compat_str(err):
2378 err_str = str(err)
2379 # On python 2 error byte string must be decoded with proper
2380 # encoding rather than ascii
2381 if sys.version_info[0] < 3:
2382 err_str = err_str.decode(preferredencoding())
2383 return err_str
2384
2385
2386def mimetype2ext(mt):
2387 if mt is None:
2388 return None
2389
2390 ext = {
2391 'audio/mp4': 'm4a',
2392 # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
2393 # it's the most popular one
2394 'audio/mpeg': 'mp3',
2395 }.get(mt)
2396 if ext is not None:
2397 return ext
2398
2399 _, _, res = mt.rpartition('/')
2400 res = res.split(';')[0].strip().lower()
2401
2402 return {
2403 '3gpp': '3gp',
2404 'smptett+xml': 'tt',
2405 'ttaf+xml': 'dfxp',
2406 'ttml+xml': 'ttml',
2407 'x-flv': 'flv',
2408 'x-mp4-fragmented': 'mp4',
2409 'x-ms-sami': 'sami',
2410 'x-ms-wmv': 'wmv',
2411 'mpegurl': 'm3u8',
2412 'x-mpegurl': 'm3u8',
2413 'vnd.apple.mpegurl': 'm3u8',
2414 'dash+xml': 'mpd',
2415 'f4m+xml': 'f4m',
2416 'hds+xml': 'f4m',
2417 'vnd.ms-sstr+xml': 'ism',
2418 'quicktime': 'mov',
2419 'mp2t': 'ts',
2420 }.get(res, res)
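# Sketch: mimetype2ext('application/x-mpegURL') -> 'm3u8' and
# mimetype2ext('video/MP2T; charset=UTF-8') -> 'ts'; subtype case and MIME
# parameters are ignored, unknown subtypes fall back to the bare subtype.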
2421
2422
2423def parse_codecs(codecs_str):
2424 # http://tools.ietf.org/html/rfc6381
2425 if not codecs_str:
2426 return {}
    split_codecs = list(filter(None, map(
        lambda s: s.strip(), codecs_str.strip().strip(',').split(','))))
    vcodec, acodec = None, None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1'):
            if not vcodec:
                vcodec = full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if not vcodec and not acodec:
        if len(split_codecs) == 2:
            # Neither codec was recognized: assume the common "video, audio" order
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
        elif len(split_codecs) == 1:
            # A single unrecognized codec: assume an audio-only stream
            return {
                'vcodec': 'none',
                'acodec': split_codecs[0],
            }
    else:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    return {}
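# Sketch: parse_codecs('avc1.64001f, mp4a.40.2')
#   ->  {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
# while a lone audio codec gives {'vcodec': 'none', 'acodec': 'mp4a.40.2'}.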
2457
2458
2459def urlhandle_detect_ext(url_handle):
2460 getheader = url_handle.headers.get
2461
2462 cd = getheader('Content-Disposition')
2463 if cd:
2464 m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
2465 if m:
2466 e = determine_ext(m.group('filename'), default_ext=None)
2467 if e:
2468 return e
2469
2470 return mimetype2ext(getheader('Content-Type'))
2471
2472
2473def encode_data_uri(data, mime_type):
2474 return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
2475
2476
2477def age_restricted(content_limit, age_limit):
2478 """ Returns True iff the content should be blocked """
2479
2480 if age_limit is None: # No limit set
2481 return False
2482 if content_limit is None:
2483 return False # Content available for everyone
2484 return age_limit < content_limit
2485
2486
2487def is_html(first_bytes):
2488 """ Detect whether a file contains HTML by examining its first bytes. """
2489
2490 BOMS = [
2491 (b'\xef\xbb\xbf', 'utf-8'),
2492 (b'\x00\x00\xfe\xff', 'utf-32-be'),
2493 (b'\xff\xfe\x00\x00', 'utf-32-le'),
2494 (b'\xff\xfe', 'utf-16-le'),
2495 (b'\xfe\xff', 'utf-16-be'),
2496 ]
2497 for bom, enc in BOMS:
2498 if first_bytes.startswith(bom):
2499 s = first_bytes[len(bom):].decode(enc, 'replace')
2500 break
2501 else:
2502 s = first_bytes.decode('utf-8', 'replace')
2503
2504 return re.match(r'^\s*<', s)
2505
2506
2507def determine_protocol(info_dict):
2508 protocol = info_dict.get('protocol')
2509 if protocol is not None:
2510 return protocol
2511
2512 url = info_dict['url']
2513 if url.startswith('rtmp'):
2514 return 'rtmp'
2515 elif url.startswith('mms'):
2516 return 'mms'
2517 elif url.startswith('rtsp'):
2518 return 'rtsp'
2519
2520 ext = determine_ext(url)
2521 if ext == 'm3u8':
2522 return 'm3u8'
2523 elif ext == 'f4m':
2524 return 'f4m'
2525
2526 return compat_urllib_parse_urlparse(url).scheme
2527
2528
2529def render_table(header_row, data):
2530 """ Render a list of rows, each as a list of values """
2531 table = [header_row] + data
2532 max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
2533 format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
2534 return '\n'.join(format_str % tuple(row) for row in table)
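# Sketch: render_table(['id', 'ext'], [['22', 'mp4'], ['251', 'webm']]) yields
# left-aligned columns, one row per line:
#   id  ext
#   22  mp4
#   251 webm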
2535
2536
2537def _match_one(filter_part, dct):
2538 COMPARISON_OPERATORS = {
2539 '<': operator.lt,
2540 '<=': operator.le,
2541 '>': operator.gt,
2542 '>=': operator.ge,
2543 '=': operator.eq,
2544 '!=': operator.ne,
2545 }
2546 operator_rex = re.compile(r'''(?x)\s*
2547 (?P<key>[a-z_]+)
2548 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
2549 (?:
2550 (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
2551 (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
2552 (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
2553 )
2554 \s*$
2555 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
2556 m = operator_rex.search(filter_part)
2557 if m:
2558 op = COMPARISON_OPERATORS[m.group('op')]
2559 actual_value = dct.get(m.group('key'))
2560 if (m.group('quotedstrval') is not None or
2561 m.group('strval') is not None or
2562 # If the original field is a string and matching comparisonvalue is
2563 # a number we should respect the origin of the original field
2564 # and process comparison value as a string (see
2565 # https://github.com/rg3/youtube-dl/issues/11082).
2566 actual_value is not None and m.group('intval') is not None and
2567 isinstance(actual_value, compat_str)):
2568 if m.group('op') not in ('=', '!='):
2569 raise ValueError(
2570 'Operator %s does not support string values!' % m.group('op'))
2571 comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
2572 quote = m.group('quote')
2573 if quote is not None:
2574 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
2575 else:
2576 try:
2577 comparison_value = int(m.group('intval'))
2578 except ValueError:
2579 comparison_value = parse_filesize(m.group('intval'))
2580 if comparison_value is None:
2581 comparison_value = parse_filesize(m.group('intval') + 'B')
2582 if comparison_value is None:
2583 raise ValueError(
2584 'Invalid integer value %r in filter part %r' % (
2585 m.group('intval'), filter_part))
2586 if actual_value is None:
2587 return m.group('none_inclusive')
2588 return op(actual_value, comparison_value)
2589
2590 UNARY_OPERATORS = {
2591 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
2592 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
2593 }
2594 operator_rex = re.compile(r'''(?x)\s*
2595 (?P<op>%s)\s*(?P<key>[a-z_]+)
2596 \s*$
2597 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
2598 m = operator_rex.search(filter_part)
2599 if m:
2600 op = UNARY_OPERATORS[m.group('op')]
2601 actual_value = dct.get(m.group('key'))
2602 return op(actual_value)
2603
2604 raise ValueError('Invalid filter part %r' % filter_part)
2605
2606
2607def match_str(filter_str, dct):
2608 """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
2609
2610 return all(
2611 _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
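# Filter syntax sketch: clauses joined with '&'; comparisons support numbers,
# file sizes and strings, while bare 'key' / '!key' test presence/absence:
#   match_str('duration > 60 & uploader', {'duration': 90, 'uploader': 'x'})  ->  True
#   match_str('!is_live', {'is_live': True})                                  ->  False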
2612
2613
2614def match_filter_func(filter_str):
2615 def _match_func(info_dict):
2616 if match_str(filter_str, info_dict):
2617 return None
2618 else:
2619 video_title = info_dict.get('title', info_dict.get('id', 'video'))
2620 return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
2621 return _match_func
2622
2623
2624def parse_dfxp_time_expr(time_expr):
2625 if not time_expr:
2626 return
2627
2628 mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
2629 if mobj:
2630 return float(mobj.group('time_offset'))
2631
2632 mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
2633 if mobj:
2634 return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
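# e.g. parse_dfxp_time_expr('5.2s') -> 5.2 and
# parse_dfxp_time_expr('00:01:02.5') -> 62.5 (illustrative; unparseable
# expressions yield None).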
2635
2636
2637def srt_subtitles_timecode(seconds):
2638 return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
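# e.g. srt_subtitles_timecode(3723.5) -> '01:02:03,500' (sketch).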
2639
2640
2641def dfxp2srt(dfxp_data):
2642 '''
2643 @param dfxp_data A bytes-like object containing DFXP data
2644 @returns A unicode object containing converted SRT data
2645 '''
2646 LEGACY_NAMESPACES = (
2647 (b'http://www.w3.org/ns/ttml', [
2648 b'http://www.w3.org/2004/11/ttaf1',
2649 b'http://www.w3.org/2006/04/ttaf1',
2650 b'http://www.w3.org/2006/10/ttaf1',
2651 ]),
2652 (b'http://www.w3.org/ns/ttml#styling', [
2653 b'http://www.w3.org/ns/ttml#style',
2654 ]),
2655 )
2656
2657 SUPPORTED_STYLING = [
2658 'color',
2659 'fontFamily',
2660 'fontSize',
2661 'fontStyle',
2662 'fontWeight',
2663 'textDecoration'
2664 ]
2665
2666 _x = functools.partial(xpath_with_ns, ns_map={
2667 'ttml': 'http://www.w3.org/ns/ttml',
2668 'tts': 'http://www.w3.org/ns/ttml#styling',
2669 })
2670
2671 styles = {}
2672 default_style = {}
2673
2674 class TTMLPElementParser(object):
2675 _out = ''
2676 _unclosed_elements = []
2677 _applied_styles = []
2678
2679 def start(self, tag, attrib):
2680 if tag in (_x('ttml:br'), 'br'):
2681 self._out += '\n'
2682 else:
2683 unclosed_elements = []
2684 style = {}
2685 element_style_id = attrib.get('style')
2686 if default_style:
2687 style.update(default_style)
2688 if element_style_id:
2689 style.update(styles.get(element_style_id, {}))
2690 for prop in SUPPORTED_STYLING:
2691 prop_val = attrib.get(_x('tts:' + prop))
2692 if prop_val:
2693 style[prop] = prop_val
2694 if style:
2695 font = ''
2696 for k, v in sorted(style.items()):
2697 if self._applied_styles and self._applied_styles[-1].get(k) == v:
2698 continue
2699 if k == 'color':
2700 font += ' color="%s"' % v
2701 elif k == 'fontSize':
2702 font += ' size="%s"' % v
2703 elif k == 'fontFamily':
2704 font += ' face="%s"' % v
2705 elif k == 'fontWeight' and v == 'bold':
2706 self._out += '<b>'
2707 unclosed_elements.append('b')
2708 elif k == 'fontStyle' and v == 'italic':
2709 self._out += '<i>'
2710 unclosed_elements.append('i')
2711 elif k == 'textDecoration' and v == 'underline':
2712 self._out += '<u>'
2713 unclosed_elements.append('u')
2714 if font:
2715 self._out += '<font' + font + '>'
2716 unclosed_elements.append('font')
2717 applied_style = {}
2718 if self._applied_styles:
2719 applied_style.update(self._applied_styles[-1])
2720 applied_style.update(style)
2721 self._applied_styles.append(applied_style)
2722 self._unclosed_elements.append(unclosed_elements)
2723
2724 def end(self, tag):
2725 if tag not in (_x('ttml:br'), 'br'):
2726 unclosed_elements = self._unclosed_elements.pop()
2727 for element in reversed(unclosed_elements):
2728 self._out += '</%s>' % element
2729 if unclosed_elements and self._applied_styles:
2730 self._applied_styles.pop()
2731
2732 def data(self, data):
2733 self._out += data
2734
2735 def close(self):
2736 return self._out.strip()
2737
2738 def parse_node(node):
2739 target = TTMLPElementParser()
2740 parser = xml.etree.ElementTree.XMLParser(target=target)
2741 parser.feed(xml.etree.ElementTree.tostring(node))
2742 return parser.close()
2743
2744 for k, v in LEGACY_NAMESPACES:
2745 for ns in v:
2746 dfxp_data = dfxp_data.replace(ns, k)
2747
2748 dfxp = compat_etree_fromstring(dfxp_data)
2749 out = []
2750 paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
2751
2752 if not paras:
2753 raise ValueError('Invalid dfxp/TTML subtitle')
2754
2755 repeat = False
2756 while True:
2757 for style in dfxp.findall(_x('.//ttml:style')):
2758 style_id = style.get('id')
2759 parent_style_id = style.get('style')
2760 if parent_style_id:
2761 if parent_style_id not in styles:
2762 repeat = True
2763 continue
2764 styles[style_id] = styles[parent_style_id].copy()
2765 for prop in SUPPORTED_STYLING:
2766 prop_val = style.get(_x('tts:' + prop))
2767 if prop_val:
2768 styles.setdefault(style_id, {})[prop] = prop_val
2769 if repeat:
2770 repeat = False
2771 else:
2772 break
2773
2774 for p in ('body', 'div'):
2775 ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
2776 if ele is None:
2777 continue
2778 style = styles.get(ele.get('style'))
2779 if not style:
2780 continue
2781 default_style.update(style)
2782
2783 for para, index in zip(paras, itertools.count(1)):
2784 begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
2785 end_time = parse_dfxp_time_expr(para.attrib.get('end'))
2786 dur = parse_dfxp_time_expr(para.attrib.get('dur'))
2787 if begin_time is None:
2788 continue
2789 if not end_time:
2790 if not dur:
2791 continue
2792 end_time = begin_time + dur
2793 out.append('%d\n%s --> %s\n%s\n\n' % (
2794 index,
2795 srt_subtitles_timecode(begin_time),
2796 srt_subtitles_timecode(end_time),
2797 parse_node(para)))
2798
2799 return ''.join(out)
2800
2801
2802def cli_option(params, command_option, param):
2803 param = params.get(param)
2804 if param:
2805 param = compat_str(param)
2806 return [command_option, param] if param is not None else []
2807
2808
2809def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
2810 param = params.get(param)
2811 if param is None:
2812 return []
2813 assert isinstance(param, bool)
2814 if separator:
2815 return [command_option + separator + (true_value if param else false_value)]
2816 return [command_option, true_value if param else false_value]
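# Sketch (hypothetical option names):
#   cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate',
#                   'nocheckcertificate')  ->  ['--no-check-certificate', 'true']
#   cli_bool_option({'nocheckcertificate': False}, '--check-certificate',
#                   'nocheckcertificate', separator='=')  ->  ['--check-certificate=false']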
2817
2818
2819def cli_valueless_option(params, command_option, param, expected_value=True):
2820 param = params.get(param)
2821 return [command_option] if param == expected_value else []
2822
2823
2824def cli_configuration_args(params, param, default=[]):
2825 ex_args = params.get(param)
2826 if ex_args is None:
2827 return default
2828 assert isinstance(ex_args, list)
2829 return ex_args
2830
2831
2832class ISO639Utils(object):
2833 # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
2834 _lang_map = {
2835 'aa': 'aar',
2836 'ab': 'abk',
2837 'ae': 'ave',
2838 'af': 'afr',
2839 'ak': 'aka',
2840 'am': 'amh',
2841 'an': 'arg',
2842 'ar': 'ara',
2843 'as': 'asm',
2844 'av': 'ava',
2845 'ay': 'aym',
2846 'az': 'aze',
2847 'ba': 'bak',
2848 'be': 'bel',
2849 'bg': 'bul',
2850 'bh': 'bih',
2851 'bi': 'bis',
2852 'bm': 'bam',
2853 'bn': 'ben',
2854 'bo': 'bod',
2855 'br': 'bre',
2856 'bs': 'bos',
2857 'ca': 'cat',
2858 'ce': 'che',
2859 'ch': 'cha',
2860 'co': 'cos',
2861 'cr': 'cre',
2862 'cs': 'ces',
2863 'cu': 'chu',
2864 'cv': 'chv',
2865 'cy': 'cym',
2866 'da': 'dan',
2867 'de': 'deu',
2868 'dv': 'div',
2869 'dz': 'dzo',
2870 'ee': 'ewe',
2871 'el': 'ell',
2872 'en': 'eng',
2873 'eo': 'epo',
2874 'es': 'spa',
2875 'et': 'est',
2876 'eu': 'eus',
2877 'fa': 'fas',
2878 'ff': 'ful',
2879 'fi': 'fin',
2880 'fj': 'fij',
2881 'fo': 'fao',
2882 'fr': 'fra',
2883 'fy': 'fry',
2884 'ga': 'gle',
2885 'gd': 'gla',
2886 'gl': 'glg',
2887 'gn': 'grn',
2888 'gu': 'guj',
2889 'gv': 'glv',
2890 'ha': 'hau',
2891 'he': 'heb',
2892 'hi': 'hin',
2893 'ho': 'hmo',
2894 'hr': 'hrv',
2895 'ht': 'hat',
2896 'hu': 'hun',
2897 'hy': 'hye',
2898 'hz': 'her',
2899 'ia': 'ina',
2900 'id': 'ind',
2901 'ie': 'ile',
2902 'ig': 'ibo',
2903 'ii': 'iii',
2904 'ik': 'ipk',
2905 'io': 'ido',
2906 'is': 'isl',
2907 'it': 'ita',
2908 'iu': 'iku',
2909 'ja': 'jpn',
2910 'jv': 'jav',
2911 'ka': 'kat',
2912 'kg': 'kon',
2913 'ki': 'kik',
2914 'kj': 'kua',
2915 'kk': 'kaz',
2916 'kl': 'kal',
2917 'km': 'khm',
2918 'kn': 'kan',
2919 'ko': 'kor',
2920 'kr': 'kau',
2921 'ks': 'kas',
2922 'ku': 'kur',
2923 'kv': 'kom',
2924 'kw': 'cor',
2925 'ky': 'kir',
2926 'la': 'lat',
2927 'lb': 'ltz',
2928 'lg': 'lug',
2929 'li': 'lim',
2930 'ln': 'lin',
2931 'lo': 'lao',
2932 'lt': 'lit',
2933 'lu': 'lub',
2934 'lv': 'lav',
2935 'mg': 'mlg',
2936 'mh': 'mah',
2937 'mi': 'mri',
2938 'mk': 'mkd',
2939 'ml': 'mal',
2940 'mn': 'mon',
2941 'mr': 'mar',
2942 'ms': 'msa',
2943 'mt': 'mlt',
2944 'my': 'mya',
2945 'na': 'nau',
2946 'nb': 'nob',
2947 'nd': 'nde',
2948 'ne': 'nep',
2949 'ng': 'ndo',
2950 'nl': 'nld',
2951 'nn': 'nno',
2952 'no': 'nor',
2953 'nr': 'nbl',
2954 'nv': 'nav',
2955 'ny': 'nya',
2956 'oc': 'oci',
2957 'oj': 'oji',
2958 'om': 'orm',
2959 'or': 'ori',
2960 'os': 'oss',
2961 'pa': 'pan',
2962 'pi': 'pli',
2963 'pl': 'pol',
2964 'ps': 'pus',
2965 'pt': 'por',
2966 'qu': 'que',
2967 'rm': 'roh',
2968 'rn': 'run',
2969 'ro': 'ron',
2970 'ru': 'rus',
2971 'rw': 'kin',
2972 'sa': 'san',
2973 'sc': 'srd',
2974 'sd': 'snd',
2975 'se': 'sme',
2976 'sg': 'sag',
2977 'si': 'sin',
2978 'sk': 'slk',
2979 'sl': 'slv',
2980 'sm': 'smo',
2981 'sn': 'sna',
2982 'so': 'som',
2983 'sq': 'sqi',
2984 'sr': 'srp',
2985 'ss': 'ssw',
2986 'st': 'sot',
2987 'su': 'sun',
2988 'sv': 'swe',
2989 'sw': 'swa',
2990 'ta': 'tam',
2991 'te': 'tel',
2992 'tg': 'tgk',
2993 'th': 'tha',
2994 'ti': 'tir',
2995 'tk': 'tuk',
2996 'tl': 'tgl',
2997 'tn': 'tsn',
2998 'to': 'ton',
2999 'tr': 'tur',
3000 'ts': 'tso',
3001 'tt': 'tat',
3002 'tw': 'twi',
3003 'ty': 'tah',
3004 'ug': 'uig',
3005 'uk': 'ukr',
3006 'ur': 'urd',
3007 'uz': 'uzb',
3008 've': 'ven',
3009 'vi': 'vie',
3010 'vo': 'vol',
3011 'wa': 'wln',
3012 'wo': 'wol',
3013 'xh': 'xho',
3014 'yi': 'yid',
3015 'yo': 'yor',
3016 'za': 'zha',
3017 'zh': 'zho',
3018 'zu': 'zul',
3019 }
3020
3021 @classmethod
3022 def short2long(cls, code):
3023 """Convert language code from ISO 639-1 to ISO 639-2/T"""
3024 return cls._lang_map.get(code[:2])
3025
3026 @classmethod
3027 def long2short(cls, code):
3028 """Convert language code from ISO 639-2/T to ISO 639-1"""
3029 for short_name, long_name in cls._lang_map.items():
3030 if long_name == code:
3031 return short_name
3032
3033
3034class ISO3166Utils(object):
3035 # From http://data.okfn.org/data/core/country-list
3036 _country_map = {
3037 'AF': 'Afghanistan',
3038 'AX': 'Åland Islands',
3039 'AL': 'Albania',
3040 'DZ': 'Algeria',
3041 'AS': 'American Samoa',
3042 'AD': 'Andorra',
3043 'AO': 'Angola',
3044 'AI': 'Anguilla',
3045 'AQ': 'Antarctica',
3046 'AG': 'Antigua and Barbuda',
3047 'AR': 'Argentina',
3048 'AM': 'Armenia',
3049 'AW': 'Aruba',
3050 'AU': 'Australia',
3051 'AT': 'Austria',
3052 'AZ': 'Azerbaijan',
3053 'BS': 'Bahamas',
3054 'BH': 'Bahrain',
3055 'BD': 'Bangladesh',
3056 'BB': 'Barbados',
3057 'BY': 'Belarus',
3058 'BE': 'Belgium',
3059 'BZ': 'Belize',
3060 'BJ': 'Benin',
3061 'BM': 'Bermuda',
3062 'BT': 'Bhutan',
3063 'BO': 'Bolivia, Plurinational State of',
3064 'BQ': 'Bonaire, Sint Eustatius and Saba',
3065 'BA': 'Bosnia and Herzegovina',
3066 'BW': 'Botswana',
3067 'BV': 'Bouvet Island',
3068 'BR': 'Brazil',
3069 'IO': 'British Indian Ocean Territory',
3070 'BN': 'Brunei Darussalam',
3071 'BG': 'Bulgaria',
3072 'BF': 'Burkina Faso',
3073 'BI': 'Burundi',
3074 'KH': 'Cambodia',
3075 'CM': 'Cameroon',
3076 'CA': 'Canada',
3077 'CV': 'Cape Verde',
3078 'KY': 'Cayman Islands',
3079 'CF': 'Central African Republic',
3080 'TD': 'Chad',
3081 'CL': 'Chile',
3082 'CN': 'China',
3083 'CX': 'Christmas Island',
3084 'CC': 'Cocos (Keeling) Islands',
3085 'CO': 'Colombia',
3086 'KM': 'Comoros',
3087 'CG': 'Congo',
3088 'CD': 'Congo, the Democratic Republic of the',
3089 'CK': 'Cook Islands',
3090 'CR': 'Costa Rica',
3091 'CI': 'Côte d\'Ivoire',
3092 'HR': 'Croatia',
3093 'CU': 'Cuba',
3094 'CW': 'Curaçao',
3095 'CY': 'Cyprus',
3096 'CZ': 'Czech Republic',
3097 'DK': 'Denmark',
3098 'DJ': 'Djibouti',
3099 'DM': 'Dominica',
3100 'DO': 'Dominican Republic',
3101 'EC': 'Ecuador',
3102 'EG': 'Egypt',
3103 'SV': 'El Salvador',
3104 'GQ': 'Equatorial Guinea',
3105 'ER': 'Eritrea',
3106 'EE': 'Estonia',
3107 'ET': 'Ethiopia',
3108 'FK': 'Falkland Islands (Malvinas)',
3109 'FO': 'Faroe Islands',
3110 'FJ': 'Fiji',
3111 'FI': 'Finland',
3112 'FR': 'France',
3113 'GF': 'French Guiana',
3114 'PF': 'French Polynesia',
3115 'TF': 'French Southern Territories',
3116 'GA': 'Gabon',
3117 'GM': 'Gambia',
3118 'GE': 'Georgia',
3119 'DE': 'Germany',
3120 'GH': 'Ghana',
3121 'GI': 'Gibraltar',
3122 'GR': 'Greece',
3123 'GL': 'Greenland',
3124 'GD': 'Grenada',
3125 'GP': 'Guadeloupe',
3126 'GU': 'Guam',
3127 'GT': 'Guatemala',
3128 'GG': 'Guernsey',
3129 'GN': 'Guinea',
3130 'GW': 'Guinea-Bissau',
3131 'GY': 'Guyana',
3132 'HT': 'Haiti',
3133 'HM': 'Heard Island and McDonald Islands',
3134 'VA': 'Holy See (Vatican City State)',
3135 'HN': 'Honduras',
3136 'HK': 'Hong Kong',
3137 'HU': 'Hungary',
3138 'IS': 'Iceland',
3139 'IN': 'India',
3140 'ID': 'Indonesia',
3141 'IR': 'Iran, Islamic Republic of',
3142 'IQ': 'Iraq',
3143 'IE': 'Ireland',
3144 'IM': 'Isle of Man',
3145 'IL': 'Israel',
3146 'IT': 'Italy',
3147 'JM': 'Jamaica',
3148 'JP': 'Japan',
3149 'JE': 'Jersey',
3150 'JO': 'Jordan',
3151 'KZ': 'Kazakhstan',
3152 'KE': 'Kenya',
3153 'KI': 'Kiribati',
3154 'KP': 'Korea, Democratic People\'s Republic of',
3155 'KR': 'Korea, Republic of',
3156 'KW': 'Kuwait',
3157 'KG': 'Kyrgyzstan',
3158 'LA': 'Lao People\'s Democratic Republic',
3159 'LV': 'Latvia',
3160 'LB': 'Lebanon',
3161 'LS': 'Lesotho',
3162 'LR': 'Liberia',
3163 'LY': 'Libya',
3164 'LI': 'Liechtenstein',
3165 'LT': 'Lithuania',
3166 'LU': 'Luxembourg',
3167 'MO': 'Macao',
3168 'MK': 'Macedonia, the Former Yugoslav Republic of',
3169 'MG': 'Madagascar',
3170 'MW': 'Malawi',
3171 'MY': 'Malaysia',
3172 'MV': 'Maldives',
3173 'ML': 'Mali',
3174 'MT': 'Malta',
3175 'MH': 'Marshall Islands',
3176 'MQ': 'Martinique',
3177 'MR': 'Mauritania',
3178 'MU': 'Mauritius',
3179 'YT': 'Mayotte',
3180 'MX': 'Mexico',
3181 'FM': 'Micronesia, Federated States of',
3182 'MD': 'Moldova, Republic of',
3183 'MC': 'Monaco',
3184 'MN': 'Mongolia',
3185 'ME': 'Montenegro',
3186 'MS': 'Montserrat',
3187 'MA': 'Morocco',
3188 'MZ': 'Mozambique',
3189 'MM': 'Myanmar',
3190 'NA': 'Namibia',
3191 'NR': 'Nauru',
3192 'NP': 'Nepal',
3193 'NL': 'Netherlands',
3194 'NC': 'New Caledonia',
3195 'NZ': 'New Zealand',
3196 'NI': 'Nicaragua',
3197 'NE': 'Niger',
3198 'NG': 'Nigeria',
3199 'NU': 'Niue',
3200 'NF': 'Norfolk Island',
3201 'MP': 'Northern Mariana Islands',
3202 'NO': 'Norway',
3203 'OM': 'Oman',
3204 'PK': 'Pakistan',
3205 'PW': 'Palau',
3206 'PS': 'Palestine, State of',
3207 'PA': 'Panama',
3208 'PG': 'Papua New Guinea',
3209 'PY': 'Paraguay',
3210 'PE': 'Peru',
3211 'PH': 'Philippines',
3212 'PN': 'Pitcairn',
3213 'PL': 'Poland',
3214 'PT': 'Portugal',
3215 'PR': 'Puerto Rico',
3216 'QA': 'Qatar',
3217 'RE': 'Réunion',
3218 'RO': 'Romania',
3219 'RU': 'Russian Federation',
3220 'RW': 'Rwanda',
3221 'BL': 'Saint Barthélemy',
3222 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
3223 'KN': 'Saint Kitts and Nevis',
3224 'LC': 'Saint Lucia',
3225 'MF': 'Saint Martin (French part)',
3226 'PM': 'Saint Pierre and Miquelon',
3227 'VC': 'Saint Vincent and the Grenadines',
3228 'WS': 'Samoa',
3229 'SM': 'San Marino',
3230 'ST': 'Sao Tome and Principe',
3231 'SA': 'Saudi Arabia',
3232 'SN': 'Senegal',
3233 'RS': 'Serbia',
3234 'SC': 'Seychelles',
3235 'SL': 'Sierra Leone',
3236 'SG': 'Singapore',
3237 'SX': 'Sint Maarten (Dutch part)',
3238 'SK': 'Slovakia',
3239 'SI': 'Slovenia',
3240 'SB': 'Solomon Islands',
3241 'SO': 'Somalia',
3242 'ZA': 'South Africa',
3243 'GS': 'South Georgia and the South Sandwich Islands',
3244 'SS': 'South Sudan',
3245 'ES': 'Spain',
3246 'LK': 'Sri Lanka',
3247 'SD': 'Sudan',
3248 'SR': 'Suriname',
3249 'SJ': 'Svalbard and Jan Mayen',
3250 'SZ': 'Swaziland',
3251 'SE': 'Sweden',
3252 'CH': 'Switzerland',
3253 'SY': 'Syrian Arab Republic',
3254 'TW': 'Taiwan, Province of China',
3255 'TJ': 'Tajikistan',
3256 'TZ': 'Tanzania, United Republic of',
3257 'TH': 'Thailand',
3258 'TL': 'Timor-Leste',
3259 'TG': 'Togo',
3260 'TK': 'Tokelau',
3261 'TO': 'Tonga',
3262 'TT': 'Trinidad and Tobago',
3263 'TN': 'Tunisia',
3264 'TR': 'Turkey',
3265 'TM': 'Turkmenistan',
3266 'TC': 'Turks and Caicos Islands',
3267 'TV': 'Tuvalu',
3268 'UG': 'Uganda',
3269 'UA': 'Ukraine',
3270 'AE': 'United Arab Emirates',
3271 'GB': 'United Kingdom',
3272 'US': 'United States',
3273 'UM': 'United States Minor Outlying Islands',
3274 'UY': 'Uruguay',
3275 'UZ': 'Uzbekistan',
3276 'VU': 'Vanuatu',
3277 'VE': 'Venezuela, Bolivarian Republic of',
3278 'VN': 'Viet Nam',
3279 'VG': 'Virgin Islands, British',
3280 'VI': 'Virgin Islands, U.S.',
3281 'WF': 'Wallis and Futuna',
3282 'EH': 'Western Sahara',
3283 'YE': 'Yemen',
3284 'ZM': 'Zambia',
3285 'ZW': 'Zimbabwe',
3286 }
3287
3288 @classmethod
3289 def short2full(cls, code):
3290 """Convert an ISO 3166-2 country code to the corresponding full name"""
3291 return cls._country_map.get(code.upper())
3292
3293
3294class GeoUtils(object):
3295 # Major IPv4 address blocks per country
3296 _country_ip_map = {
3297 'AD': '85.94.160.0/19',
3298 'AE': '94.200.0.0/13',
3299 'AF': '149.54.0.0/17',
3300 'AG': '209.59.64.0/18',
3301 'AI': '204.14.248.0/21',
3302 'AL': '46.99.0.0/16',
3303 'AM': '46.70.0.0/15',
3304 'AO': '105.168.0.0/13',
3305 'AP': '159.117.192.0/21',
3306 'AR': '181.0.0.0/12',
3307 'AS': '202.70.112.0/20',
3308 'AT': '84.112.0.0/13',
3309 'AU': '1.128.0.0/11',
3310 'AW': '181.41.0.0/18',
3311 'AZ': '5.191.0.0/16',
3312 'BA': '31.176.128.0/17',
3313 'BB': '65.48.128.0/17',
3314 'BD': '114.130.0.0/16',
3315 'BE': '57.0.0.0/8',
3316 'BF': '129.45.128.0/17',
3317 'BG': '95.42.0.0/15',
3318 'BH': '37.131.0.0/17',
3319 'BI': '154.117.192.0/18',
3320 'BJ': '137.255.0.0/16',
3321 'BL': '192.131.134.0/24',
3322 'BM': '196.12.64.0/18',
3323 'BN': '156.31.0.0/16',
3324 'BO': '161.56.0.0/16',
3325 'BQ': '161.0.80.0/20',
3326 'BR': '152.240.0.0/12',
3327 'BS': '24.51.64.0/18',
3328 'BT': '119.2.96.0/19',
3329 'BW': '168.167.0.0/16',
3330 'BY': '178.120.0.0/13',
3331 'BZ': '179.42.192.0/18',
3332 'CA': '99.224.0.0/11',
3333 'CD': '41.243.0.0/16',
3334 'CF': '196.32.200.0/21',
3335 'CG': '197.214.128.0/17',
3336 'CH': '85.0.0.0/13',
3337 'CI': '154.232.0.0/14',
3338 'CK': '202.65.32.0/19',
3339 'CL': '152.172.0.0/14',
3340 'CM': '165.210.0.0/15',
3341 'CN': '36.128.0.0/10',
3342 'CO': '181.240.0.0/12',
3343 'CR': '201.192.0.0/12',
3344 'CU': '152.206.0.0/15',
3345 'CV': '165.90.96.0/19',
3346 'CW': '190.88.128.0/17',
3347 'CY': '46.198.0.0/15',
3348 'CZ': '88.100.0.0/14',
3349 'DE': '53.0.0.0/8',
3350 'DJ': '197.241.0.0/17',
3351 'DK': '87.48.0.0/12',
3352 'DM': '192.243.48.0/20',
3353 'DO': '152.166.0.0/15',
3354 'DZ': '41.96.0.0/12',
3355 'EC': '186.68.0.0/15',
3356 'EE': '90.190.0.0/15',
3357 'EG': '156.160.0.0/11',
3358 'ER': '196.200.96.0/20',
3359 'ES': '88.0.0.0/11',
3360 'ET': '196.188.0.0/14',
3361 'EU': '2.16.0.0/13',
3362 'FI': '91.152.0.0/13',
3363 'FJ': '144.120.0.0/16',
3364 'FM': '119.252.112.0/20',
3365 'FO': '88.85.32.0/19',
3366 'FR': '90.0.0.0/9',
3367 'GA': '41.158.0.0/15',
3368 'GB': '25.0.0.0/8',
3369 'GD': '74.122.88.0/21',
3370 'GE': '31.146.0.0/16',
3371 'GF': '161.22.64.0/18',
3372 'GG': '62.68.160.0/19',
3373 'GH': '45.208.0.0/14',
3374 'GI': '85.115.128.0/19',
3375 'GL': '88.83.0.0/19',
3376 'GM': '160.182.0.0/15',
3377 'GN': '197.149.192.0/18',
3378 'GP': '104.250.0.0/19',
3379 'GQ': '105.235.224.0/20',
3380 'GR': '94.64.0.0/13',
3381 'GT': '168.234.0.0/16',
3382 'GU': '168.123.0.0/16',
3383 'GW': '197.214.80.0/20',
3384 'GY': '181.41.64.0/18',
3385 'HK': '113.252.0.0/14',
3386 'HN': '181.210.0.0/16',
3387 'HR': '93.136.0.0/13',
3388 'HT': '148.102.128.0/17',
3389 'HU': '84.0.0.0/14',
3390 'ID': '39.192.0.0/10',
3391 'IE': '87.32.0.0/12',
3392 'IL': '79.176.0.0/13',
3393 'IM': '5.62.80.0/20',
3394 'IN': '117.192.0.0/10',
3395 'IO': '203.83.48.0/21',
3396 'IQ': '37.236.0.0/14',
3397 'IR': '2.176.0.0/12',
3398 'IS': '82.221.0.0/16',
3399 'IT': '79.0.0.0/10',
3400 'JE': '87.244.64.0/18',
3401 'JM': '72.27.0.0/17',
3402 'JO': '176.29.0.0/16',
3403 'JP': '126.0.0.0/8',
3404 'KE': '105.48.0.0/12',
3405 'KG': '158.181.128.0/17',
3406 'KH': '36.37.128.0/17',
3407 'KI': '103.25.140.0/22',
3408 'KM': '197.255.224.0/20',
3409 'KN': '198.32.32.0/19',
3410 'KP': '175.45.176.0/22',
3411 'KR': '175.192.0.0/10',
3412 'KW': '37.36.0.0/14',
3413 'KY': '64.96.0.0/15',
3414 'KZ': '2.72.0.0/13',
3415 'LA': '115.84.64.0/18',
3416 'LB': '178.135.0.0/16',
3417 'LC': '192.147.231.0/24',
3418 'LI': '82.117.0.0/19',
3419 'LK': '112.134.0.0/15',
3420 'LR': '41.86.0.0/19',
3421 'LS': '129.232.0.0/17',
3422 'LT': '78.56.0.0/13',
3423 'LU': '188.42.0.0/16',
3424 'LV': '46.109.0.0/16',
3425 'LY': '41.252.0.0/14',
3426 'MA': '105.128.0.0/11',
3427 'MC': '88.209.64.0/18',
3428 'MD': '37.246.0.0/16',
3429 'ME': '178.175.0.0/17',
3430 'MF': '74.112.232.0/21',
3431 'MG': '154.126.0.0/17',
3432 'MH': '117.103.88.0/21',
3433 'MK': '77.28.0.0/15',
3434 'ML': '154.118.128.0/18',
3435 'MM': '37.111.0.0/17',
3436 'MN': '49.0.128.0/17',
3437 'MO': '60.246.0.0/16',
3438 'MP': '202.88.64.0/20',
3439 'MQ': '109.203.224.0/19',
3440 'MR': '41.188.64.0/18',
3441 'MS': '208.90.112.0/22',
3442 'MT': '46.11.0.0/16',
3443 'MU': '105.16.0.0/12',
3444 'MV': '27.114.128.0/18',
3445 'MW': '105.234.0.0/16',
3446 'MX': '187.192.0.0/11',
3447 'MY': '175.136.0.0/13',
3448 'MZ': '197.218.0.0/15',
3449 'NA': '41.182.0.0/16',
3450 'NC': '101.101.0.0/18',
3451 'NE': '197.214.0.0/18',
3452 'NF': '203.17.240.0/22',
3453 'NG': '105.112.0.0/12',
3454 'NI': '186.76.0.0/15',
3455 'NL': '145.96.0.0/11',
3456 'NO': '84.208.0.0/13',
3457 'NP': '36.252.0.0/15',
3458 'NR': '203.98.224.0/19',
3459 'NU': '49.156.48.0/22',
3460 'NZ': '49.224.0.0/14',
3461 'OM': '5.36.0.0/15',
3462 'PA': '186.72.0.0/15',
3463 'PE': '186.160.0.0/14',
3464 'PF': '123.50.64.0/18',
3465 'PG': '124.240.192.0/19',
3466 'PH': '49.144.0.0/13',
3467 'PK': '39.32.0.0/11',
3468 'PL': '83.0.0.0/11',
3469 'PM': '70.36.0.0/20',
3470 'PR': '66.50.0.0/16',
3471 'PS': '188.161.0.0/16',
3472 'PT': '85.240.0.0/13',
3473 'PW': '202.124.224.0/20',
3474 'PY': '181.120.0.0/14',
3475 'QA': '37.210.0.0/15',
3476 'RE': '139.26.0.0/16',
3477 'RO': '79.112.0.0/13',
3478 'RS': '178.220.0.0/14',
3479 'RU': '5.136.0.0/13',
3480 'RW': '105.178.0.0/15',
3481 'SA': '188.48.0.0/13',
3482 'SB': '202.1.160.0/19',
3483 'SC': '154.192.0.0/11',
3484 'SD': '154.96.0.0/13',
3485 'SE': '78.64.0.0/12',
3486 'SG': '152.56.0.0/14',
3487 'SI': '188.196.0.0/14',
3488 'SK': '78.98.0.0/15',
3489 'SL': '197.215.0.0/17',
3490 'SM': '89.186.32.0/19',
3491 'SN': '41.82.0.0/15',
3492 'SO': '197.220.64.0/19',
3493 'SR': '186.179.128.0/17',
3494 'SS': '105.235.208.0/21',
3495 'ST': '197.159.160.0/19',
3496 'SV': '168.243.0.0/16',
3497 'SX': '190.102.0.0/20',
3498 'SY': '5.0.0.0/16',
3499 'SZ': '41.84.224.0/19',
3500 'TC': '65.255.48.0/20',
3501 'TD': '154.68.128.0/19',
3502 'TG': '196.168.0.0/14',
3503 'TH': '171.96.0.0/13',
3504 'TJ': '85.9.128.0/18',
3505 'TK': '27.96.24.0/21',
3506 'TL': '180.189.160.0/20',
3507 'TM': '95.85.96.0/19',
3508 'TN': '197.0.0.0/11',
3509 'TO': '175.176.144.0/21',
3510 'TR': '78.160.0.0/11',
3511 'TT': '186.44.0.0/15',
3512 'TV': '202.2.96.0/19',
3513 'TW': '120.96.0.0/11',
3514 'TZ': '156.156.0.0/14',
3515 'UA': '93.72.0.0/13',
3516 'UG': '154.224.0.0/13',
3517 'US': '3.0.0.0/8',
3518 'UY': '167.56.0.0/13',
3519 'UZ': '82.215.64.0/18',
3520 'VA': '212.77.0.0/19',
3521 'VC': '24.92.144.0/20',
3522 'VE': '186.88.0.0/13',
3523 'VG': '172.103.64.0/18',
3524 'VI': '146.226.0.0/16',
3525 'VN': '14.160.0.0/11',
3526 'VU': '202.80.32.0/20',
3527 'WF': '117.20.32.0/21',
3528 'WS': '202.4.32.0/19',
3529 'YE': '134.35.0.0/16',
3530 'YT': '41.242.116.0/22',
3531 'ZA': '41.0.0.0/11',
3532 'ZM': '165.56.0.0/13',
3533 'ZW': '41.85.192.0/19',
3534 }
3535
3536 @classmethod
3537 def random_ipv4(cls, code):
3538 block = cls._country_ip_map.get(code.upper())
3539 if not block:
3540 return None
3541 addr, preflen = block.split('/')
3542 addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
3543 addr_max = addr_min | (0xffffffff >> int(preflen))
3544 return compat_str(socket.inet_ntoa(
3545 compat_struct_pack('!L', random.randint(addr_min, addr_max))))
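# Sketch: GeoUtils.random_ipv4('DE') picks a random address inside the
# 53.0.0.0/8 block listed above (e.g. '53.17.84.9'); unknown country codes
# yield None.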
3546
3547
3548class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
3549 def __init__(self, proxies=None):
3550 # Set default handlers
3551 for type in ('http', 'https'):
3552 setattr(self, '%s_open' % type,
3553 lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
3554 meth(r, proxy, type))
3555 return compat_urllib_request.ProxyHandler.__init__(self, proxies)
3556
3557 def proxy_open(self, req, proxy, type):
3558 req_proxy = req.headers.get('Ytdl-request-proxy')
3559 if req_proxy is not None:
3560 proxy = req_proxy
3561 del req.headers['Ytdl-request-proxy']
3562
3563 if proxy == '__noproxy__':
3564 return None # No Proxy
3565 if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
3566 req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers take care of wrapping the socket with SOCKS
3568 return None
3569 return compat_urllib_request.ProxyHandler.proxy_open(
3570 self, req, proxy, type)
3571
3572
3573# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
3574# released into Public Domain
3575# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
3576
3577def long_to_bytes(n, blocksize=0):
3578 """long_to_bytes(n:long, blocksize:int) : string
3579 Convert a long integer to a byte string.
3580
3581 If optional blocksize is given and greater than zero, pad the front of the
3582 byte string with binary zeros so that the length is a multiple of
3583 blocksize.
3584 """
3585 # after much testing, this algorithm was deemed to be the fastest
3586 s = b''
3587 n = int(n)
3588 while n > 0:
3589 s = compat_struct_pack('>I', n & 0xffffffff) + s
3590 n = n >> 32
3591 # strip off leading zeros
3592 for i in range(len(s)):
3593 if s[i] != b'\000'[0]:
3594 break
3595 else:
3596 # only happens when n == 0
3597 s = b'\000'
3598 i = 0
3599 s = s[i:]
3600 # add back some pad bytes. this could be done more efficiently w.r.t. the
3601 # de-padding being done above, but sigh...
3602 if blocksize > 0 and len(s) % blocksize:
3603 s = (blocksize - len(s) % blocksize) * b'\000' + s
3604 return s
3605
3606
3607def bytes_to_long(s):
3608 """bytes_to_long(string) : long
3609 Convert a byte string to a long integer.
3610
3611 This is (essentially) the inverse of long_to_bytes().
3612 """
3613 acc = 0
3614 length = len(s)
3615 if length % 4:
3616 extra = (4 - length % 4)
3617 s = b'\000' * extra + s
3618 length = length + extra
3619 for i in range(0, length, 4):
3620 acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
3621 return acc
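# Round-trip sketch: bytes_to_long(b'\x01\x00') -> 256 and
# long_to_bytes(256, blocksize=4) -> b'\x00\x00\x01\x00'.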
3622
3623
3624def ohdave_rsa_encrypt(data, exponent, modulus):
3625 '''
3626 Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
3627
3628 Input:
3629 data: data to encrypt, bytes-like object
3630 exponent, modulus: parameter e and N of RSA algorithm, both integer
3631 Output: hex string of encrypted data
3632
3633 Limitation: supports one block encryption only
3634 '''
3635
3636 payload = int(binascii.hexlify(data[::-1]), 16)
3637 encrypted = pow(payload, exponent, modulus)
3638 return '%x' % encrypted
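# Sketch: the input bytes are interpreted little-endian (note the data[::-1]),
# so ohdave_rsa_encrypt(b'\x02', 3, 101) == '%x' % pow(2, 3, 101) == '8'.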
3639
3640
3641def pkcs1pad(data, length):
3642 """
3643 Padding input data with PKCS#1 scheme
3644
3645 @param {int[]} data input data
3646 @param {int} length target length
3647 @returns {int[]} padded data
3648 """
3649 if len(data) > length - 11:
3650 raise ValueError('Input data too long for PKCS#1 padding')
3651
3652 pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
3653 return [0, 2] + pseudo_random + [0] + data
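# Layout sketch: pkcs1pad([1, 2, 3], 16) produces the classic PKCS#1 v1.5
# block [0, 2, r, r, r, r, r, r, r, r, r, r, 0, 1, 2, 3] with ten random
# padding bytes r.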
3654
3655
3656def encode_base_n(num, n, table=None):
3657 FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
3658 if not table:
3659 table = FULL_TABLE[:n]
3660
3661 if n > len(table):
3662 raise ValueError('base %d exceeds table length %d' % (n, len(table)))
3663
3664 if num == 0:
3665 return table[0]
3666
3667 ret = ''
3668 while num:
3669 ret = table[num % n] + ret
3670 num = num // n
3671 return ret
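# e.g. encode_base_n(255, 16) -> 'ff' and encode_base_n(5, 2) -> '101'; a
# custom symbol table can be supplied for packer-style alphabets (sketch).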
3672
3673
3674def decode_packed_codes(code):
3675 mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
3677 base = int(base)
3678 count = int(count)
3679 symbols = symbols.split('|')
3680 symbol_table = {}
3681
3682 while count:
3683 count -= 1
3684 base_n_count = encode_base_n(count, base)
3685 symbol_table[base_n_count] = symbols[count] or base_n_count
3686
    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
3690
3691
3692def parse_m3u8_attributes(attrib):
3693 info = {}
3694 for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
3695 if val.startswith('"'):
3696 val = val[1:-1]
3697 info[key] = val
3698 return info
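# Sketch: parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#   ->  {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}
# (quoted values may contain commas; all values stay strings).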
3699
3700
3701def urshift(val, n):
3702 return val >> n if val >= 0 else (val + 0x100000000) >> n
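# Sketch: urshift(-1, 28) -> 15, mimicking JavaScript's unsigned '>>>' on
# 32-bit values, whereas Python's plain '>>' would preserve the sign.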
3703
3704
3705# Based on png2str() written by @gdkchan and improved by @yokrysty
3706# Originally posted at https://github.com/rg3/youtube-dl/issues/9706
3707def decode_png(png_data):
3708 # Reference: https://www.w3.org/TR/PNG/
3709 header = png_data[8:]
3710
3711 if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
3712 raise IOError('Not a valid PNG file.')
3713
3714 int_map = {1: '>B', 2: '>H', 4: '>I'}
3715 unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
3716
3717 chunks = []
3718
3719 while header:
3720 length = unpack_integer(header[:4])
3721 header = header[4:]
3722
3723 chunk_type = header[:4]
3724 header = header[4:]
3725
3726 chunk_data = header[:length]
3727 header = header[length:]
3728
3729 header = header[4:] # Skip CRC
3730
3731 chunks.append({
3732 'type': chunk_type,
3733 'length': length,
3734 'data': chunk_data
3735 })
3736
3737 ihdr = chunks[0]['data']
3738
3739 width = unpack_integer(ihdr[:4])
3740 height = unpack_integer(ihdr[4:8])
3741
3742 idat = b''
3743
3744 for chunk in chunks:
3745 if chunk['type'] == b'IDAT':
3746 idat += chunk['data']
3747
3748 if not idat:
3749 raise IOError('Unable to read PNG data.')
3750
3751 decompressed_data = bytearray(zlib.decompress(idat))
3752
3753 stride = width * 3
3754 pixels = []
3755
3756 def _get_pixel(idx):
3757 x = idx % stride
3758 y = idx // stride
3759 return pixels[y][x]
3760
3761 for y in range(height):
3762 basePos = y * (1 + stride)
3763 filter_type = decompressed_data[basePos]
3764
3765 current_row = []
3766
3767 pixels.append(current_row)
3768
3769 for x in range(stride):
3770 color = decompressed_data[1 + basePos + x]
3771 basex = y * stride + x
3772 left = 0
3773 up = 0
3774
3775 if x > 2:
3776 left = _get_pixel(basex - 3)
3777 if y > 0:
3778 up = _get_pixel(basex - stride)
3779
3780 if filter_type == 1: # Sub
3781 color = (color + left) & 0xff
3782 elif filter_type == 2: # Up
3783 color = (color + up) & 0xff
3784 elif filter_type == 3: # Average
3785 color = (color + ((left + up) >> 1)) & 0xff
3786 elif filter_type == 4: # Paeth
3787 a = left
3788 b = up
3789 c = 0
3790
3791 if x > 2 and y > 0:
3792 c = _get_pixel(basex - stride - 3)
3793
3794 p = a + b - c
3795
3796 pa = abs(p - a)
3797 pb = abs(p - b)
3798 pc = abs(p - c)
3799
3800 if pa <= pb and pa <= pc:
3801 color = (color + a) & 0xff
3802 elif pb <= pc:
3803 color = (color + b) & 0xff
3804 else:
3805 color = (color + c) & 0xff
3806
3807 current_row.append(color)
3808
3809 return width, height, pixels
3810
3811
3812def write_xattr(path, key, value):
3813 # This mess below finds the best xattr tool for the job
3814 try:
3815 # try the pyxattr module...
3816 import xattr
3817
3818 if hasattr(xattr, 'set'): # pyxattr
3819 # Unicode arguments are not supported in python-pyxattr until
3820 # version 0.5.0
3821 # See https://github.com/rg3/youtube-dl/issues/5498
3822 pyxattr_required_version = '0.5.0'
3823 if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
3824 # TODO: fallback to CLI tools
3825 raise XAttrUnavailableError(
3826 'python-pyxattr is detected but is too old. '
3827 'youtube-dl requires %s or above while your version is %s. '
3828 'Falling back to other xattr implementations' % (
3829 pyxattr_required_version, xattr.__version__))
3830
3831 setxattr = xattr.set
3832 else: # xattr
3833 setxattr = xattr.setxattr
3834
3835 try:
3836 setxattr(path, key, value)
3837 except EnvironmentError as e:
3838 raise XAttrMetadataError(e.errno, e.strerror)
3839
3840 except ImportError:
3841 if compat_os_name == 'nt':
3842 # Write xattrs to NTFS Alternate Data Streams:
3843 # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
3844 assert ':' not in key
3845 assert os.path.exists(path)
3846
3847 ads_fn = path + ':' + key
3848 try:
3849 with open(ads_fn, 'wb') as f:
3850 f.write(value)
3851 except EnvironmentError as e:
3852 raise XAttrMetadataError(e.errno, e.strerror)
3853 else:
3854 user_has_setfattr = check_executable('setfattr', ['--version'])
3855 user_has_xattr = check_executable('xattr', ['-h'])
3856
3857 if user_has_setfattr or user_has_xattr:
3858
3859 value = value.decode('utf-8')
3860 if user_has_setfattr:
3861 executable = 'setfattr'
3862 opts = ['-n', key, '-v', value]
3863 elif user_has_xattr:
3864 executable = 'xattr'
3865 opts = ['-w', key, value]
3866
3867 cmd = ([encodeFilename(executable, True)] +
3868 [encodeArgument(o) for o in opts] +
3869 [encodeFilename(path, True)])
3870
3871 try:
3872 p = subprocess.Popen(
3873 cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
3874 except EnvironmentError as e:
3875 raise XAttrMetadataError(e.errno, e.strerror)
3876 stdout, stderr = p.communicate()
3877 stderr = stderr.decode('utf-8', 'replace')
3878 if p.returncode != 0:
3879 raise XAttrMetadataError(p.returncode, stderr)
3880
3881 else:
                # On Unix, but neither pyxattr nor the setfattr/xattr tools could be found.
3883 if sys.platform.startswith('linux'):
3884 raise XAttrUnavailableError(
3885 "Couldn't find a tool to set the xattrs. "
3886 "Install either the python 'pyxattr' or 'xattr' "
3887 "modules, or the GNU 'attr' package "
3888 "(which contains the 'setfattr' tool).")
3889 else:
3890 raise XAttrUnavailableError(
3891 "Couldn't find a tool to set the xattrs. "
3892 "Install either the python 'xattr' module, "
3893 "or the 'xattr' binary.")
3894
3895
3896def random_birthday(year_field, month_field, day_field):
3897 return {
3898 year_field: str(random.randint(1950, 1995)),
3899 month_field: str(random.randint(1, 12)),
3900 day_field: str(random.randint(1, 31)),
3901 }