jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	from __future__ import unicode_literals
	2
	3	import binascii
	4	import collections
	5	import email
	6	import getpass
	7	import io
	8	import optparse
	9	import os
	10	import re
	11	import shlex
	12	import shutil
	13	import socket
	14	import subprocess
	15	import sys
	16	import itertools
	17	import xml.etree.ElementTree
	18
	19
	20	try:
	21	import urllib.request as compat_urllib_request
	22	except ImportError: # Python 2
	23	import urllib2 as compat_urllib_request
	24
	25	try:
	26	import urllib.error as compat_urllib_error
	27	except ImportError: # Python 2
	28	import urllib2 as compat_urllib_error
	29
	30	try:
	31	import urllib.parse as compat_urllib_parse
	32	except ImportError: # Python 2
	33	import urllib as compat_urllib_parse
	34
	35	try:
	36	from urllib.parse import urlparse as compat_urllib_parse_urlparse
	37	except ImportError: # Python 2
	38	from urlparse import urlparse as compat_urllib_parse_urlparse
	39
	40	try:
	41	import urllib.parse as compat_urlparse
	42	except ImportError: # Python 2
	43	import urlparse as compat_urlparse
	44
	45	try:
	46	import urllib.response as compat_urllib_response
	47	except ImportError: # Python 2
	48	import urllib as compat_urllib_response
	49
	50	try:
	51	import http.cookiejar as compat_cookiejar
	52	except ImportError: # Python 2
	53	import cookielib as compat_cookiejar
	54
	55	try:
	56	import http.cookies as compat_cookies
	57	except ImportError: # Python 2
	58	import Cookie as compat_cookies
	59
	60	try:
	61	import html.entities as compat_html_entities
	62	except ImportError: # Python 2
	63	import htmlentitydefs as compat_html_entities
	64
	65	try:
	66	import http.client as compat_http_client
	67	except ImportError: # Python 2
	68	import httplib as compat_http_client
	69
	70	try:
	71	from urllib.error import HTTPError as compat_HTTPError
	72	except ImportError: # Python 2
	73	from urllib2 import HTTPError as compat_HTTPError
	74
	75	try:
	76	from urllib.request import urlretrieve as compat_urlretrieve
	77	except ImportError: # Python 2
	78	from urllib import urlretrieve as compat_urlretrieve
	79
	80	try:
	81	from html.parser import HTMLParser as compat_HTMLParser
	82	except ImportError: # Python 2
	83	from HTMLParser import HTMLParser as compat_HTMLParser
	84
	85
	86	try:
	87	from subprocess import DEVNULL
	88	compat_subprocess_get_DEVNULL = lambda: DEVNULL
	89	except ImportError:
	90	compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
	91
	92	try:
	93	import http.server as compat_http_server
	94	except ImportError:
	95	import BaseHTTPServer as compat_http_server
	96
	97	try:
	98	compat_str = unicode # Python 2
	99	except NameError:
	100	compat_str = str
	101
	102	try:
	103	from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
	104	from urllib.parse import unquote as compat_urllib_parse_unquote
	105	from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
	106	except ImportError: # Python 2
	107	_asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
	108	else re.compile('([\x00-\x7f]+)'))
	109
	110	# HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
	111	# implementations from cpython 3.4.3's stdlib. Python 2's version
	112	# is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244)
	113
	114	def compat_urllib_parse_unquote_to_bytes(string):
	115	"""unquote_to_bytes('abc%20def') -> b'abc def'."""
	116	# Note: strings are encoded as UTF-8. This is only an issue if it contains
	117	# unescaped non-ASCII characters, which URIs should not.
	118	if not string:
	119	# Is it a string-like object?
	120	string.split
	121	return b''
	122	if isinstance(string, compat_str):
	123	string = string.encode('utf-8')
	124	bits = string.split(b'%')
	125	if len(bits) == 1:
	126	return string
	127	res = [bits[0]]
	128	append = res.append
	129	for item in bits[1:]:
	130	try:
	131	append(compat_urllib_parse._hextochr[item[:2]])
	132	append(item[2:])
	133	except KeyError:
	134	append(b'%')
	135	append(item)
	136	return b''.join(res)
	137
	138	def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
	139	"""Replace %xx escapes by their single-character equivalent. The optional
	140	encoding and errors parameters specify how to decode percent-encoded
	141	sequences into Unicode characters, as accepted by the bytes.decode()
	142	method.
	143	By default, percent-encoded sequences are decoded with UTF-8, and invalid
	144	sequences are replaced by a placeholder character.
	145
	146	unquote('abc%20def') -> 'abc def'.
	147	"""
	148	if '%' not in string:
	149	string.split
	150	return string
	151	if encoding is None:
	152	encoding = 'utf-8'
	153	if errors is None:
	154	errors = 'replace'
	155	bits = _asciire.split(string)
	156	res = [bits[0]]
	157	append = res.append
	158	for i in range(1, len(bits), 2):
	159	append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
	160	append(bits[i + 1])
	161	return ''.join(res)
	162
	163	def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
	164	"""Like unquote(), but also replace plus signs by spaces, as required for
	165	unquoting HTML form values.
	166
	167	unquote_plus('%7e/abc+def') -> '~/abc def'
	168	"""
	169	string = string.replace('+', ' ')
	170	return compat_urllib_parse_unquote(string, encoding, errors)
	171
	172	try:
	173	from urllib.parse import urlencode as compat_urllib_parse_urlencode
	174	except ImportError: # Python 2
	175	# Python 2 will choke in urlencode on mixture of byte and unicode strings.
	176	# Possible solutions are to either port it from python 3 with all
	177	# the friends or manually ensure input query contains only byte strings.
	178	# We will stick with latter thus recursively encoding the whole query.
	179	def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
	180	def encode_elem(e):
	181	if isinstance(e, dict):
	182	e = encode_dict(e)
	183	elif isinstance(e, (list, tuple,)):
	184	list_e = encode_list(e)
	185	e = tuple(list_e) if isinstance(e, tuple) else list_e
	186	elif isinstance(e, compat_str):
	187	e = e.encode(encoding)
	188	return e
	189
	190	def encode_dict(d):
	191	return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
	192
	193	def encode_list(l):
	194	return [encode_elem(e) for e in l]
	195
	196	return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
	197
	198	try:
	199	from urllib.request import DataHandler as compat_urllib_request_DataHandler
	200	except ImportError: # Python < 3.4
	201	# Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
	202	class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
	203	def data_open(self, req):
	204	# data URLs as specified in RFC 2397.
	205	#
	206	# ignores POSTed data
	207	#
	208	# syntax:
	209	# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
	210	# mediatype := [ type "/" subtype ] *( ";" parameter )
	211	# data := *urlchar
	212	# parameter := attribute "=" value
	213	url = req.get_full_url()
	214
	215	scheme, data = url.split(':', 1)
	216	mediatype, data = data.split(',', 1)
	217
	218	# even base64 encoded data URLs might be quoted so unquote in any case:
	219	data = compat_urllib_parse_unquote_to_bytes(data)
	220	if mediatype.endswith(';base64'):
	221	data = binascii.a2b_base64(data)
	222	mediatype = mediatype[:-7]
	223
	224	if not mediatype:
	225	mediatype = 'text/plain;charset=US-ASCII'
	226
	227	headers = email.message_from_string(
	228	'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
	229
	230	return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
	231
	232	try:
	233	compat_basestring = basestring # Python 2
	234	except NameError:
	235	compat_basestring = str
	236
	237	try:
	238	compat_chr = unichr # Python 2
	239	except NameError:
	240	compat_chr = chr
	241
	242	try:
	243	from xml.etree.ElementTree import ParseError as compat_xml_parse_error
	244	except ImportError: # Python 2.6
	245	from xml.parsers.expat import ExpatError as compat_xml_parse_error
	246
	247	if sys.version_info[0] >= 3:
	248	compat_etree_fromstring = xml.etree.ElementTree.fromstring
	249	else:
	250	# python 2.x tries to encode unicode strings with ascii (see the
	251	# XMLParser._fixtext method)
	252	etree = xml.etree.ElementTree
	253
	254	try:
	255	_etree_iter = etree.Element.iter
	256	except AttributeError: # Python <=2.6
	257	def _etree_iter(root):
	258	for el in root.findall('*'):
	259	yield el
	260	for sub in _etree_iter(el):
	261	yield sub
	262
	263	# on 2.6 XML doesn't have a parser argument, function copied from CPython
	264	# 2.7 source
	265	def _XML(text, parser=None):
	266	if not parser:
	267	parser = etree.XMLParser(target=etree.TreeBuilder())
	268	parser.feed(text)
	269	return parser.close()
	270
	271	def _element_factory(args, *kwargs):
	272	el = etree.Element(args, *kwargs)
	273	for k, v in el.items():
	274	if isinstance(v, bytes):
	275	el.set(k, v.decode('utf-8'))
	276	return el
	277
	278	def compat_etree_fromstring(text):
	279	doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
	280	for el in _etree_iter(doc):
	281	if el.text is not None and isinstance(el.text, bytes):
	282	el.text = el.text.decode('utf-8')
	283	return doc
	284
	285	if sys.version_info < (2, 7):
	286	# Here comes the crazy part: In 2.6, if the xpath is a unicode,
	287	# .//node does not match if a node is a direct child of . !
	288	def compat_xpath(xpath):
	289	if isinstance(xpath, compat_str):
	290	xpath = xpath.encode('ascii')
	291	return xpath
	292	else:
	293	compat_xpath = lambda xpath: xpath
	294
	295	try:
	296	from urllib.parse import parse_qs as compat_parse_qs
	297	except ImportError: # Python 2
	298	# HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
	299	# Python 2's version is apparently totally broken
	300
	301	def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
	302	encoding='utf-8', errors='replace'):
	303	qs, _coerce_result = qs, compat_str
	304	pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
	305	r = []
	306	for name_value in pairs:
	307	if not name_value and not strict_parsing:
	308	continue
	309	nv = name_value.split('=', 1)
	310	if len(nv) != 2:
	311	if strict_parsing:
	312	raise ValueError('bad query field: %r' % (name_value,))
	313	# Handle case of a control-name with no equal sign
	314	if keep_blank_values:
	315	nv.append('')
	316	else:
	317	continue
	318	if len(nv[1]) or keep_blank_values:
	319	name = nv[0].replace('+', ' ')
	320	name = compat_urllib_parse_unquote(
	321	name, encoding=encoding, errors=errors)
	322	name = _coerce_result(name)
	323	value = nv[1].replace('+', ' ')
	324	value = compat_urllib_parse_unquote(
	325	value, encoding=encoding, errors=errors)
	326	value = _coerce_result(value)
	327	r.append((name, value))
	328	return r
	329
	330	def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
	331	encoding='utf-8', errors='replace'):
	332	parsed_result = {}
	333	pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
	334	encoding=encoding, errors=errors)
	335	for name, value in pairs:
	336	if name in parsed_result:
	337	parsed_result[name].append(value)
	338	else:
	339	parsed_result[name] = [value]
	340	return parsed_result
	341
	342	try:
	343	from shlex import quote as shlex_quote
	344	except ImportError: # Python < 3.3
	345	def shlex_quote(s):
	346	if re.match(r'^[-_\w./]+$', s):
	347	return s
	348	else:
	349	return "'" + s.replace("'", "'\"'\"'") + "'"
	350
	351
	352	if sys.version_info >= (2, 7, 3):
	353	compat_shlex_split = shlex.split
	354	else:
	355	# Working around shlex issue with unicode strings on some python 2
	356	# versions (see http://bugs.python.org/issue1548891)
	357	def compat_shlex_split(s, comments=False, posix=True):
	358	if isinstance(s, compat_str):
	359	s = s.encode('utf-8')
	360	return shlex.split(s, comments, posix)
	361
	362
	363	def compat_ord(c):
	364	if type(c) is int:
	365	return c
	366	else:
	367	return ord(c)
	368
	369
	370	compat_os_name = os._name if os.name == 'java' else os.name
	371
	372
	373	if sys.version_info >= (3, 0):
	374	compat_getenv = os.getenv
	375	compat_expanduser = os.path.expanduser
	376
	377	def compat_setenv(key, value, env=os.environ):
	378	env[key] = value
	379	else:
	380	# Environment variables should be decoded with filesystem encoding.
	381	# Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
	382
	383	def compat_getenv(key, default=None):
	384	from .utils import get_filesystem_encoding
	385	env = os.getenv(key, default)
	386	if env:
	387	env = env.decode(get_filesystem_encoding())
	388	return env
	389
	390	def compat_setenv(key, value, env=os.environ):
	391	def encode(v):
	392	from .utils import get_filesystem_encoding
	393	return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
	394	env[encode(key)] = encode(value)
	395
	396	# HACK: The default implementations of os.path.expanduser from cpython do not decode
	397	# environment variables with filesystem encoding. We will work around this by
	398	# providing adjusted implementations.
	399	# The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
	400	# for different platforms with correct environment variables decoding.
	401
	402	if compat_os_name == 'posix':
	403	def compat_expanduser(path):
	404	"""Expand ~ and ~user constructions. If user or $HOME is unknown,
	405	do nothing."""
	406	if not path.startswith('~'):
	407	return path
	408	i = path.find('/', 1)
	409	if i < 0:
	410	i = len(path)
	411	if i == 1:
	412	if 'HOME' not in os.environ:
	413	import pwd
	414	userhome = pwd.getpwuid(os.getuid()).pw_dir
	415	else:
	416	userhome = compat_getenv('HOME')
	417	else:
	418	import pwd
	419	try:
	420	pwent = pwd.getpwnam(path[1:i])
	421	except KeyError:
	422	return path
	423	userhome = pwent.pw_dir
	424	userhome = userhome.rstrip('/')
	425	return (userhome + path[i:]) or '/'
	426	elif compat_os_name == 'nt' or compat_os_name == 'ce':
	427	def compat_expanduser(path):
	428	"""Expand ~ and ~user constructs.
	429
	430	If user or $HOME is unknown, do nothing."""
	431	if path[:1] != '~':
	432	return path
	433	i, n = 1, len(path)
	434	while i < n and path[i] not in '/\\':
	435	i = i + 1
	436
	437	if 'HOME' in os.environ:
	438	userhome = compat_getenv('HOME')
	439	elif 'USERPROFILE' in os.environ:
	440	userhome = compat_getenv('USERPROFILE')
	441	elif 'HOMEPATH' not in os.environ:
	442	return path
	443	else:
	444	try:
	445	drive = compat_getenv('HOMEDRIVE')
	446	except KeyError:
	447	drive = ''
	448	userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
	449
	450	if i != 1: # ~user
	451	userhome = os.path.join(os.path.dirname(userhome), path[1:i])
	452
	453	return userhome + path[i:]
	454	else:
	455	compat_expanduser = os.path.expanduser
	456
	457
	458	if sys.version_info < (3, 0):
	459	def compat_print(s):
	460	from .utils import preferredencoding
	461	print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
	462	else:
	463	def compat_print(s):
	464	assert isinstance(s, compat_str)
	465	print(s)
	466
	467
	468	try:
	469	subprocess_check_output = subprocess.check_output
	470	except AttributeError:
	471	def subprocess_check_output(args, *kwargs):
	472	assert 'input' not in kwargs
	473	p = subprocess.Popen(args, stdout=subprocess.PIPE, *kwargs)
	474	output, _ = p.communicate()
	475	ret = p.poll()
	476	if ret:
	477	raise subprocess.CalledProcessError(ret, p.args, output=output)
	478	return output
	479
	480	if sys.version_info < (3, 0) and sys.platform == 'win32':
	481	def compat_getpass(prompt, args, *kwargs):
	482	if isinstance(prompt, compat_str):
	483	from .utils import preferredencoding
	484	prompt = prompt.encode(preferredencoding())
	485	return getpass.getpass(prompt, args, *kwargs)
	486	else:
	487	compat_getpass = getpass.getpass
	488
	489	# Python < 2.6.5 require kwargs to be bytes
	490	try:
	491	def _testfunc(x):
	492	pass
	493	_testfunc(**{'x': 0})
	494	except TypeError:
	495	def compat_kwargs(kwargs):
	496	return dict((bytes(k), v) for k, v in kwargs.items())
	497	else:
	498	compat_kwargs = lambda kwargs: kwargs
	499
	500
	501	if sys.version_info < (2, 7):
	502	def compat_socket_create_connection(address, timeout, source_address=None):
	503	host, port = address
	504	err = None
	505	for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
	506	af, socktype, proto, canonname, sa = res
	507	sock = None
	508	try:
	509	sock = socket.socket(af, socktype, proto)
	510	sock.settimeout(timeout)
	511	if source_address:
	512	sock.bind(source_address)
	513	sock.connect(sa)
	514	return sock
	515	except socket.error as _:
	516	err = _
	517	if sock is not None:
	518	sock.close()
	519	if err is not None:
	520	raise err
	521	else:
	522	raise socket.error('getaddrinfo returns an empty list')
	523	else:
	524	compat_socket_create_connection = socket.create_connection
	525
	526
	527	# Fix https://github.com/rg3/youtube-dl/issues/4223
	528	# See http://bugs.python.org/issue9161 for what is broken
	529	def workaround_optparse_bug9161():
	530	op = optparse.OptionParser()
	531	og = optparse.OptionGroup(op, 'foo')
	532	try:
	533	og.add_option('-t')
	534	except TypeError:
	535	real_add_option = optparse.OptionGroup.add_option
	536
	537	def _compat_add_option(self, args, *kwargs):
	538	enc = lambda v: (
	539	v.encode('ascii', 'replace') if isinstance(v, compat_str)
	540	else v)
	541	bargs = [enc(a) for a in args]
	542	bkwargs = dict(
	543	(k, enc(v)) for k, v in kwargs.items())
	544	return real_add_option(self, bargs, *bkwargs)
	545	optparse.OptionGroup.add_option = _compat_add_option
	546
	547	if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
	548	compat_get_terminal_size = shutil.get_terminal_size
	549	else:
	550	_terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
	551
	552	def compat_get_terminal_size(fallback=(80, 24)):
	553	columns = compat_getenv('COLUMNS')
	554	if columns:
	555	columns = int(columns)
	556	else:
	557	columns = None
	558	lines = compat_getenv('LINES')
	559	if lines:
	560	lines = int(lines)
	561	else:
	562	lines = None
	563
	564	if columns is None or lines is None or columns <= 0 or lines <= 0:
	565	try:
	566	sp = subprocess.Popen(
	567	['stty', 'size'],
	568	stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	569	out, err = sp.communicate()
	570	_lines, _columns = map(int, out.split())
	571	except Exception:
	572	_columns, _lines = _terminal_size(*fallback)
	573
	574	if columns is None or columns <= 0:
	575	columns = _columns
	576	if lines is None or lines <= 0:
	577	lines = _lines
	578	return _terminal_size(columns, lines)
	579
	580	try:
	581	itertools.count(start=0, step=1)
	582	compat_itertools_count = itertools.count
	583	except TypeError: # Python 2.6
	584	def compat_itertools_count(start=0, step=1):
	585	n = start
	586	while True:
	587	yield n
	588	n += step
	589
	590	if sys.version_info >= (3, 0):
	591	from tokenize import tokenize as compat_tokenize_tokenize
	592	else:
	593	from tokenize import generate_tokens as compat_tokenize_tokenize
	594
	595	__all__ = [
	596	'compat_HTMLParser',
	597	'compat_HTTPError',
	598	'compat_basestring',
	599	'compat_chr',
	600	'compat_cookiejar',
	601	'compat_cookies',
	602	'compat_etree_fromstring',
	603	'compat_expanduser',
	604	'compat_get_terminal_size',
	605	'compat_getenv',
	606	'compat_getpass',
	607	'compat_html_entities',
	608	'compat_http_client',
	609	'compat_http_server',
	610	'compat_itertools_count',
	611	'compat_kwargs',
	612	'compat_ord',
	613	'compat_os_name',
	614	'compat_parse_qs',
	615	'compat_print',
	616	'compat_setenv',
	617	'compat_shlex_split',
	618	'compat_socket_create_connection',
	619	'compat_str',
	620	'compat_subprocess_get_DEVNULL',
	621	'compat_tokenize_tokenize',
	622	'compat_urllib_error',
	623	'compat_urllib_parse',
	624	'compat_urllib_parse_unquote',
	625	'compat_urllib_parse_unquote_plus',
	626	'compat_urllib_parse_unquote_to_bytes',
	627	'compat_urllib_parse_urlencode',
	628	'compat_urllib_parse_urlparse',
	629	'compat_urllib_request',
	630	'compat_urllib_request_DataHandler',
	631	'compat_urllib_response',
	632	'compat_urlparse',
	633	'compat_urlretrieve',
	634	'compat_xml_parse_error',
	635	'compat_xpath',
	636	'shlex_quote',
	637	'subprocess_check_output',
	638	'workaround_optparse_bug9161',
	639	]