[yt-dlp.git] / youtube_dl / utils.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import gzip
import io
import locale
import os
import re
import sys
import zlib
import email.utils
import json

try:
	import urllib.request as compat_urllib_request
except ImportError: # Python 2
	import urllib2 as compat_urllib_request

try:
	import urllib.error as compat_urllib_error
except ImportError: # Python 2
	import urllib2 as compat_urllib_error

try:
	import urllib.parse as compat_urllib_parse
except ImportError: # Python 2
	import urllib as compat_urllib_parse

try:
	import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
	import cookielib as compat_cookiejar

try:
	import html.entities as compat_html_entities
except ImportError: # Python 2
	import htmlentitydefs as compat_html_entities

try:
	import html.parser as compat_html_parser
except ImportError: # Python 2
	import HTMLParser as compat_html_parser

try:
	import http.client as compat_http_client
except ImportError: # Python 2
	import httplib as compat_http_client

try:
	from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
	from urlparse import parse_qs as compat_parse_qs

# Text string type: ``unicode`` when that name exists (Python 2), ``str`` otherwise.
try:
	compat_str = unicode # Python 2
except NameError:
	compat_str = str

# Code point -> one-character text string: ``unichr`` when that name exists
# (Python 2), ``chr`` otherwise.
try:
	compat_chr = unichr # Python 2
except NameError:
	compat_chr = chr

# Default HTTP headers; YoutubeDLHandler.http_request adds every one of them
# to the requests it processes.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks: when the locale
	lookup fails, or the reported codec cannot encode a plain ASCII probe
	string, 'UTF-8' is returned instead.
	"""
	try:
		pref = locale.getpreferredencoding()
		# Probe the codec; an unknown or unusable codec name raises here
		u'TEST'.encode(pref)
	except Exception:
		# Any failure falls back to UTF-8. Unlike a bare "except:", this
		# lets KeyboardInterrupt and SystemExit propagate.
		pref = 'UTF-8'

	return pref

if sys.version_info >= (3, 0):
	def compat_print(s):
		"""Print the text string s; anything that is not a text string is rejected."""
		assert type(u'') == type(s)
		print(s)
else:
	def compat_print(s):
		"""Print s encoded with the preferred encoding.

		Characters the encoding cannot represent are written as XML
		character references.
		"""
		encoded = s.encode(preferredencoding(), 'xmlcharrefreplace')
		print(encoded)

def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a character.

	This function receives a match object and is intended to be used with
	the re.sub() function.

	@param matchobj a match object whose group(1) is the entity text
	                without the leading '&' and the trailing ';'
	@return the character the entity stands for, or the literal
	        '&entity;' text when the entity is unknown or its code point
	        cannot be turned into a character
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity
	if entity in compat_html_entities.name2codepoint:
		return compat_chr(compat_html_entities.name2codepoint[entity])

	# Numeric reference: decimal (#47) or hexadecimal (#x2F / #X2F).
	# The pattern is anchored so that a partial match such as "#x2" out of
	# "#x2F" is never mistaken for the whole reference.
	mobj = re.match(r'#([xX][0-9a-fA-F]+|[0-9]+)\Z', entity)
	if mobj is not None:
		numstr = mobj.group(1)
		if numstr[0] in u'xX':
			codepoint = int(numstr[1:], 16)
		else:
			codepoint = int(numstr, 10)
		try:
			return compat_chr(codepoint)
		except (ValueError, OverflowError):
			# Code point outside the supported range: keep the text as is
			pass

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)

# Replace the parser module's start-tag locating regex with a backported,
# bug-fixed pattern before any parser is created.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class IDParser(compat_html_parser.HTMLParser):
	"""Modified HTMLParser that isolates a tag with the specified id

	Usage: create it with the wanted id, call loads() with the document,
	then read the inner text of the matching element with get_result().
	"""
	def __init__(self, id):
		# Value of the "id" attribute to look for
		self.id = id
		# None until the element is found; then grows to
		# [tag name, start position, end position]
		self.result = None
		# True while parsing inside the matched element
		self.started = False
		# Per tag name: start tags seen minus end tags seen since the match
		self.depth = {}
		# The complete document handed to loads()
		self.html = None
		# True from the matching start tag until the next parser event,
		# which records the start position
		self.watch_startpos = False
		# Number of parse errors skipped so far
		self.error_count = 0
		compat_html_parser.HTMLParser.__init__(self)

	def error(self, message):
		"""Try to recover from a parse error by skipping a line.

		Gives up (raises HTMLParseError) after more than 10 skipped errors
		or when the error occurs inside the matched element.
		"""
		if self.error_count > 10 or self.started:
			raise compat_html_parser.HTMLParseError(message, self.getpos())
		# Restart on the lines of the original document that follow the
		# reported line number
		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
		self.error_count += 1
		self.goahead(1)

	def loads(self, html):
		"""Remember the document and run the parser over all of it."""
		self.html = html
		self.feed(html)
		self.close()

	def handle_starttag(self, tag, attrs):
		"""Detect the start tag carrying the wanted id and count open tags."""
		attrs = dict(attrs)
		# A start tag inside the matched element is also an event that can
		# mark where the content begins
		if self.started:
			self.find_startpos(None)
		if 'id' in attrs and attrs['id'] == self.id:
			self.result = [tag]
			self.started = True
			self.watch_startpos = True
		# Count this tag (the matching one included) as open
		if self.started:
			if not tag in self.depth: self.depth[tag] = 0
			self.depth[tag] += 1

	def handle_endtag(self, tag):
		"""Count closed tags; record the end position once the match is closed."""
		if self.started:
			if tag in self.depth: self.depth[tag] -= 1
			# The element ends when the count for its own tag name is back to zero
			if self.depth[self.result[0]] == 0:
				self.started = False
				self.result.append(self.getpos())

	def find_startpos(self, x):
		"""Needed to put the start position of the result (self.result[1])
		after the opening tag with the requested id"""
		if self.watch_startpos:
			self.watch_startpos = False
			self.result.append(self.getpos())
	# Every other kind of parser event is routed to find_startpos as well
	handle_entityref = handle_charref = handle_data = handle_comment = \
	handle_decl = handle_pi = unknown_decl = find_startpos

	def get_result(self):
		"""Return the stripped text between the recorded start and end
		positions, or None when the element was not found or one of the
		two positions is missing."""
		if self.result is None:
			return None
		if len(self.result) != 3:
			return None
		# Positions are getpos() values: index 0 is used as a line number
		# (hence the -1 below), index 1 as an offset within that line
		lines = self.html.split('\n')
		lines = lines[self.result[1][0]-1:self.result[2][0]]
		lines[0] = lines[0][self.result[1][1]:]
		# Single line: the start offset was already cut off above, so the
		# end offset has to be shifted by the same amount
		if len(lines) == 1:
			lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
		lines[-1] = lines[-1][:self.result[2][1]]
		return '\n'.join(lines).strip()

def get_element_by_id(id, html):
	"""Return the content of the tag with the specified id in the passed HTML document

	None is returned when IDParser.get_result() has nothing to report.
	"""
	extractor = IDParser(id)
	try:
		extractor.loads(html)
	except compat_html_parser.HTMLParseError:
		# Parsing was aborted; whatever was collected so far is still used
		pass
	content = extractor.get_result()
	return content


def clean_html(html):
	"""Clean an HTML snippet into a readable string

	Newlines in the markup become spaces, <br> tags (with the whitespace
	around them) become newlines, all other tags are removed and HTML
	entities are replaced through unescapeHTML().
	"""
	# Newline vs <br />
	html = html.replace('\n', ' ')
	# Raw string: "\s" in a plain literal is an invalid escape sequence
	html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
	# Strip html tags
	html = re.sub('<.*?>', '', html)
	# Replace html entities
	html = unescapeHTML(html)
	return html


def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, the characters
	that are forbidden in win32 file names are replaced with '#' and the
	open is retried once; a failure of that second attempt is raised to
	the caller, like the standard open() function would.

	The special name u'-' stands for standard output. On win32 stdout is
	switched to binary mode first. When a binary open_mode is requested
	and sys.stdout has an underlying byte stream (sys.stdout.buffer), that
	byte stream is returned so that bytes can be written to it.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			if sys.platform == 'win32':
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			if 'b' in open_mode:
				# A text-mode sys.stdout rejects bytes; use its byte stream
				return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
			return (sys.stdout, filename)
		stream = open(encodeFilename(filename), open_mode)
		return (stream, filename)
	except (IOError, OSError):
		# In case of error, try to remove win32 forbidden chars
		filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)

		# An exception here should be caught in the caller
		stream = open(encodeFilename(filename), open_mode)
		return (stream, filename)


def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp

	Returns None when the string cannot be parsed.
	"""
	parsed = email.utils.parsedate_tz(timestr)
	if parsed is None:
		return None
	return email.utils.mktime_tz(parsed)

def sanitize_filename(s, restricted=False):
	"""Turn s into a string that can be used as part of a filename.

	@param s          the text to clean up
	@param restricted when set, a stricter subset of characters is
	                  allowed: whitespace, '!', '&', "'" and everything
	                  above code point 127 are replaced as well
	@return the sanitized text, or '_' when nothing is left of it
	"""
	# Only the substitutes for '"' and ':' depend on the mode
	if restricted:
		quote_sub, colon_sub = '', '_-'
	else:
		quote_sub, colon_sub = '\'', ' -'

	def replace_insane(char):
		code = ord(char)
		# Control characters and '?' are dropped altogether
		if code < 32 or code == 127 or char == '?':
			return ''
		if char == '"':
			return quote_sub
		if char == ':':
			return colon_sub
		if char in '\\/|*<>':
			return '_'
		if restricted and (code > 127 or char.isspace() or char in '!&\''):
			return '_'
		return char

	cleaned = u''.join(replace_insane(char) for char in s)
	# Collapse runs of underscores and trim them from both ends in one pass
	result = u'_'.join(piece for piece in cleaned.split('_') if piece)
	# Common case of "Foreign band name - English song title"
	if restricted and result.startswith('-_'):
		result = result[2:]
	return result if result else '_'

def orderedSet(iterable):
	""" Return a list with the elements of iterable, first occurrences only, in their original order """
	unique = []
	for item in iterable:
		# Equality based lookup, so the elements need not be hashable
		if item in unique:
			continue
		unique.append(item)
	return unique

def unescapeHTML(s):
	"""
	Replace the HTML entities in s, handing every '&...;' sequence to
	htmlentity_transform().

	@param s a string
	"""
	assert type(u'') == type(s)

	return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)

def encodeFilename(s):
	"""
	Prepare a file name for the file system functions.

	@param s The name of the file
	@return s unchanged on Windows 2000 and up, otherwise s encoded with
	        the file system encoding (characters that cannot be encoded
	        are left out)
	"""

	assert type(u'') == type(s)

	unicode_windows = sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5
	if not unicode_windows:
		return s.encode(sys.getfilesystemencoding(), 'ignore')

	# Pass u'' directly to use Unicode APIs on Windows 2000 and up
	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	return s

class DownloadError(Exception):
	"""Error raised when a download fails.

	FileDownloader objects may throw this exception if they are not
	configured to continue on errors. It carries the appropriate error
	message.
	"""


class SameFileError(Exception):
	"""Error raised when several downloads share one target file.

	FileDownloader objects throw this exception if they detect that
	multiple files would have to be downloaded to the same file on disk.
	"""


class PostProcessingError(Exception):
	"""Error raised by a postprocessing task.

	A PostProcessor's .run() method may raise this exception to indicate
	that the postprocessing failed.
	"""

class MaxDownloadsReached(Exception):
	""" Signals that the --max-downloads limit has been reached. """


class UnavailableVideoError(Exception):
	"""Error raised for an unavailable format.

	This exception is thrown when a video is requested in a format that
	is not available for that video.
	"""


class ContentTooShortError(Exception):
	"""Error raised when a download ends up too short.

	FileDownloader objects may raise this exception when a file they
	download is smaller than what the server announced first, which
	indicates that the connection was probably interrupted.
	"""
	# Both in bytes
	expected = None
	downloaded = None

	def __init__(self, downloaded, expected):
		self.downloaded, self.expected = downloaded, expected


class Trouble(Exception):
	"""Helper exception for reporting trouble.

	This exception is meant to be handled with
	FileDownloader.trouble
	"""

class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	"""Handler for HTTP requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	The same request and response processing is registered for HTTPS, so
	that both schemes are treated alike.

	Part of this code was copied from:

	http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		"""Decompress the body of a "deflate" encoded response.

		The data is first treated as a raw deflate stream; if zlib rejects
		that, it is decompressed as a zlib-wrapped stream instead.
		"""
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		"""Create an addinfourl response object carrying the status code.

		When addinfourl has no getcode() the constructor is called without
		the code, which is then stored on the object afterwards.
		"""
		if hasattr(compat_urllib_request.addinfourl, 'getcode'):
			return compat_urllib_request.addinfourl(stream, headers, url, code)
		ret = compat_urllib_request.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		"""Add the standard headers to req and honour the no-compression marker.

		Returns the (modified) request object.
		"""
		# The standard value replaces whatever the request carried before
		for h in std_headers:
			if h in req.headers:
				del req.headers[h]
			req.add_header(h, std_headers[h])
		# The marker header is internal: remove it, together with the
		# Accept-encoding header, before the request is sent
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		"""Return resp, replaced by a decompressing wrapper when its
		Content-encoding header is "gzip" or "deflate".

		The wrapper keeps the headers, URL, status code and msg of the
		original response.
		"""
		old_resp = resp
		# gzip
		if resp.headers.get('Content-encoding', '') == 'gzip':
			gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		# deflate
		if resp.headers.get('Content-encoding', '') == 'deflate':
			gz = io.BytesIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp

	# OpenerDirector looks processors up by "<protocol>_request" and
	# "<protocol>_response"; without these aliases HTTPS traffic would get
	# neither the standard headers nor the decompression.
	https_request = http_request
	https_response = http_response
Commit	Line	Data
d77c3dfd FV	1	#!/usr/bin/env python
	2	# -- coding: utf-8 --
	3
	4	import gzip
03f9daab	5	import io
d77c3dfd FV	6	import locale
	7	import os
	8	import re
	9	import sys
	10	import zlib
d77c3dfd	11	import email.utils
921a1455	12	import json
d77c3dfd	13
01ba00ca PH	14	try:
	15	import urllib.request as compat_urllib_request
	16	except ImportError: # Python 2
	17	import urllib2 as compat_urllib_request
	18
	19	try:
	20	import urllib.error as compat_urllib_error
	21	except ImportError: # Python 2
	22	import urllib2 as compat_urllib_error
	23
	24	try:
	25	import urllib.parse as compat_urllib_parse
	26	except ImportError: # Python 2
da779b49	27	import urllib as compat_urllib_parse
01ba00ca PH	28
	29	try:
	30	import http.cookiejar as compat_cookiejar
	31	except ImportError: # Python 2
	32	import cookielib as compat_cookiejar
	33
3e669f36 PH	34	try:
3e669f36 PH	35	import html.entities as compat_html_entities
9f37a959	36	except ImportError: # Python 2
3e669f36 PH	37	import htmlentitydefs as compat_html_entities
3e669f36 PH	38
a8156c1d PH	39	try:
a8156c1d PH	40	import html.parser as compat_html_parser
9f37a959	41	except ImportError: # Python 2
a8156c1d PH	42	import HTMLParser as compat_html_parser
a8156c1d PH	43
348d0a7a	44	try:
5bd9cc7a	45	import http.client as compat_http_client
9f37a959	46	except ImportError: # Python 2
5bd9cc7a	47	import httplib as compat_http_client
348d0a7a	48
9f37a959	49	try:
73dce4b2	50	from urllib.parse import parse_qs as compat_parse_qs
9f37a959 PH	51	except ImportError: # Python 2
9f37a959 PH	52	from urlparse import parse_qs as compat_parse_qs
348d0a7a	53
3e669f36 PH	54	try:
	55	compat_str = unicode # Python 2
	56	except NameError:
	57	compat_str = str
	58
	59	try:
	60	compat_chr = unichr # Python 2
	61	except NameError:
	62	compat_chr = chr
	63
3e669f36 PH	64	std_headers = {
	65	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
	66	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	67	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	68	'Accept-Encoding': 'gzip, deflate',
	69	'Accept-Language': 'en-us,en;q=0.5',
	70	}
d77c3dfd FV	71	def preferredencoding():
	72	"""Get preferred encoding.
	73
	74	Returns the best encoding scheme for the system, based on
	75	locale.getpreferredencoding() and some further tweaks.
	76	"""
bae611f2 AS	77	try:
	78	pref = locale.getpreferredencoding()
	79	u'TEST'.encode(pref)
	80	except:
	81	pref = 'UTF-8'
	82
	83	return pref
d77c3dfd	84
8cd10ac4 PH	85	if sys.version_info < (3,0):
	86	def compat_print(s):
	87	print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
	88	else:
	89	def compat_print(s):
e6137fd6	90	assert type(s) == type(u'')
8cd10ac4	91	print(s)
d77c3dfd FV	92
d77c3dfd FV	93	def htmlentity_transform(matchobj):
dd109dee	94	"""Transforms an HTML entity to a character.
d77c3dfd FV	95
	96	This function receives a match object and is intended to be used with
	97	the re.sub() function.
	98	"""
	99	entity = matchobj.group(1)
	100
	101	# Known non-numeric HTML entity
3e669f36 PH	102	if entity in compat_html_entities.name2codepoint:
3e669f36 PH	103	return compat_chr(compat_html_entities.name2codepoint[entity])
d77c3dfd	104
89fb51dd	105	mobj = re.match(u'(?u)#(x?\\d+)', entity)
d77c3dfd FV	106	if mobj is not None:
	107	numstr = mobj.group(1)
	108	if numstr.startswith(u'x'):
	109	base = 16
	110	numstr = u'0%s' % numstr
	111	else:
	112	base = 10
3e669f36	113	return compat_chr(int(numstr, base))
d77c3dfd FV	114
	115	# Unknown entity in name, return its literal representation
	116	return (u'&%s;' % entity)
	117
a8156c1d PH	118	compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_](?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>](?:\s=+\s(?:'[^']'\|"[^"]"\|(?!['"])[^>\s]))?\s))?\s""", re.VERBOSE) # backport bugfix
a8156c1d PH	119	class IDParser(compat_html_parser.HTMLParser):
9e6dd238 FV	120	"""Modified HTMLParser that isolates a tag with the specified id"""
	121	def __init__(self, id):
	122	self.id = id
	123	self.result = None
	124	self.started = False
	125	self.depth = {}
	126	self.html = None
	127	self.watch_startpos = False
9beb5af8	128	self.error_count = 0
a8156c1d	129	compat_html_parser.HTMLParser.__init__(self)
9e6dd238	130
9beb5af8	131	def error(self, message):
9beb5af8	132	if self.error_count > 10 or self.started:
a8156c1d	133	raise compat_html_parser.HTMLParseError(message, self.getpos())
9beb5af8 FV	134	self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
	135	self.error_count += 1
	136	self.goahead(1)
	137
9e6dd238 FV	138	def loads(self, html):
	139	self.html = html
	140	self.feed(html)
	141	self.close()
	142
	143	def handle_starttag(self, tag, attrs):
	144	attrs = dict(attrs)
	145	if self.started:
	146	self.find_startpos(None)
	147	if 'id' in attrs and attrs['id'] == self.id:
	148	self.result = [tag]
	149	self.started = True
	150	self.watch_startpos = True
	151	if self.started:
	152	if not tag in self.depth: self.depth[tag] = 0
	153	self.depth[tag] += 1
	154
	155	def handle_endtag(self, tag):
	156	if self.started:
	157	if tag in self.depth: self.depth[tag] -= 1
	158	if self.depth[self.result[0]] == 0:
	159	self.started = False
	160	self.result.append(self.getpos())
	161
	162	def find_startpos(self, x):
	163	"""Needed to put the start position of the result (self.result[1])
	164	after the opening tag with the requested id"""
	165	if self.watch_startpos:
	166	self.watch_startpos = False
	167	self.result.append(self.getpos())
	168	handle_entityref = handle_charref = handle_data = handle_comment = \
	169	handle_decl = handle_pi = unknown_decl = find_startpos
	170
	171	def get_result(self):
b514df20 PH	172	if self.result is None:
	173	return None
	174	if len(self.result) != 3:
	175	return None
9e6dd238 FV	176	lines = self.html.split('\n')
	177	lines = lines[self.result[1][0]-1:self.result[2][0]]
	178	lines[0] = lines[0][self.result[1][1]:]
	179	if len(lines) == 1:
	180	lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
	181	lines[-1] = lines[-1][:self.result[2][1]]
	182	return '\n'.join(lines).strip()
	183
	184	def get_element_by_id(id, html):
	185	"""Return the content of the tag with the specified id in the passed HTML document"""
	186	parser = IDParser(id)
	187	try:
	188	parser.loads(html)
a8156c1d	189	except compat_html_parser.HTMLParseError:
9e6dd238 FV	190	pass
	191	return parser.get_result()
	192
	193
	194	def clean_html(html):
	195	"""Clean an HTML snippet into a readable string"""
	196	# Newline vs <br />
	197	html = html.replace('\n', ' ')
	198	html = re.sub('\s<\sbr\s/?\s>\s*', '\n', html)
	199	# Strip html tags
	200	html = re.sub('<.*?>', '', html)
	201	# Replace html entities
	202	html = unescapeHTML(html)
	203	return html
	204
	205
d77c3dfd FV	206	def sanitize_open(filename, open_mode):
	207	"""Try to open the given filename, and slightly tweak it if this fails.
	208
	209	Attempts to open the given filename. If this fails, it tries to change
	210	the filename slightly, step by step, until it's either able to open it
	211	or it fails and raises a final exception, like the standard open()
	212	function.
	213
	214	It returns the tuple (stream, definitive_file_name).
	215	"""
	216	try:
	217	if filename == u'-':
	218	if sys.platform == 'win32':
	219	import msvcrt
	220	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	221	return (sys.stdout, filename)
	222	stream = open(encodeFilename(filename), open_mode)
	223	return (stream, filename)
e08bee32	224	except (IOError, OSError) as err:
d77c3dfd	225	# In case of error, try to remove win32 forbidden chars
89fb51dd	226	filename = re.sub(u'[/<>:"\\\|\\\\?\\*]', u'#', filename)
d77c3dfd FV	227
	228	# An exception here should be caught in the caller
	229	stream = open(encodeFilename(filename), open_mode)
	230	return (stream, filename)
	231
	232
	233	def timeconvert(timestr):
	234	"""Convert RFC 2822 defined time string into system timestamp"""
	235	timestamp = None
	236	timetuple = email.utils.parsedate_tz(timestr)
	237	if timetuple is not None:
	238	timestamp = email.utils.mktime_tz(timetuple)
	239	return timestamp
1c469a94 PH	240
	241	def sanitize_filename(s, restricted=False):
	242	"""Sanitizes a string so it could be used as part of a filename.
	243	If restricted is set, use a stricter subset of allowed characters.
	244	"""
2c288bda	245	def replace_insane(char):
42cb53fc FV	246	if char == '?' or ord(char) < 32 or ord(char) == 127:
	247	return ''
	248	elif char == '"':
240089e5	249	return '' if restricted else '\''
42cb53fc	250	elif char == ':':
1c469a94	251	return '_-' if restricted else ' -'
42cb53fc	252	elif char in '\\/\|*<>':
56781d3d	253	return '_'
dffe658b	254	if restricted and (char in '!&\'' or char.isspace()):
1c469a94	255	return '_'
56781d3d PH	256	if restricted and ord(char) > 127:
56781d3d PH	257	return '_'
2c288bda	258	return char
42cb53fc FV	259
42cb53fc FV	260	result = u''.join(map(replace_insane, s))
56781d3d PH	261	while '__' in result:
	262	result = result.replace('__', '_')
	263	result = result.strip('_')
46cbda0b PH	264	# Common case of "Foreign band name - English song title"
	265	if restricted and result.startswith('-_'):
	266	result = result[2:]
56781d3d PH	267	if not result:
	268	result = '_'
	269	return result
d77c3dfd FV	270
	271	def orderedSet(iterable):
	272	""" Remove all duplicates from the input iterable """
	273	res = []
	274	for el in iterable:
	275	if el not in res:
	276	res.append(el)
	277	return res
	278
	279	def unescapeHTML(s):
	280	"""
dd109dee	281	@param s a string
d77c3dfd FV	282	"""
	283	assert type(s) == type(u'')
	284
89fb51dd	285	result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
9e6dd238	286	return result
d77c3dfd FV	287
	288	def encodeFilename(s):
	289	"""
dd109dee	290	@param s The name of the file
d77c3dfd FV	291	"""
	292
	293	assert type(s) == type(u'')
	294
9bb8dc8e	295	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
d77c3dfd FV	296	# Pass u'' directly to use Unicode APIs on Windows 2000 and up
	297	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	298	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	299	return s
	300	else:
	301	return s.encode(sys.getfilesystemencoding(), 'ignore')
	302
	303	class DownloadError(Exception):
	304	"""Download Error exception.
	305
	306	This exception may be thrown by FileDownloader objects if they are not
	307	configured to continue on errors. They will contain the appropriate
	308	error message.
	309	"""
	310	pass
	311
	312
	313	class SameFileError(Exception):
	314	"""Same File exception.
	315
	316	This exception will be thrown by FileDownloader objects if they detect
	317	multiple files would have to be downloaded to the same file on disk.
	318	"""
	319	pass
	320
	321
	322	class PostProcessingError(Exception):
	323	"""Post Processing exception.
	324
	325	This exception may be raised by PostProcessor's .run() method to
	326	indicate an error in the postprocessing task.
	327	"""
	328	pass
	329
	330	class MaxDownloadsReached(Exception):
	331	""" --max-downloads limit has been reached. """
	332	pass
	333
	334
	335	class UnavailableVideoError(Exception):
	336	"""Unavailable Format exception.
	337
	338	This exception will be thrown when a video is requested
	339	in a format that is not available for that video.
	340	"""
	341	pass
	342
	343
	344	class ContentTooShortError(Exception):
	345	"""Content Too Short exception.
	346
	347	This exception may be raised by FileDownloader objects when a file they
	348	download is too small for what the server announced first, indicating
	349	the connection was probably interrupted.
	350	"""
	351	# Both in bytes
	352	downloaded = None
	353	expected = None
	354
	355	def __init__(self, downloaded, expected):
	356	self.downloaded = downloaded
	357	self.expected = expected
	358
	359
0b8c922d FV	360	class Trouble(Exception):
0b8c922d FV	361	"""Trouble helper exception
dffe658b	362
0b8c922d FV	363	This is an exception to be handled with
	364	FileDownloader.trouble
	365	"""
	366
01ba00ca	367	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
d77c3dfd FV	368	"""Handler for HTTP requests and responses.
	369
	370	This class, when installed with an OpenerDirector, automatically adds
	371	the standard headers to every HTTP request and handles gzipped and
	372	deflated responses from web servers. If compression is to be avoided in
	373	a particular request, the original request in the program code only has
	374	to include the HTTP header "Youtubedl-No-Compression", which will be
	375	removed before making the real request.
	376
	377	Part of this code was copied from:
	378
	379	http://techknack.net/python-urllib2-handlers/
	380
	381	Andrew Rowls, the author of that code, agreed to release it to the
	382	public domain.
	383	"""
	384
	385	@staticmethod
	386	def deflate(data):
	387	try:
	388	return zlib.decompress(data, -zlib.MAX_WBITS)
	389	except zlib.error:
	390	return zlib.decompress(data)
	391
	392	@staticmethod
	393	def addinfourl_wrapper(stream, headers, url, code):
01ba00ca PH	394	if hasattr(compat_urllib_request.addinfourl, 'getcode'):
	395	return compat_urllib_request.addinfourl(stream, headers, url, code)
	396	ret = compat_urllib_request.addinfourl(stream, headers, url)
d77c3dfd FV	397	ret.code = code
	398	return ret
	399
	400	def http_request(self, req):
	401	for h in std_headers:
	402	if h in req.headers:
	403	del req.headers[h]
	404	req.add_header(h, std_headers[h])
	405	if 'Youtubedl-no-compression' in req.headers:
	406	if 'Accept-encoding' in req.headers:
	407	del req.headers['Accept-encoding']
	408	del req.headers['Youtubedl-no-compression']
	409	return req
	410
	411	def http_response(self, req, resp):
	412	old_resp = resp
	413	# gzip
	414	if resp.headers.get('Content-encoding', '') == 'gzip':
03f9daab	415	gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
d77c3dfd FV	416	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	417	resp.msg = old_resp.msg
	418	# deflate
	419	if resp.headers.get('Content-encoding', '') == 'deflate':
03f9daab	420	gz = io.BytesIO(self.deflate(resp.read()))
d77c3dfd FV	421	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	422	resp.msg = old_resp.msg
	423	return resp