[yt-dlp.git] / youtube_dl / utils.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import gzip
import io
import locale
import os
import re
import sys
import zlib
import email.utils
import json

try:
	import urllib.request as compat_urllib_request
except ImportError: # Python 2
	import urllib2 as compat_urllib_request

try:
	import urllib.error as compat_urllib_error
except ImportError: # Python 2
	import urllib2 as compat_urllib_error

try:
	import urllib.parse as compat_urllib_parse
except ImportError: # Python 2
	import urllib as compat_urllib_parse

try:
	import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
	import cookielib as compat_cookiejar

try:
	import html.entities as compat_html_entities
except ImportError: # Python 2
	import htmlentitydefs as compat_html_entities

try:
	import html.parser as compat_html_parser
except ImportError: # Python 2
	import HTMLParser as compat_html_parser

try:
	import http.client as compat_http_client
except ImportError: # Python 2
	import httplib as compat_http_client

try:
	from urllib.parse import parse_qs as compat_parse_qs
except ImportError: # Python 2
	from urlparse import parse_qs as compat_parse_qs

try:
	compat_str = unicode # Python 2
except NameError:
	compat_str = str

try:
	compat_chr = unichr # Python 2
except NameError:
	compat_chr = chr


# Default HTTP headers. YoutubeDLHandler.http_request() adds every entry of
# this dict to each outgoing request. The 'Accept-Encoding' value advertises
# the two compression schemes that YoutubeDLHandler.http_response() decodes.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}
def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks: if the locale
	lookup fails, or reports an encoding that cannot be used to encode
	text, 'UTF-8' is returned instead.
	"""
	try:
		pref = locale.getpreferredencoding()
		# Make sure the reported codec exists and is actually usable.
		u'TEST'.encode(pref)
	except Exception:
		# Deliberate best-effort fallback. Only Exception is caught so that
		# KeyboardInterrupt and SystemExit are not silently swallowed.
		pref = 'UTF-8'

	return pref


def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a character.

	This function receives a match object and is intended to be used with
	the re.sub() function.

	Group 1 of the match must be the entity body without the leading '&'
	and the trailing ';'. Named entities (e.g. "amp") and decimal or
	hexadecimal character references (e.g. "#38", "#x26") are decoded;
	anything else is returned unchanged in its literal "&...;" form.
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity
	if entity in compat_html_entities.name2codepoint:
		return compat_chr(compat_html_entities.name2codepoint[entity])

	# Numeric character reference. The hexadecimal form must accept the
	# digits a-f, and the whole entity body has to match so that trailing
	# garbage is never silently dropped.
	mobj = re.match(u'(?u)#(?:[xX]([0-9a-fA-F]+)|(\\d+))\\Z', entity)
	if mobj is not None:
		hexstr, decstr = mobj.groups()
		try:
			if hexstr is not None:
				return compat_chr(int(hexstr, 16))
			return compat_chr(int(decstr, 10))
		except (ValueError, OverflowError):
			# Code point outside the range the interpreter can represent:
			# fall through and keep the entity as written.
			pass

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)

# Override the module-level start-tag regex of the HTML parser module; per the
# trailing note this backports a parser bugfix.
# NOTE(review): presumably only needed on interpreters that predate that fix - confirm.
compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class IDParser(compat_html_parser.HTMLParser):
	"""Modified HTMLParser that isolates a tag with the specified id.

	Typical use: create the parser with the wanted id, call loads() with the
	whole document, then call get_result() to obtain the text found between
	the opening tag carrying that id and its closing tag.

	While parsing, self.result grows from None to [tag], then
	[tag, start_pos, end_pos], where the positions are getpos() values
	(used below as (line, column) pairs with a 1-based line number).
	"""
	def __init__(self, id):
		"""Prepare a parser looking for the element whose id attribute equals id."""
		self.id = id
		# None until the wanted tag is seen, then [tag, start_pos, end_pos]
		self.result = None
		# True while parsing inside the wanted element
		self.started = False
		# Per tag name: open-minus-close count observed since capture began
		self.depth = {}
		# Full document text, kept for error recovery and result slicing
		self.html = None
		# True between the wanted opening tag and the next parser event
		self.watch_startpos = False
		# Number of parse errors recovered from so far
		self.error_count = 0
		compat_html_parser.HTMLParser.__init__(self)

	def error(self, message):
		"""Recover from a parse error, or give up by raising HTMLParseError.

		Gives up once more than 10 errors were recovered or when the error
		happens inside the wanted element. Otherwise the pending input is
		replaced by the document from the line after the current one and
		parsing resumes.
		NOTE(review): HTMLParseError appears to be gone from html.parser in
		newer Python 3 releases - confirm the supported interpreter range.
		"""
		if self.error_count > 10 or self.started:
			raise compat_html_parser.HTMLParseError(message, self.getpos())
		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
		self.error_count += 1
		self.goahead(1)

	def loads(self, html):
		"""Parse the complete document html in one go."""
		# Keep the text around: error() and get_result() both slice it.
		self.html = html
		self.feed(html)
		self.close()

	def handle_starttag(self, tag, attrs):
		"""Detect the wanted opening tag and count start tags while capturing."""
		attrs = dict(attrs)
		# A start tag right after the wanted opening tag marks where the
		# content begins.
		if self.started:
			self.find_startpos(None)
		if 'id' in attrs and attrs['id'] == self.id:
			self.result = [tag]
			self.started = True
			self.watch_startpos = True
		if self.started:
			if not tag in self.depth: self.depth[tag] = 0
			self.depth[tag] += 1

	def handle_endtag(self, tag):
		"""Count end tags; stop capturing when the wanted tag's count is back to 0."""
		if self.started:
			if tag in self.depth: self.depth[tag] -= 1
			if self.depth[self.result[0]] == 0:
				self.started = False
				# End position of the content (self.result[2])
				self.result.append(self.getpos())

	def find_startpos(self, x):
		"""Needed to put the start position of the result (self.result[1])
		after the opening tag with the requested id.

		The position is recorded once, on the first parser event that follows
		the wanted opening tag; the argument x is ignored."""
		if self.watch_startpos:
			self.watch_startpos = False
			self.result.append(self.getpos())
	# Every other parser event can be the first one after the opening tag, so
	# they all share the same callback.
	handle_entityref = handle_charref = handle_data = handle_comment = \
	handle_decl = handle_pi = unknown_decl = find_startpos

	def get_result(self):
		"""Return the stripped text between the recorded positions, or None.

		None is returned when the wanted tag was never seen, or when the start
		and end positions were not both recorded.
		"""
		if self.result is None:
			return None
		if len(self.result) != 3:
			return None
		lines = self.html.split('\n')
		# Line numbers are 1-based, hence the -1 on the start index.
		lines = lines[self.result[1][0]-1:self.result[2][0]]
		lines[0] = lines[0][self.result[1][1]:]
		if len(lines) == 1:
			# Start and end are on the same line: the end column has to be
			# shifted by what was just cut off the front.
			lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
		lines[-1] = lines[-1][:self.result[2][1]]
		return '\n'.join(lines).strip()

def get_element_by_id(id, html):
	"""Return the content of the tag with the specified id in the passed HTML document.

	The document is run through an IDParser; whatever that parser managed to
	isolate is returned (None when nothing was isolated).
	"""
	id_parser = IDParser(id)
	try:
		id_parser.loads(html)
	except compat_html_parser.HTMLParseError:
		# Best effort: a parse error simply ends the search early.
		pass
	return id_parser.get_result()


def clean_html(html):
	"""Clean an HTML snippet into a readable string.

	Newlines in the snippet become spaces, <br> tags (together with the
	whitespace around them) become newlines, every remaining tag is removed
	and HTML entities are decoded with unescapeHTML().
	"""
	# Newline vs <br />
	html = html.replace('\n', ' ')
	# Raw strings keep the regex escapes from being read as (invalid)
	# string escapes.
	html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
	# Strip html tags
	html = re.sub(r'<.*?>', '', html)
	# Replace html entities
	html = unescapeHTML(html)
	return html


def sanitize_open(filename, open_mode):
	"""Try to open the given filename, and slightly tweak it if this fails.

	Attempts to open the given filename. If this fails, it tries to change
	the filename slightly, step by step, until it's either able to open it
	or it fails and raises a final exception, like the standard open()
	function.

	The special filename u'-' selects standard output; its binary buffer is
	returned when the interpreter provides one, so that bytes can be written.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename == u'-':
			if sys.platform == 'win32':
				import msvcrt
				msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
			# On Python 3 sys.stdout is a text stream that rejects bytes;
			# its .buffer attribute is the underlying binary stream.
			return (getattr(sys.stdout, 'buffer', sys.stdout), filename)
		stream = open(encodeFilename(filename), open_mode)
		return (stream, filename)
	except (IOError, OSError):
		# In case of error, try to remove win32 forbidden chars
		alt_filename = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)
		if alt_filename == filename:
			# Nothing could be tweaked: surface the original error instead
			# of retrying the exact same open() call.
			raise

		# An exception here should be caught in the caller
		stream = open(encodeFilename(alt_filename), open_mode)
		return (stream, alt_filename)


def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp.

	Returns None when the string cannot be parsed as a date.
	"""
	parsed = email.utils.parsedate_tz(timestr)
	if parsed is None:
		return None
	return email.utils.mktime_tz(parsed)

def sanitize_filename(s, restricted=False):
	"""Sanitizes a string so it could be used as part of a filename.
	If restricted is set, use a stricter subset of allowed characters.

	Runs of underscores are collapsed, leading and trailing underscores are
	removed, and an empty result is replaced by a single underscore.
	"""
	def _substitute(ch):
		# Map one character to its replacement (possibly the empty string).
		code = ord(ch)
		if ch == '?' or code < 32 or code == 127:
			return ''
		if ch == '"':
			if restricted:
				return ''
			return '\''
		if ch == ':':
			if restricted:
				return '_-'
			return ' -'
		if ch in '\\/|*<>':
			return '_'
		if restricted and (ch in '!&\'' or ch.isspace() or code > 127):
			return '_'
		return ch

	cleaned = u''.join(_substitute(ch) for ch in s)
	# Collapse every run of underscores into a single one
	cleaned = re.sub(u'_+', u'_', cleaned).strip('_')
	# Common case of "Foreign band name - English song title"
	if restricted and cleaned.startswith('-_'):
		cleaned = cleaned[2:]
	return cleaned if cleaned else '_'

def orderedSet(iterable):
	""" Return a list of the items of iterable without duplicates, keeping first-seen order """
	unique = []
	for item in iterable:
		if item in unique:
			continue
		unique.append(item)
	return unique

def unescapeHTML(s):
	"""
	@param s a string
	@return s with every "&...;" sequence passed through htmlentity_transform
	"""
	text_type = type(u'')
	assert type(s) == text_type

	return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)

def encodeFilename(s):
	"""
	@param s The name of the file
	@return s unchanged on Windows 2000 and up, otherwise s encoded with the
	        filesystem encoding (characters that cannot be encoded are dropped)
	"""

	assert type(s) == type(u'')

	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
		# match Windows 9x series as well. Besides, NT 4 is obsolete.)
		return s

	encoding = sys.getfilesystemencoding()
	if encoding is None:
		# The interpreter may report no filesystem encoding at all;
		# encode() would then fail with a TypeError, so pick UTF-8.
		encoding = 'utf-8'
	return s.encode(encoding, 'ignore')

class DownloadError(Exception):
	"""Error raised when a download fails.

	FileDownloader objects may throw this exception when they are not
	configured to continue on errors. It carries the appropriate error
	message.
	"""


class SameFileError(Exception):
	"""Error raised when several downloads target the same file.

	FileDownloader objects throw this exception when they detect that
	multiple files would have to be downloaded to the same file on disk.
	"""


class PostProcessingError(Exception):
	"""Error raised when a postprocessing step fails.

	A PostProcessor's .run() method may raise this exception to indicate
	an error in the postprocessing task.
	"""

class MaxDownloadsReached(Exception):
	""" Raised when the --max-downloads limit has been reached. """


class UnavailableVideoError(Exception):
	"""Error raised when a requested format is unavailable.

	This exception is thrown when a video is requested in a format that
	is not available for that video.
	"""


class ContentTooShortError(Exception):
	"""Error raised when a download ends up shorter than announced.

	FileDownloader objects may raise this exception when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.
	"""
	# Sizes in bytes: what was actually received / what was announced
	downloaded = expected = None

	def __init__(self, downloaded, expected):
		"""Remember the received and the announced size (both in bytes)."""
		self.downloaded, self.expected = downloaded, expected


class Trouble(Exception):
	"""Helper exception for reporting trouble.

	Exceptions of this type are meant to be handled with
	FileDownloader.trouble
	"""

class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	"""Handler for HTTP and HTTPS requests and responses.

	This class, when installed with an OpenerDirector, automatically adds
	the standard headers to every HTTP(S) request and handles gzipped and
	deflated responses from web servers. If compression is to be avoided in
	a particular request, the original request in the program code only has
	to include the HTTP header "Youtubedl-No-Compression", which will be
	removed before making the real request.

	Part of this code was copied from:

	http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		"""Decompress the body of a "deflate" encoded response.

		The data is first treated as a raw deflate stream (no zlib header);
		if that fails it is decompressed as a regular zlib stream.
		"""
		try:
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		"""Build an addinfourl response around stream, carrying the status code.

		When addinfourl has no getcode attribute its constructor is called
		without the code, which is then set on the instance afterwards.
		"""
		if hasattr(compat_urllib_request.addinfourl, 'getcode'):
			return compat_urllib_request.addinfourl(stream, headers, url, code)
		ret = compat_urllib_request.addinfourl(stream, headers, url)
		ret.code = code
		return ret

	def http_request(self, req):
		"""Add the standard headers to req and honour the no-compression marker.

		Returns the (modified) request object.
		"""
		for name, value in std_headers.items():
			if name in req.headers:
				del req.headers[name]
			req.add_header(name, value)
		# Internal marker header: drop it together with Accept-Encoding so
		# the server is not asked for a compressed response.
		if 'Youtubedl-no-compression' in req.headers:
			if 'Accept-encoding' in req.headers:
				del req.headers['Accept-encoding']
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		"""Return resp, transparently decompressed if it is gzip or deflate encoded.

		A compressed body is read completely and wrapped in a new response
		object that keeps the headers, URL, status code and msg of the
		original response.
		"""
		old_resp = resp
		encoding = resp.headers.get('Content-encoding', '')
		if encoding == 'gzip':
			gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		elif encoding == 'deflate':
			gz = io.BytesIO(self.deflate(resp.read()))
			resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
			resp.msg = old_resp.msg
		return resp

	# An OpenerDirector picks pre/post-processing methods by their
	# "<protocol>_request" / "<protocol>_response" names, so HTTPS traffic
	# needs its own aliases to get the same treatment as plain HTTP.
	https_request = http_request
	https_response = http_response
Commit	Line	Data
d77c3dfd FV	1	#!/usr/bin/env python
	2	# -- coding: utf-8 --
	3
	4	import gzip
03f9daab	5	import io
d77c3dfd FV	6	import locale
	7	import os
	8	import re
	9	import sys
	10	import zlib
d77c3dfd	11	import email.utils
921a1455	12	import json
d77c3dfd	13
01ba00ca PH	14	try:
	15	import urllib.request as compat_urllib_request
	16	except ImportError: # Python 2
	17	import urllib2 as compat_urllib_request
	18
	19	try:
	20	import urllib.error as compat_urllib_error
	21	except ImportError: # Python 2
	22	import urllib2 as compat_urllib_error
	23
	24	try:
	25	import urllib.parse as compat_urllib_parse
	26	except ImportError: # Python 2
da779b49	27	import urllib as compat_urllib_parse
01ba00ca PH	28
	29	try:
	30	import http.cookiejar as compat_cookiejar
	31	except ImportError: # Python 2
	32	import cookielib as compat_cookiejar
	33
3e669f36 PH	34	try:
3e669f36 PH	35	import html.entities as compat_html_entities
9f37a959	36	except ImportError: # Python 2
3e669f36 PH	37	import htmlentitydefs as compat_html_entities
3e669f36 PH	38
a8156c1d PH	39	try:
a8156c1d PH	40	import html.parser as compat_html_parser
9f37a959	41	except ImportError: # Python 2
a8156c1d PH	42	import HTMLParser as compat_html_parser
a8156c1d PH	43
348d0a7a	44	try:
5bd9cc7a	45	import http.client as compat_http_client
9f37a959	46	except ImportError: # Python 2
5bd9cc7a	47	import httplib as compat_http_client
348d0a7a	48
9f37a959	49	try:
73dce4b2	50	from urllib.parse import parse_qs as compat_parse_qs
9f37a959 PH	51	except ImportError: # Python 2
9f37a959 PH	52	from urlparse import parse_qs as compat_parse_qs
348d0a7a	53
3e669f36 PH	54	try:
	55	compat_str = unicode # Python 2
	56	except NameError:
	57	compat_str = str
	58
	59	try:
	60	compat_chr = unichr # Python 2
	61	except NameError:
	62	compat_chr = chr
	63
	64
	65	std_headers = {
	66	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
	67	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	68	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	69	'Accept-Encoding': 'gzip, deflate',
	70	'Accept-Language': 'en-us,en;q=0.5',
	71	}
d77c3dfd FV	72	def preferredencoding():
	73	"""Get preferred encoding.
	74
	75	Returns the best encoding scheme for the system, based on
	76	locale.getpreferredencoding() and some further tweaks.
	77	"""
bae611f2 AS	78	try:
	79	pref = locale.getpreferredencoding()
	80	u'TEST'.encode(pref)
	81	except:
	82	pref = 'UTF-8'
	83
	84	return pref
d77c3dfd FV	85
	86
	87	def htmlentity_transform(matchobj):
dd109dee	88	"""Transforms an HTML entity to a character.
d77c3dfd FV	89
	90	This function receives a match object and is intended to be used with
	91	the re.sub() function.
	92	"""
	93	entity = matchobj.group(1)
	94
	95	# Known non-numeric HTML entity
3e669f36 PH	96	if entity in compat_html_entities.name2codepoint:
3e669f36 PH	97	return compat_chr(compat_html_entities.name2codepoint[entity])
d77c3dfd	98
89fb51dd	99	mobj = re.match(u'(?u)#(x?\\d+)', entity)
d77c3dfd FV	100	if mobj is not None:
	101	numstr = mobj.group(1)
	102	if numstr.startswith(u'x'):
	103	base = 16
	104	numstr = u'0%s' % numstr
	105	else:
	106	base = 10
3e669f36	107	return compat_chr(int(numstr, base))
d77c3dfd FV	108
	109	# Unknown entity in name, return its literal representation
	110	return (u'&%s;' % entity)
	111
a8156c1d PH	112	compat_html_parser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_](?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>](?:\s=+\s(?:'[^']'\|"[^"]"\|(?!['"])[^>\s]))?\s))?\s""", re.VERBOSE) # backport bugfix
a8156c1d PH	113	class IDParser(compat_html_parser.HTMLParser):
9e6dd238 FV	114	"""Modified HTMLParser that isolates a tag with the specified id"""
	115	def __init__(self, id):
	116	self.id = id
	117	self.result = None
	118	self.started = False
	119	self.depth = {}
	120	self.html = None
	121	self.watch_startpos = False
9beb5af8	122	self.error_count = 0
a8156c1d	123	compat_html_parser.HTMLParser.__init__(self)
9e6dd238	124
9beb5af8	125	def error(self, message):
9beb5af8	126	if self.error_count > 10 or self.started:
a8156c1d	127	raise compat_html_parser.HTMLParseError(message, self.getpos())
9beb5af8 FV	128	self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
	129	self.error_count += 1
	130	self.goahead(1)
	131
9e6dd238 FV	132	def loads(self, html):
	133	self.html = html
	134	self.feed(html)
	135	self.close()
	136
	137	def handle_starttag(self, tag, attrs):
	138	attrs = dict(attrs)
	139	if self.started:
	140	self.find_startpos(None)
	141	if 'id' in attrs and attrs['id'] == self.id:
	142	self.result = [tag]
	143	self.started = True
	144	self.watch_startpos = True
	145	if self.started:
	146	if not tag in self.depth: self.depth[tag] = 0
	147	self.depth[tag] += 1
	148
	149	def handle_endtag(self, tag):
	150	if self.started:
	151	if tag in self.depth: self.depth[tag] -= 1
	152	if self.depth[self.result[0]] == 0:
	153	self.started = False
	154	self.result.append(self.getpos())
	155
	156	def find_startpos(self, x):
	157	"""Needed to put the start position of the result (self.result[1])
	158	after the opening tag with the requested id"""
	159	if self.watch_startpos:
	160	self.watch_startpos = False
	161	self.result.append(self.getpos())
	162	handle_entityref = handle_charref = handle_data = handle_comment = \
	163	handle_decl = handle_pi = unknown_decl = find_startpos
	164
	165	def get_result(self):
b514df20 PH	166	if self.result is None:
	167	return None
	168	if len(self.result) != 3:
	169	return None
9e6dd238 FV	170	lines = self.html.split('\n')
	171	lines = lines[self.result[1][0]-1:self.result[2][0]]
	172	lines[0] = lines[0][self.result[1][1]:]
	173	if len(lines) == 1:
	174	lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
	175	lines[-1] = lines[-1][:self.result[2][1]]
	176	return '\n'.join(lines).strip()
	177
	178	def get_element_by_id(id, html):
	179	"""Return the content of the tag with the specified id in the passed HTML document"""
	180	parser = IDParser(id)
	181	try:
	182	parser.loads(html)
a8156c1d	183	except compat_html_parser.HTMLParseError:
9e6dd238 FV	184	pass
	185	return parser.get_result()
	186
	187
	188	def clean_html(html):
	189	"""Clean an HTML snippet into a readable string"""
	190	# Newline vs <br />
	191	html = html.replace('\n', ' ')
	192	html = re.sub('\s<\sbr\s/?\s>\s*', '\n', html)
	193	# Strip html tags
	194	html = re.sub('<.*?>', '', html)
	195	# Replace html entities
	196	html = unescapeHTML(html)
	197	return html
	198
	199
d77c3dfd FV	200	def sanitize_open(filename, open_mode):
	201	"""Try to open the given filename, and slightly tweak it if this fails.
	202
	203	Attempts to open the given filename. If this fails, it tries to change
	204	the filename slightly, step by step, until it's either able to open it
	205	or it fails and raises a final exception, like the standard open()
	206	function.
	207
	208	It returns the tuple (stream, definitive_file_name).
	209	"""
	210	try:
	211	if filename == u'-':
	212	if sys.platform == 'win32':
	213	import msvcrt
	214	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	215	return (sys.stdout, filename)
	216	stream = open(encodeFilename(filename), open_mode)
	217	return (stream, filename)
e08bee32	218	except (IOError, OSError) as err:
d77c3dfd	219	# In case of error, try to remove win32 forbidden chars
89fb51dd	220	filename = re.sub(u'[/<>:"\\\|\\\\?\\*]', u'#', filename)
d77c3dfd FV	221
	222	# An exception here should be caught in the caller
	223	stream = open(encodeFilename(filename), open_mode)
	224	return (stream, filename)
	225
	226
	227	def timeconvert(timestr):
	228	"""Convert RFC 2822 defined time string into system timestamp"""
	229	timestamp = None
	230	timetuple = email.utils.parsedate_tz(timestr)
	231	if timetuple is not None:
	232	timestamp = email.utils.mktime_tz(timetuple)
	233	return timestamp
1c469a94 PH	234
	235	def sanitize_filename(s, restricted=False):
	236	"""Sanitizes a string so it could be used as part of a filename.
	237	If restricted is set, use a stricter subset of allowed characters.
	238	"""
2c288bda	239	def replace_insane(char):
42cb53fc FV	240	if char == '?' or ord(char) < 32 or ord(char) == 127:
	241	return ''
	242	elif char == '"':
240089e5	243	return '' if restricted else '\''
42cb53fc	244	elif char == ':':
1c469a94	245	return '_-' if restricted else ' -'
42cb53fc	246	elif char in '\\/\|*<>':
56781d3d	247	return '_'
dffe658b	248	if restricted and (char in '!&\'' or char.isspace()):
1c469a94	249	return '_'
56781d3d PH	250	if restricted and ord(char) > 127:
56781d3d PH	251	return '_'
2c288bda	252	return char
42cb53fc FV	253
42cb53fc FV	254	result = u''.join(map(replace_insane, s))
56781d3d PH	255	while '__' in result:
	256	result = result.replace('__', '_')
	257	result = result.strip('_')
46cbda0b PH	258	# Common case of "Foreign band name - English song title"
	259	if restricted and result.startswith('-_'):
	260	result = result[2:]
56781d3d PH	261	if not result:
	262	result = '_'
	263	return result
d77c3dfd FV	264
	265	def orderedSet(iterable):
	266	""" Remove all duplicates from the input iterable """
	267	res = []
	268	for el in iterable:
	269	if el not in res:
	270	res.append(el)
	271	return res
	272
	273	def unescapeHTML(s):
	274	"""
dd109dee	275	@param s a string
d77c3dfd FV	276	"""
	277	assert type(s) == type(u'')
	278
89fb51dd	279	result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
9e6dd238	280	return result
d77c3dfd FV	281
	282	def encodeFilename(s):
	283	"""
dd109dee	284	@param s The name of the file
d77c3dfd FV	285	"""
	286
	287	assert type(s) == type(u'')
	288
9bb8dc8e	289	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
d77c3dfd FV	290	# Pass u'' directly to use Unicode APIs on Windows 2000 and up
	291	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	292	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	293	return s
	294	else:
	295	return s.encode(sys.getfilesystemencoding(), 'ignore')
	296
	297	class DownloadError(Exception):
	298	"""Download Error exception.
	299
	300	This exception may be thrown by FileDownloader objects if they are not
	301	configured to continue on errors. They will contain the appropriate
	302	error message.
	303	"""
	304	pass
	305
	306
	307	class SameFileError(Exception):
	308	"""Same File exception.
	309
	310	This exception will be thrown by FileDownloader objects if they detect
	311	multiple files would have to be downloaded to the same file on disk.
	312	"""
	313	pass
	314
	315
	316	class PostProcessingError(Exception):
	317	"""Post Processing exception.
	318
	319	This exception may be raised by PostProcessor's .run() method to
	320	indicate an error in the postprocessing task.
	321	"""
	322	pass
	323
	324	class MaxDownloadsReached(Exception):
	325	""" --max-downloads limit has been reached. """
	326	pass
	327
	328
	329	class UnavailableVideoError(Exception):
	330	"""Unavailable Format exception.
	331
	332	This exception will be thrown when a video is requested
	333	in a format that is not available for that video.
	334	"""
	335	pass
	336
	337
	338	class ContentTooShortError(Exception):
	339	"""Content Too Short exception.
	340
	341	This exception may be raised by FileDownloader objects when a file they
	342	download is too small for what the server announced first, indicating
	343	the connection was probably interrupted.
	344	"""
	345	# Both in bytes
	346	downloaded = None
	347	expected = None
	348
	349	def __init__(self, downloaded, expected):
	350	self.downloaded = downloaded
	351	self.expected = expected
	352
	353
0b8c922d FV	354	class Trouble(Exception):
0b8c922d FV	355	"""Trouble helper exception
dffe658b	356
0b8c922d FV	357	This is an exception to be handled with
	358	FileDownloader.trouble
	359	"""
	360
01ba00ca	361	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
d77c3dfd FV	362	"""Handler for HTTP requests and responses.
	363
	364	This class, when installed with an OpenerDirector, automatically adds
	365	the standard headers to every HTTP request and handles gzipped and
	366	deflated responses from web servers. If compression is to be avoided in
	367	a particular request, the original request in the program code only has
	368	to include the HTTP header "Youtubedl-No-Compression", which will be
	369	removed before making the real request.
	370
	371	Part of this code was copied from:
	372
	373	http://techknack.net/python-urllib2-handlers/
	374
	375	Andrew Rowls, the author of that code, agreed to release it to the
	376	public domain.
	377	"""
	378
	379	@staticmethod
	380	def deflate(data):
	381	try:
	382	return zlib.decompress(data, -zlib.MAX_WBITS)
	383	except zlib.error:
	384	return zlib.decompress(data)
	385
	386	@staticmethod
	387	def addinfourl_wrapper(stream, headers, url, code):
01ba00ca PH	388	if hasattr(compat_urllib_request.addinfourl, 'getcode'):
	389	return compat_urllib_request.addinfourl(stream, headers, url, code)
	390	ret = compat_urllib_request.addinfourl(stream, headers, url)
d77c3dfd FV	391	ret.code = code
	392	return ret
	393
	394	def http_request(self, req):
	395	for h in std_headers:
	396	if h in req.headers:
	397	del req.headers[h]
	398	req.add_header(h, std_headers[h])
	399	if 'Youtubedl-no-compression' in req.headers:
	400	if 'Accept-encoding' in req.headers:
	401	del req.headers['Accept-encoding']
	402	del req.headers['Youtubedl-no-compression']
	403	return req
	404
	405	def http_response(self, req, resp):
	406	old_resp = resp
	407	# gzip
	408	if resp.headers.get('Content-encoding', '') == 'gzip':
03f9daab	409	gz = gzip.GzipFile(fileobj=io.BytesIO(resp.read()), mode='r')
d77c3dfd FV	410	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	411	resp.msg = old_resp.msg
	412	# deflate
	413	if resp.headers.get('Content-encoding', '') == 'deflate':
03f9daab	414	gz = io.BytesIO(self.deflate(resp.read()))
d77c3dfd FV	415	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	416	resp.msg = old_resp.msg
	417	return resp