[yt-dlp.git] / youtube_dl / utils.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import gzip
import htmlentitydefs
import HTMLParser
import locale
import os
import re
import sys
import zlib
import email.utils
import json

try:
	import cStringIO as StringIO
except ImportError:
	import StringIO

# Default HTTP headers. YoutubeDLHandler.http_request() sets each of these on
# every request it processes, replacing any value the request already had
# under the same name. The User-Agent string is that of a desktop Firefox.
std_headers = {
	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
	'Accept-Encoding': 'gzip, deflate',
	'Accept-Language': 'en-us,en;q=0.5',
}

# compat_str is the text (unicode) string type of the running interpreter:
# `unicode` where that builtin exists, plain `str` otherwise.
try:
	compat_str = unicode # Python 2
except NameError:
	compat_str = str # no `unicode` builtin (Python 3)

try:
	import urllib.request as compat_urllib_request
except ImportError: # Python 2
	import urllib2 as compat_urllib_request

try:
	import urllib.error as compat_urllib_error
except ImportError: # Python 2
	import urllib2 as compat_urllib_error

try:
	import urllib.parse as compat_urllib_parse
except ImportError: # Python 2
	import urllib as compat_urllib_parse

try:
	import http.cookiejar as compat_cookiejar
except ImportError: # Python 2
	import cookielib as compat_cookiejar

def preferredencoding():
	"""Get preferred encoding.

	Returns the best encoding scheme for the system, based on
	locale.getpreferredencoding() and some further tweaks.

	The locale's answer is only trusted if a test string can actually be
	encoded with it; otherwise 'UTF-8' is returned.

	@return the name of a usable encoding
	"""
	try:
		pref = locale.getpreferredencoding()
		# Probe the codec: an unknown or empty encoding name raises here.
		u'TEST'.encode(pref)
	except Exception:
		# Deliberately broad (locale errors, unknown codec, bad value), but
		# unlike a bare "except:" it lets KeyboardInterrupt and SystemExit
		# propagate.
		pref = 'UTF-8'

	return pref


def htmlentity_transform(matchobj):
	"""Transforms an HTML entity to a character.

	This function receives a match object and is intended to be used with
	the re.sub() function.

	@param matchobj match object whose group(1) is the entity text without
	                the leading '&' and the trailing ';'
	@return the decoded character, or the literal u'&<entity>;' text when the
	        entity is neither a known name nor a valid numeric reference
	"""
	entity = matchobj.group(1)

	# Known non-numeric HTML entity
	if entity in htmlentitydefs.name2codepoint:
		return unichr(htmlentitydefs.name2codepoint[entity])

	# Numeric character reference: decimal (#38) or hexadecimal (#x26).
	# The hexadecimal branch must accept the digits a-f, and the pattern is
	# anchored so that a partial match (e.g. only the "x2" of "x2F") is never
	# decoded into the wrong character.
	mobj = re.match(u'(?u)#([xX][0-9a-fA-F]+|[0-9]+)$', entity)
	if mobj is not None:
		numstr = mobj.group(1)
		if numstr[:1] in (u'x', u'X'):
			base = 16
			numstr = numstr[1:]
		else:
			base = 10
		try:
			return unichr(int(numstr, base))
		except (ValueError, OverflowError):
			# Code point outside the range unichr() supports: fall through
			# and keep the text as it was.
			pass

	# Unknown entity in name, return its literal representation
	return (u'&%s;' % entity)

# Replace the module-level start-tag regex of the HTMLParser module with the
# pattern below, before IDParser is used.
# NOTE(review): "backport bugfix" presumably refers to a fix from a later
# Python release of HTMLParser -- confirm the exact upstream change.
HTMLParser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_]*(?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>]*(?:\s*=+\s*(?:'[^']*'|"[^"]*"|(?!['"])[^>\s]*))?\s*)*)?\s*""", re.VERBOSE) # backport bugfix
class IDParser(HTMLParser.HTMLParser):
	"""Modified HTMLParser that isolates a tag with the specified id

	Usage: create the parser with the wanted id, call loads(html), then
	get_result() to obtain the text between the matching opening tag and
	its closing tag.

	self.result is built up step by step as
	[tag name, start position, end position]; the positions are the values
	returned by getpos() and are used by get_result() as
	(1-based line, column) pairs.
	"""
	def __init__(self, id):
		self.id = id  # value of the id attribute to look for
		self.result = None  # stays None until a tag with that id is seen
		self.started = False  # True while inside the matched element
		self.depth = {}  # tag name -> number of currently open tags, counted from the match on
		self.html = None  # the complete document handed to loads()
		self.watch_startpos = False  # True until the content start position has been recorded
		self.error_count = 0  # number of parse errors skipped so far
		HTMLParser.HTMLParser.__init__(self)

	def error(self, message):
		"""Handle a parse error, skipping the offending line when possible.

		NOTE(review): presumably invoked by the HTMLParser base class on
		malformed input -- confirm.

		The error is raised as HTMLParseError once more than 10 errors have
		already been skipped, or when it happens inside the matched element.
		Otherwise the remaining input is reset to everything after the
		current line and parsing is resumed.
		"""
		if self.error_count > 10 or self.started:
			raise HTMLParser.HTMLParseError(message, self.getpos())
		# getpos()[0] is used as a 1-based line number, so slicing from it
		# drops the current line as well.
		self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
		self.error_count += 1
		self.goahead(1)

	def loads(self, html):
		"""Parse the complete document `html` in one go."""
		# Kept for error() and get_result(), which both need the full text.
		self.html = html
		self.feed(html)
		self.close()

	def handle_starttag(self, tag, attrs):
		"""Detect the tag with the requested id and count open tags inside it."""
		attrs = dict(attrs)
		if self.started:
			# A nested start tag right after the matching tag also marks
			# the beginning of the content.
			self.find_startpos(None)
		if 'id' in attrs and attrs['id'] == self.id:
			self.result = [tag]
			self.started = True
			self.watch_startpos = True
		if self.started:
			if not tag in self.depth: self.depth[tag] = 0
			self.depth[tag] += 1

	def handle_endtag(self, tag):
		"""Track closing tags; the matched element ends when its count reaches 0."""
		if self.started:
			if tag in self.depth: self.depth[tag] -= 1
			if self.depth[self.result[0]] == 0:
				# All tags with the matched tag's name are closed again:
				# record the current position as the end of the content.
				self.started = False
				self.result.append(self.getpos())

	def find_startpos(self, x):
		"""Needed to put the start position of the result (self.result[1])
		after the opening tag with the requested id"""
		# x is whatever argument the aliased handler receives; it is unused.
		if self.watch_startpos:
			self.watch_startpos = False
			self.result.append(self.getpos())
	# Every other parser event is routed to find_startpos, so that the first
	# event after the matching start tag fixes the start position.
	handle_entityref = handle_charref = handle_data = handle_comment = \
	handle_decl = handle_pi = unknown_decl = find_startpos

	def get_result(self):
		"""Return the stripped text of the matched element, or None.

		None is returned when no tag with the id was found, or when the
		result does not consist of exactly tag name, start and end position
		(for example because the element was never closed).
		"""
		if self.result is None:
			return None
		if len(self.result) != 3:
			return None
		lines = self.html.split('\n')
		# Keep only the lines from the start line to the end line (inclusive);
		# line numbers are 1-based, list indexes 0-based.
		lines = lines[self.result[1][0]-1:self.result[2][0]]
		# Drop everything before the start column on the first line.
		lines[0] = lines[0][self.result[1][1]:]
		if len(lines) == 1:
			# Start and end are on the same line: the end column refers to
			# the untrimmed line, so compensate for the prefix removed above.
			lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
		# Cut the last line at the end column (for a single-line result the
		# line is already at most this long, so nothing more is removed).
		lines[-1] = lines[-1][:self.result[2][1]]
		return '\n'.join(lines).strip()

def get_element_by_id(id, html):
	"""Return the content of the tag with the specified id in the passed HTML document

	@param id   value of the id attribute to search for
	@param html the HTML document
	@return whatever IDParser.get_result() yields after parsing the document
	"""
	id_parser = IDParser(id)
	try:
		id_parser.loads(html)
	except HTMLParser.HTMLParseError:
		# Best effort: use whatever the parser collected before it gave up.
		pass
	content = id_parser.get_result()
	return content


def clean_html(html):
	"""Clean an HTML snippet into a readable string

	Newlines in the source become spaces, <br> tags (in any of their
	spellings, together with the whitespace around them) become newlines,
	all remaining tags are removed and HTML entities are decoded.

	@param html the HTML snippet; it is passed on to unescapeHTML(), which
	            only accepts unicode strings
	@return the cleaned-up text
	"""
	# Newline vs <br />
	html = html.replace('\n', ' ')
	# Raw string literals: "\s" is not a valid escape in a normal literal.
	html = re.sub(r'\s*<\s*br\s*/?\s*>\s*', '\n', html)
	# Strip html tags
	html = re.sub(r'<.*?>', '', html)
	# Replace html entities
	html = unescapeHTML(html)
	return html


def sanitize_open(filename, open_mode):
	"""Open the given filename, retrying once with a tweaked name on failure.

	The name u'-' stands for standard output and is never opened as a file.
	For any other name the file is opened as given; if that fails with an
	IOError or OSError, characters that are forbidden on win32 are replaced
	by '#' and the open is attempted a second time. A failure of the second
	attempt propagates to the caller, like with the standard open()
	function.

	It returns the tuple (stream, definitive_file_name).
	"""
	try:
		if filename != u'-':
			return (open(encodeFilename(filename), open_mode), filename)
		if sys.platform == 'win32':
			# Put stdout into binary mode before handing it out
			import msvcrt
			msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
		return (sys.stdout, filename)
	except (IOError, OSError):
		# In case of error, try to remove win32 forbidden chars
		sanitized = re.sub(u'[/<>:"\\|\\\\?\\*]', u'#', filename)

		# An exception here should be caught in the caller
		return (open(encodeFilename(sanitized), open_mode), sanitized)


def timeconvert(timestr):
	"""Convert RFC 2822 defined time string into system timestamp

	@param timestr the date string to convert
	@return the timestamp, or None if the string could not be parsed
	"""
	parsed = email.utils.parsedate_tz(timestr)
	if parsed is None:
		return None
	return email.utils.mktime_tz(parsed)

def sanitize_filename(s, restricted=False):
	"""Turn a string into something usable as part of a filename.

	@param s          the string to sanitize
	@param restricted if set, only a stricter subset of characters is kept:
	                  whitespace, '!', '&', "'" and non-ASCII characters are
	                  turned into underscores as well
	@return the sanitized name; never empty ('_' if nothing else is left)
	"""
	def replace_insane(char):
		codepoint = ord(char)
		# Question marks and control characters are dropped in every mode
		if char == '?' or codepoint < 32 or codepoint == 127:
			return ''
		if char == '"':
			if restricted:
				return ''
			return '\''
		if char == ':':
			if restricted:
				return '_-'
			return ' -'
		if char in '\\/|*<>':
			return '_'
		if restricted:
			if char in '!&\'' or char.isspace() or codepoint > 127:
				return '_'
		return char

	result = u''.join(replace_insane(char) for char in s)
	# Splitting on '_' and dropping the empty pieces squeezes every run of
	# underscores into a single one and trims them from both ends.
	result = u'_'.join(piece for piece in result.split('_') if piece)
	# Common case of "Foreign band name - English song title"
	if restricted and result.startswith('-_'):
		result = result[2:]
	if not result:
		result = '_'
	return result

def orderedSet(iterable):
	""" Return the elements of the iterable as a list without duplicates

	The first occurrence of every element is kept, in its original order.
	"""
	unique = []
	for item in iterable:
		if item in unique:
			continue
		unique.append(item)
	return unique

def unescapeHTML(s):
	"""
	Replace every '&...;' sequence in s by the result of
	htmlentity_transform().

	@param s a string (must be a unicode string)
	@return the string with the sequences replaced
	"""
	assert type(s) == type(u'')

	return re.sub(u'(?u)&(.+?);', htmlentity_transform, s)

def encodeFilename(s):
	"""
	Convert a filename into the form that should be passed to the file
	functions of the operating system.

	@param s The name of the file (must be a unicode string)
	@return s itself on Windows 2000 and up, otherwise s encoded with the
	        file system encoding; characters that cannot be encoded are
	        dropped
	"""

	assert type(s) == type(u'')

	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
		# Pass u'' directly to use Unicode APIs on Windows 2000 and up
		# (Detecting Windows NT 4 is tricky because 'major >= 4' would
		# match Windows 9x series as well. Besides, NT 4 is obsolete.)
		return s

	encoding = sys.getfilesystemencoding()
	if encoding is None:
		# The interpreter could not determine the file system encoding
		# (possible on Python 2 under Unix); encode(None) would raise.
		encoding = 'utf-8'
	return s.encode(encoding, 'ignore')

class DownloadError(Exception):
	"""Raised when a download fails.

	FileDownloader objects may throw this exception if they are not
	configured to continue on errors. The exception carries the
	appropriate error message.
	"""


class SameFileError(Exception):
	"""Raised when several downloads would end up in the same file.

	FileDownloader objects throw this exception if they detect that
	multiple files would have to be downloaded to the same file on disk.
	"""


class PostProcessingError(Exception):
	"""Raised when a postprocessing task fails.

	A PostProcessor's .run() method may raise this exception to indicate
	an error in the postprocessing task.
	"""

class MaxDownloadsReached(Exception):
	""" Signals that the --max-downloads limit has been reached. """


class UnavailableVideoError(Exception):
	"""Raised when the requested format is unavailable.

	This exception is thrown when a video is requested in a format that
	is not available for that video.
	"""


class ContentTooShortError(Exception):
	"""Content Too Short exception.

	This exception may be raised by FileDownloader objects when a file they
	download is too small for what the server announced first, indicating
	the connection was probably interrupted.

	@param downloaded number of bytes actually received
	@param expected   number of bytes announced by the server
	"""
	# Both in bytes
	downloaded = None
	expected = None

	def __init__(self, downloaded, expected):
		# Hand both values to the base class so that args is populated;
		# without this the exception cannot be rebuilt by copy/pickle
		# (which call the class with args) and carries no information
		# when it is printed.
		Exception.__init__(self, downloaded, expected)
		self.downloaded = downloaded
		self.expected = expected

	def __str__(self):
		return 'Downloaded %s bytes, expected %s bytes' % (self.downloaded, self.expected)


class Trouble(Exception):
	"""Trouble helper exception

	This is an exception that is meant to be handled with
	FileDownloader.trouble (defined outside of this module).
	"""

class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	"""HTTP handler that adds the standard headers and undoes compression.

	Once installed in an OpenerDirector, this handler puts the headers from
	std_headers on every HTTP request and transparently decodes responses
	that the web server sent gzipped or deflated. To avoid compression for
	one particular request, the program code only has to add the HTTP
	header "Youtubedl-No-Compression" to it; that header is removed again
	before the real request is made.

	Part of this code was copied from:

	http://techknack.net/python-urllib2-handlers/

	Andrew Rowls, the author of that code, agreed to release it to the
	public domain.
	"""

	@staticmethod
	def deflate(data):
		"""Decompress deflated data, with or without a zlib header."""
		try:
			# First try a raw deflate stream (no zlib header)
			return zlib.decompress(data, -zlib.MAX_WBITS)
		except zlib.error:
			# Fall back to the default decoding, which expects the header
			return zlib.decompress(data)

	@staticmethod
	def addinfourl_wrapper(stream, headers, url, code):
		"""Build an addinfourl response that has its code attribute set."""
		response_class = compat_urllib_request.addinfourl
		if not hasattr(response_class, 'getcode'):
			# This addinfourl does not take the code as an argument:
			# set the attribute by hand.
			wrapped = response_class(stream, headers, url)
			wrapped.code = code
			return wrapped
		return response_class(stream, headers, url, code)

	def http_request(self, req):
		"""Put the standard headers on req and handle the no-compression marker."""
		for name, value in std_headers.items():
			if name in req.headers:
				del req.headers[name]
			req.add_header(name, value)
		if 'Youtubedl-no-compression' in req.headers:
			# Compression is not wanted for this request: do not offer it,
			# and remove the internal marker header.
			req.headers.pop('Accept-encoding', None)
			del req.headers['Youtubedl-no-compression']
		return req

	def http_response(self, req, resp):
		"""Return resp, wrapped so that reading yields decompressed data if needed."""
		encoding = resp.headers.get('Content-encoding', '')
		if encoding == 'gzip':
			body = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
		elif encoding == 'deflate':
			body = StringIO.StringIO(self.deflate(resp.read()))
		else:
			# Not compressed: hand the response back untouched
			return resp
		# Same headers, url, code and msg as the original response; only the
		# body stream is replaced.
		decoded = self.addinfourl_wrapper(body, resp.headers, resp.url, resp.code)
		decoded.msg = resp.msg
		return decoded
Commit	Line	Data
d77c3dfd FV	1	#!/usr/bin/env python
	2	# -- coding: utf-8 --
	3
	4	import gzip
	5	import htmlentitydefs
	6	import HTMLParser
	7	import locale
	8	import os
	9	import re
	10	import sys
	11	import zlib
d77c3dfd	12	import email.utils
921a1455	13	import json
d77c3dfd FV	14
	15	try:
	16	import cStringIO as StringIO
	17	except ImportError:
	18	import StringIO
	19
	20	std_headers = {
38612b4e	21	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0',
d77c3dfd FV	22	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
	23	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	24	'Accept-Encoding': 'gzip, deflate',
	25	'Accept-Language': 'en-us,en;q=0.5',
	26	}
	27
92b91c18	28	try:
96731798	29	compat_str = unicode # Python 2
92b91c18	30	except NameError:
96731798	31	compat_str = str
92b91c18	32
01ba00ca PH	33	try:
	34	import urllib.request as compat_urllib_request
	35	except ImportError: # Python 2
	36	import urllib2 as compat_urllib_request
	37
	38	try:
	39	import urllib.error as compat_urllib_error
	40	except ImportError: # Python 2
	41	import urllib2 as compat_urllib_error
	42
	43	try:
	44	import urllib.parse as compat_urllib_parse
	45	except ImportError: # Python 2
da779b49	46	import urllib as compat_urllib_parse
01ba00ca PH	47
	48	try:
	49	import http.cookiejar as compat_cookiejar
	50	except ImportError: # Python 2
	51	import cookielib as compat_cookiejar
	52
d77c3dfd FV	53	def preferredencoding():
	54	"""Get preferred encoding.
	55
	56	Returns the best encoding scheme for the system, based on
	57	locale.getpreferredencoding() and some further tweaks.
	58	"""
bae611f2 AS	59	try:
	60	pref = locale.getpreferredencoding()
	61	u'TEST'.encode(pref)
	62	except:
	63	pref = 'UTF-8'
	64
	65	return pref
d77c3dfd FV	66
	67
	68	def htmlentity_transform(matchobj):
dd109dee	69	"""Transforms an HTML entity to a character.
d77c3dfd FV	70
	71	This function receives a match object and is intended to be used with
	72	the re.sub() function.
	73	"""
	74	entity = matchobj.group(1)
	75
	76	# Known non-numeric HTML entity
	77	if entity in htmlentitydefs.name2codepoint:
	78	return unichr(htmlentitydefs.name2codepoint[entity])
	79
89fb51dd	80	mobj = re.match(u'(?u)#(x?\\d+)', entity)
d77c3dfd FV	81	if mobj is not None:
	82	numstr = mobj.group(1)
	83	if numstr.startswith(u'x'):
	84	base = 16
	85	numstr = u'0%s' % numstr
	86	else:
	87	base = 10
dd109dee	88	return unichr(int(numstr, base))
d77c3dfd FV	89
	90	# Unknown entity in name, return its literal representation
	91	return (u'&%s;' % entity)
	92
9beb5af8	93	HTMLParser.locatestarttagend = re.compile(r"""<[a-zA-Z][-.a-zA-Z0-9:_](?:\s+(?:(?<=['"\s])[^\s/>][^\s/=>](?:\s=+\s(?:'[^']'\|"[^"]"\|(?!['"])[^>\s]))?\s))?\s""", re.VERBOSE) # backport bugfix
9e6dd238 FV	94	class IDParser(HTMLParser.HTMLParser):
	95	"""Modified HTMLParser that isolates a tag with the specified id"""
	96	def __init__(self, id):
	97	self.id = id
	98	self.result = None
	99	self.started = False
	100	self.depth = {}
	101	self.html = None
	102	self.watch_startpos = False
9beb5af8	103	self.error_count = 0
9e6dd238 FV	104	HTMLParser.HTMLParser.__init__(self)
9e6dd238 FV	105
9beb5af8	106	def error(self, message):
9beb5af8 FV	107	if self.error_count > 10 or self.started:
	108	raise HTMLParser.HTMLParseError(message, self.getpos())
	109	self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line
	110	self.error_count += 1
	111	self.goahead(1)
	112
9e6dd238 FV	113	def loads(self, html):
	114	self.html = html
	115	self.feed(html)
	116	self.close()
	117
	118	def handle_starttag(self, tag, attrs):
	119	attrs = dict(attrs)
	120	if self.started:
	121	self.find_startpos(None)
	122	if 'id' in attrs and attrs['id'] == self.id:
	123	self.result = [tag]
	124	self.started = True
	125	self.watch_startpos = True
	126	if self.started:
	127	if not tag in self.depth: self.depth[tag] = 0
	128	self.depth[tag] += 1
	129
	130	def handle_endtag(self, tag):
	131	if self.started:
	132	if tag in self.depth: self.depth[tag] -= 1
	133	if self.depth[self.result[0]] == 0:
	134	self.started = False
	135	self.result.append(self.getpos())
	136
	137	def find_startpos(self, x):
	138	"""Needed to put the start position of the result (self.result[1])
	139	after the opening tag with the requested id"""
	140	if self.watch_startpos:
	141	self.watch_startpos = False
	142	self.result.append(self.getpos())
	143	handle_entityref = handle_charref = handle_data = handle_comment = \
	144	handle_decl = handle_pi = unknown_decl = find_startpos
	145
	146	def get_result(self):
b514df20 PH	147	if self.result is None:
	148	return None
	149	if len(self.result) != 3:
	150	return None
9e6dd238 FV	151	lines = self.html.split('\n')
	152	lines = lines[self.result[1][0]-1:self.result[2][0]]
	153	lines[0] = lines[0][self.result[1][1]:]
	154	if len(lines) == 1:
	155	lines[-1] = lines[-1][:self.result[2][1]-self.result[1][1]]
	156	lines[-1] = lines[-1][:self.result[2][1]]
	157	return '\n'.join(lines).strip()
	158
	159	def get_element_by_id(id, html):
	160	"""Return the content of the tag with the specified id in the passed HTML document"""
	161	parser = IDParser(id)
	162	try:
	163	parser.loads(html)
	164	except HTMLParser.HTMLParseError:
	165	pass
	166	return parser.get_result()
	167
	168
	169	def clean_html(html):
	170	"""Clean an HTML snippet into a readable string"""
	171	# Newline vs <br />
	172	html = html.replace('\n', ' ')
	173	html = re.sub('\s<\sbr\s/?\s>\s*', '\n', html)
	174	# Strip html tags
	175	html = re.sub('<.*?>', '', html)
	176	# Replace html entities
	177	html = unescapeHTML(html)
	178	return html
	179
	180
d77c3dfd FV	181	def sanitize_open(filename, open_mode):
	182	"""Try to open the given filename, and slightly tweak it if this fails.
	183
	184	Attempts to open the given filename. If this fails, it tries to change
	185	the filename slightly, step by step, until it's either able to open it
	186	or it fails and raises a final exception, like the standard open()
	187	function.
	188
	189	It returns the tuple (stream, definitive_file_name).
	190	"""
	191	try:
	192	if filename == u'-':
	193	if sys.platform == 'win32':
	194	import msvcrt
	195	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	196	return (sys.stdout, filename)
	197	stream = open(encodeFilename(filename), open_mode)
	198	return (stream, filename)
e08bee32	199	except (IOError, OSError) as err:
d77c3dfd	200	# In case of error, try to remove win32 forbidden chars
89fb51dd	201	filename = re.sub(u'[/<>:"\\\|\\\\?\\*]', u'#', filename)
d77c3dfd FV	202
	203	# An exception here should be caught in the caller
	204	stream = open(encodeFilename(filename), open_mode)
	205	return (stream, filename)
	206
	207
	208	def timeconvert(timestr):
	209	"""Convert RFC 2822 defined time string into system timestamp"""
	210	timestamp = None
	211	timetuple = email.utils.parsedate_tz(timestr)
	212	if timetuple is not None:
	213	timestamp = email.utils.mktime_tz(timetuple)
	214	return timestamp
1c469a94 PH	215
	216	def sanitize_filename(s, restricted=False):
	217	"""Sanitizes a string so it could be used as part of a filename.
	218	If restricted is set, use a stricter subset of allowed characters.
	219	"""
2c288bda	220	def replace_insane(char):
42cb53fc FV	221	if char == '?' or ord(char) < 32 or ord(char) == 127:
	222	return ''
	223	elif char == '"':
240089e5	224	return '' if restricted else '\''
42cb53fc	225	elif char == ':':
1c469a94	226	return '_-' if restricted else ' -'
42cb53fc	227	elif char in '\\/\|*<>':
56781d3d	228	return '_'
dffe658b	229	if restricted and (char in '!&\'' or char.isspace()):
1c469a94	230	return '_'
56781d3d PH	231	if restricted and ord(char) > 127:
56781d3d PH	232	return '_'
2c288bda	233	return char
42cb53fc FV	234
42cb53fc FV	235	result = u''.join(map(replace_insane, s))
56781d3d PH	236	while '__' in result:
	237	result = result.replace('__', '_')
	238	result = result.strip('_')
46cbda0b PH	239	# Common case of "Foreign band name - English song title"
	240	if restricted and result.startswith('-_'):
	241	result = result[2:]
56781d3d PH	242	if not result:
	243	result = '_'
	244	return result
d77c3dfd FV	245
	246	def orderedSet(iterable):
	247	""" Remove all duplicates from the input iterable """
	248	res = []
	249	for el in iterable:
	250	if el not in res:
	251	res.append(el)
	252	return res
	253
	254	def unescapeHTML(s):
	255	"""
dd109dee	256	@param s a string
d77c3dfd FV	257	"""
	258	assert type(s) == type(u'')
	259
89fb51dd	260	result = re.sub(u'(?u)&(.+?);', htmlentity_transform, s)
9e6dd238	261	return result
d77c3dfd FV	262
	263	def encodeFilename(s):
	264	"""
dd109dee	265	@param s The name of the file
d77c3dfd FV	266	"""
	267
	268	assert type(s) == type(u'')
	269
9bb8dc8e	270	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
d77c3dfd FV	271	# Pass u'' directly to use Unicode APIs on Windows 2000 and up
	272	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	273	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	274	return s
	275	else:
	276	return s.encode(sys.getfilesystemencoding(), 'ignore')
	277
	278	class DownloadError(Exception):
	279	"""Download Error exception.
	280
	281	This exception may be thrown by FileDownloader objects if they are not
	282	configured to continue on errors. They will contain the appropriate
	283	error message.
	284	"""
	285	pass
	286
	287
	288	class SameFileError(Exception):
	289	"""Same File exception.
	290
	291	This exception will be thrown by FileDownloader objects if they detect
	292	multiple files would have to be downloaded to the same file on disk.
	293	"""
	294	pass
	295
	296
	297	class PostProcessingError(Exception):
	298	"""Post Processing exception.
	299
	300	This exception may be raised by PostProcessor's .run() method to
	301	indicate an error in the postprocessing task.
	302	"""
	303	pass
	304
	305	class MaxDownloadsReached(Exception):
	306	""" --max-downloads limit has been reached. """
	307	pass
	308
	309
	310	class UnavailableVideoError(Exception):
	311	"""Unavailable Format exception.
	312
	313	This exception will be thrown when a video is requested
	314	in a format that is not available for that video.
	315	"""
	316	pass
	317
	318
	319	class ContentTooShortError(Exception):
	320	"""Content Too Short exception.
	321
	322	This exception may be raised by FileDownloader objects when a file they
	323	download is too small for what the server announced first, indicating
	324	the connection was probably interrupted.
	325	"""
	326	# Both in bytes
	327	downloaded = None
	328	expected = None
	329
	330	def __init__(self, downloaded, expected):
	331	self.downloaded = downloaded
	332	self.expected = expected
	333
	334
0b8c922d FV	335	class Trouble(Exception):
0b8c922d FV	336	"""Trouble helper exception
dffe658b	337
0b8c922d FV	338	This is an exception to be handled with
	339	FileDownloader.trouble
	340	"""
	341
01ba00ca	342	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
d77c3dfd FV	343	"""Handler for HTTP requests and responses.
	344
	345	This class, when installed with an OpenerDirector, automatically adds
	346	the standard headers to every HTTP request and handles gzipped and
	347	deflated responses from web servers. If compression is to be avoided in
	348	a particular request, the original request in the program code only has
	349	to include the HTTP header "Youtubedl-No-Compression", which will be
	350	removed before making the real request.
	351
	352	Part of this code was copied from:
	353
	354	http://techknack.net/python-urllib2-handlers/
	355
	356	Andrew Rowls, the author of that code, agreed to release it to the
	357	public domain.
	358	"""
	359
	360	@staticmethod
	361	def deflate(data):
	362	try:
	363	return zlib.decompress(data, -zlib.MAX_WBITS)
	364	except zlib.error:
	365	return zlib.decompress(data)
	366
	367	@staticmethod
	368	def addinfourl_wrapper(stream, headers, url, code):
01ba00ca PH	369	if hasattr(compat_urllib_request.addinfourl, 'getcode'):
	370	return compat_urllib_request.addinfourl(stream, headers, url, code)
	371	ret = compat_urllib_request.addinfourl(stream, headers, url)
d77c3dfd FV	372	ret.code = code
	373	return ret
	374
	375	def http_request(self, req):
	376	for h in std_headers:
	377	if h in req.headers:
	378	del req.headers[h]
	379	req.add_header(h, std_headers[h])
	380	if 'Youtubedl-no-compression' in req.headers:
	381	if 'Accept-encoding' in req.headers:
	382	del req.headers['Accept-encoding']
	383	del req.headers['Youtubedl-no-compression']
	384	return req
	385
	386	def http_response(self, req, resp):
	387	old_resp = resp
	388	# gzip
	389	if resp.headers.get('Content-encoding', '') == 'gzip':
	390	gz = gzip.GzipFile(fileobj=StringIO.StringIO(resp.read()), mode='r')
	391	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	392	resp.msg = old_resp.msg
	393	# deflate
	394	if resp.headers.get('Content-encoding', '') == 'deflate':
	395	gz = StringIO.StringIO(self.deflate(resp.read()))
	396	resp = self.addinfourl_wrapper(gz, old_resp.headers, old_resp.url, old_resp.code)
	397	resp.msg = old_resp.msg
	398	return resp