jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	# coding: utf-8
	3
	4	from __future__ import unicode_literals
	5
	6	import base64
	7	import binascii
	8	import calendar
	9	import codecs
	10	import collections
	11	import contextlib
	12	import ctypes
	13	import datetime
	14	import email.utils
	15	import email.header
	16	import errno
	17	import functools
	18	import gzip
	19	import hashlib
	20	import hmac
	21	import importlib.util
	22	import io
	23	import itertools
	24	import json
	25	import locale
	26	import math
	27	import operator
	28	import os
	29	import platform
	30	import random
	31	import re
	32	import socket
	33	import ssl
	34	import subprocess
	35	import sys
	36	import tempfile
	37	import time
	38	import traceback
	39	import xml.etree.ElementTree
	40	import zlib
	41	import mimetypes
	42
	43	from .compat import (
	44	compat_HTMLParseError,
	45	compat_HTMLParser,
	46	compat_HTTPError,
	47	compat_basestring,
	48	compat_chr,
	49	compat_cookiejar,
	50	compat_ctypes_WINFUNCTYPE,
	51	compat_etree_fromstring,
	52	compat_expanduser,
	53	compat_html_entities,
	54	compat_html_entities_html5,
	55	compat_http_client,
	56	compat_integer_types,
	57	compat_numeric_types,
	58	compat_kwargs,
	59	compat_os_name,
	60	compat_parse_qs,
	61	compat_shlex_split,
	62	compat_shlex_quote,
	63	compat_str,
	64	compat_struct_pack,
	65	compat_struct_unpack,
	66	compat_urllib_error,
	67	compat_urllib_parse,
	68	compat_urllib_parse_urlencode,
	69	compat_urllib_parse_urlparse,
	70	compat_urllib_parse_urlunparse,
	71	compat_urllib_parse_quote,
	72	compat_urllib_parse_quote_plus,
	73	compat_urllib_parse_unquote_plus,
	74	compat_urllib_request,
	75	compat_urlparse,
	76	compat_xpath,
	77	)
	78
	79	from .socks import (
	80	ProxyType,
	81	sockssocket,
	82	)
	83
	84
	85	def register_socks_protocols():
	86	# "Register" SOCKS protocols
	87	# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
	88	# URLs with protocols not in urlparse.uses_netloc are not handled correctly
	89	for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
	90	if scheme not in compat_urlparse.uses_netloc:
	91	compat_urlparse.uses_netloc.append(scheme)
	92
	93
	94	# This is not clearly defined otherwise
	95	compiled_regex_type = type(re.compile(''))
	96
	97
	98	def random_user_agent():
	99	_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
	100	_CHROME_VERSIONS = (
	101	'90.0.4430.212',
	102	'90.0.4430.24',
	103	'90.0.4430.70',
	104	'90.0.4430.72',
	105	'90.0.4430.85',
	106	'90.0.4430.93',
	107	'91.0.4472.101',
	108	'91.0.4472.106',
	109	'91.0.4472.114',
	110	'91.0.4472.124',
	111	'91.0.4472.164',
	112	'91.0.4472.19',
	113	'91.0.4472.77',
	114	'92.0.4515.107',
	115	'92.0.4515.115',
	116	'92.0.4515.131',
	117	'92.0.4515.159',
	118	'92.0.4515.43',
	119	'93.0.4556.0',
	120	'93.0.4577.15',
	121	'93.0.4577.63',
	122	'93.0.4577.82',
	123	'94.0.4606.41',
	124	'94.0.4606.54',
	125	'94.0.4606.61',
	126	'94.0.4606.71',
	127	'94.0.4606.81',
	128	'94.0.4606.85',
	129	'95.0.4638.17',
	130	'95.0.4638.50',
	131	'95.0.4638.54',
	132	'95.0.4638.69',
	133	'95.0.4638.74',
	134	'96.0.4664.18',
	135	'96.0.4664.45',
	136	'96.0.4664.55',
	137	'96.0.4664.93',
	138	'97.0.4692.20',
	139	)
	140	return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
	141
	142
	143	std_headers = {
	144	'User-Agent': random_user_agent(),
	145	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	146	'Accept-Encoding': 'gzip, deflate',
	147	'Accept-Language': 'en-us,en;q=0.5',
	148	'Sec-Fetch-Mode': 'navigate',
	149	}
	150
	151
	152	USER_AGENTS = {
	153	'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
	154	}
	155
	156
	157	NO_DEFAULT = object()
	158
	159	ENGLISH_MONTH_NAMES = [
	160	'January', 'February', 'March', 'April', 'May', 'June',
	161	'July', 'August', 'September', 'October', 'November', 'December']
	162
	163	MONTH_NAMES = {
	164	'en': ENGLISH_MONTH_NAMES,
	165	'fr': [
	166	'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
	167	'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
	168	}
	169
	170	KNOWN_EXTENSIONS = (
	171	'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
	172	'flv', 'f4v', 'f4a', 'f4b',
	173	'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
	174	'mkv', 'mka', 'mk3d',
	175	'avi', 'divx',
	176	'mov',
	177	'asf', 'wmv', 'wma',
	178	'3gp', '3g2',
	179	'mp3',
	180	'flac',
	181	'ape',
	182	'wav',
	183	'f4f', 'f4m', 'm3u8', 'smil')
	184
	185	# needed for sanitizing filenames in restricted mode
	186	ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
	187	itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
	188	'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
	189
	190	DATE_FORMATS = (
	191	'%d %B %Y',
	192	'%d %b %Y',
	193	'%B %d %Y',
	194	'%B %dst %Y',
	195	'%B %dnd %Y',
	196	'%B %drd %Y',
	197	'%B %dth %Y',
	198	'%b %d %Y',
	199	'%b %dst %Y',
	200	'%b %dnd %Y',
	201	'%b %drd %Y',
	202	'%b %dth %Y',
	203	'%b %dst %Y %I:%M',
	204	'%b %dnd %Y %I:%M',
	205	'%b %drd %Y %I:%M',
	206	'%b %dth %Y %I:%M',
	207	'%Y %m %d',
	208	'%Y-%m-%d',
	209	'%Y.%m.%d.',
	210	'%Y/%m/%d',
	211	'%Y/%m/%d %H:%M',
	212	'%Y/%m/%d %H:%M:%S',
	213	'%Y%m%d%H%M',
	214	'%Y%m%d%H%M%S',
	215	'%Y%m%d',
	216	'%Y-%m-%d %H:%M',
	217	'%Y-%m-%d %H:%M:%S',
	218	'%Y-%m-%d %H:%M:%S.%f',
	219	'%Y-%m-%d %H:%M:%S:%f',
	220	'%d.%m.%Y %H:%M',
	221	'%d.%m.%Y %H.%M',
	222	'%Y-%m-%dT%H:%M:%SZ',
	223	'%Y-%m-%dT%H:%M:%S.%fZ',
	224	'%Y-%m-%dT%H:%M:%S.%f0Z',
	225	'%Y-%m-%dT%H:%M:%S',
	226	'%Y-%m-%dT%H:%M:%S.%f',
	227	'%Y-%m-%dT%H:%M',
	228	'%b %d %Y at %H:%M',
	229	'%b %d %Y at %H:%M:%S',
	230	'%B %d %Y at %H:%M',
	231	'%B %d %Y at %H:%M:%S',
	232	'%H:%M %d-%b-%Y',
	233	)
	234
	235	DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
	236	DATE_FORMATS_DAY_FIRST.extend([
	237	'%d-%m-%Y',
	238	'%d.%m.%Y',
	239	'%d.%m.%y',
	240	'%d/%m/%Y',
	241	'%d/%m/%y',
	242	'%d/%m/%Y %H:%M:%S',
	243	])
	244
	245	DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
	246	DATE_FORMATS_MONTH_FIRST.extend([
	247	'%m-%d-%Y',
	248	'%m.%d.%Y',
	249	'%m/%d/%Y',
	250	'%m/%d/%y',
	251	'%m/%d/%Y %H:%M:%S',
	252	])
	253
	254	PACKED_CODES_RE = r"}$'(.+)',(\d+),(\d+),'([^']+)'\.split\('\\|'$"
	255	JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
	256
	257
	258	def preferredencoding():
	259	"""Get preferred encoding.
	260
	261	Returns the best encoding scheme for the system, based on
	262	locale.getpreferredencoding() and some further tweaks.
	263	"""
	264	try:
	265	pref = locale.getpreferredencoding()
	266	'TEST'.encode(pref)
	267	except Exception:
	268	pref = 'UTF-8'
	269
	270	return pref
	271
	272
	273	def write_json_file(obj, fn):
	274	""" Encode obj as JSON and write it to fn, atomically if possible """
	275
	276	fn = encodeFilename(fn)
	277	if sys.version_info < (3, 0) and sys.platform != 'win32':
	278	encoding = get_filesystem_encoding()
	279	# os.path.basename returns a bytes object, but NamedTemporaryFile
	280	# will fail if the filename contains non ascii characters unless we
	281	# use a unicode object
	282	path_basename = lambda f: os.path.basename(fn).decode(encoding)
	283	# the same for os.path.dirname
	284	path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
	285	else:
	286	path_basename = os.path.basename
	287	path_dirname = os.path.dirname
	288
	289	args = {
	290	'suffix': '.tmp',
	291	'prefix': path_basename(fn) + '.',
	292	'dir': path_dirname(fn),
	293	'delete': False,
	294	}
	295
	296	# In Python 2.x, json.dump expects a bytestream.
	297	# In Python 3.x, it writes to a character stream
	298	if sys.version_info < (3, 0):
	299	args['mode'] = 'wb'
	300	else:
	301	args.update({
	302	'mode': 'w',
	303	'encoding': 'utf-8',
	304	})
	305
	306	tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
	307
	308	try:
	309	with tf:
	310	json.dump(obj, tf, ensure_ascii=False)
	311	if sys.platform == 'win32':
	312	# Need to remove existing file on Windows, else os.rename raises
	313	# WindowsError or FileExistsError.
	314	try:
	315	os.unlink(fn)
	316	except OSError:
	317	pass
	318	try:
	319	mask = os.umask(0)
	320	os.umask(mask)
	321	os.chmod(tf.name, 0o666 & ~mask)
	322	except OSError:
	323	pass
	324	os.rename(tf.name, fn)
	325	except Exception:
	326	try:
	327	os.remove(tf.name)
	328	except OSError:
	329	pass
	330	raise
	331
	332
	333	if sys.version_info >= (2, 7):
	334	def find_xpath_attr(node, xpath, key, val=None):
	335	""" Find the xpath xpath[@key=val] """
	336	assert re.match(r'^[a-zA-Z_-]+$', key)
	337	expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
	338	return node.find(expr)
	339	else:
	340	def find_xpath_attr(node, xpath, key, val=None):
	341	for f in node.findall(compat_xpath(xpath)):
	342	if key not in f.attrib:
	343	continue
	344	if val is None or f.attrib.get(key) == val:
	345	return f
	346	return None
	347
	348	# On python2.6 the xml.etree.ElementTree.Element methods don't support
	349	# the namespace parameter
	350
	351
	352	def xpath_with_ns(path, ns_map):
	353	components = [c.split(':') for c in path.split('/')]
	354	replaced = []
	355	for c in components:
	356	if len(c) == 1:
	357	replaced.append(c[0])
	358	else:
	359	ns, tag = c
	360	replaced.append('{%s}%s' % (ns_map[ns], tag))
	361	return '/'.join(replaced)
	362
	363
	364	def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	365	def _find_xpath(xpath):
	366	return node.find(compat_xpath(xpath))
	367
	368	if isinstance(xpath, (str, compat_str)):
	369	n = _find_xpath(xpath)
	370	else:
	371	for xp in xpath:
	372	n = _find_xpath(xp)
	373	if n is not None:
	374	break
	375
	376	if n is None:
	377	if default is not NO_DEFAULT:
	378	return default
	379	elif fatal:
	380	name = xpath if name is None else name
	381	raise ExtractorError('Could not find XML element %s' % name)
	382	else:
	383	return None
	384	return n
	385
	386
	387	def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	388	n = xpath_element(node, xpath, name, fatal=fatal, default=default)
	389	if n is None or n == default:
	390	return n
	391	if n.text is None:
	392	if default is not NO_DEFAULT:
	393	return default
	394	elif fatal:
	395	name = xpath if name is None else name
	396	raise ExtractorError('Could not find XML element\'s text %s' % name)
	397	else:
	398	return None
	399	return n.text
	400
	401
	402	def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
	403	n = find_xpath_attr(node, xpath, key)
	404	if n is None:
	405	if default is not NO_DEFAULT:
	406	return default
	407	elif fatal:
	408	name = '%s[@%s]' % (xpath, key) if name is None else name
	409	raise ExtractorError('Could not find XML attribute %s' % name)
	410	else:
	411	return None
	412	return n.attrib[key]
	413
	414
	415	def get_element_by_id(id, html):
	416	"""Return the content of the tag with the specified ID in the passed HTML document"""
	417	return get_element_by_attribute('id', id, html)
	418
	419
	420	def get_element_html_by_id(id, html):
	421	"""Return the html of the tag with the specified ID in the passed HTML document"""
	422	return get_element_html_by_attribute('id', id, html)
	423
	424
	425	def get_element_by_class(class_name, html):
	426	"""Return the content of the first tag with the specified class in the passed HTML document"""
	427	retval = get_elements_by_class(class_name, html)
	428	return retval[0] if retval else None
	429
	430
	431	def get_element_html_by_class(class_name, html):
	432	"""Return the html of the first tag with the specified class in the passed HTML document"""
	433	retval = get_elements_html_by_class(class_name, html)
	434	return retval[0] if retval else None
	435
	436
	437	def get_element_by_attribute(attribute, value, html, escape_value=True):
	438	retval = get_elements_by_attribute(attribute, value, html, escape_value)
	439	return retval[0] if retval else None
	440
	441
	442	def get_element_html_by_attribute(attribute, value, html, escape_value=True):
	443	retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
	444	return retval[0] if retval else None
	445
	446
	447	def get_elements_by_class(class_name, html):
	448	"""Return the content of all tags with the specified class in the passed HTML document as a list"""
	449	return get_elements_by_attribute(
	450	'class', r'[^\'"]\b%s\b[^\'"]' % re.escape(class_name),
	451	html, escape_value=False)
	452
	453
	454	def get_elements_html_by_class(class_name, html):
	455	"""Return the html of all tags with the specified class in the passed HTML document as a list"""
	456	return get_elements_html_by_attribute(
	457	'class', r'[^\'"]\b%s\b[^\'"]' % re.escape(class_name),
	458	html, escape_value=False)
	459
	460
	461	def get_elements_by_attribute(args, *kwargs):
	462	"""Return the content of the tag with the specified attribute in the passed HTML document"""
	463	return [content for content, _ in get_elements_text_and_html_by_attribute(args, *kwargs)]
	464
	465
	466	def get_elements_html_by_attribute(args, *kwargs):
	467	"""Return the html of the tag with the specified attribute in the passed HTML document"""
	468	return [whole for _, whole in get_elements_text_and_html_by_attribute(args, *kwargs)]
	469
	470
	471	def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
	472	"""
	473	Return the text (content) and the html (whole) of the tag with the specified
	474	attribute in the passed HTML document
	475	"""
	476
	477	value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
	478
	479	value = re.escape(value) if escape_value else value
	480
	481	partial_element_re = r'''(?x)
	482	<(?P<tag>[a-zA-Z0-9:._-]+)
	483	(?:\s(?:[^>"']\|"[^"]"\|'[^']')*)?
	484	\s%(attribute)s\s=\s(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q)
	485	''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional}
	486
	487	for m in re.finditer(partial_element_re, html):
	488	content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
	489
	490	yield (
	491	unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
	492	whole
	493	)
	494
	495
	496	class HTMLBreakOnClosingTagParser(compat_HTMLParser):
	497	"""
	498	HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
	499	closing tag for the first opening tag it has encountered, and can be used
	500	as a context manager
	501	"""
	502
	503	class HTMLBreakOnClosingTagException(Exception):
	504	pass
	505
	506	def __init__(self):
	507	self.tagstack = collections.deque()
	508	compat_HTMLParser.__init__(self)
	509
	510	def __enter__(self):
	511	return self
	512
	513	def __exit__(self, *_):
	514	self.close()
	515
	516	def close(self):
	517	# handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
	518	# so data remains buffered; we no longer have any interest in it, thus
	519	# override this method to discard it
	520	pass
	521
	522	def handle_starttag(self, tag, _):
	523	self.tagstack.append(tag)
	524
	525	def handle_endtag(self, tag):
	526	if not self.tagstack:
	527	raise compat_HTMLParseError('no tags in the stack')
	528	while self.tagstack:
	529	inner_tag = self.tagstack.pop()
	530	if inner_tag == tag:
	531	break
	532	else:
	533	raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
	534	if not self.tagstack:
	535	raise self.HTMLBreakOnClosingTagException()
	536
	537
	538	def get_element_text_and_html_by_tag(tag, html):
	539	"""
	540	For the first element with the specified tag in the passed HTML document
	541	return its' content (text) and the whole element (html)
	542	"""
	543	def find_or_raise(haystack, needle, exc):
	544	try:
	545	return haystack.index(needle)
	546	except ValueError:
	547	raise exc
	548	closing_tag = f'</{tag}>'
	549	whole_start = find_or_raise(
	550	html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
	551	content_start = find_or_raise(
	552	html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
	553	content_start += whole_start + 1
	554	with HTMLBreakOnClosingTagParser() as parser:
	555	parser.feed(html[whole_start:content_start])
	556	if not parser.tagstack or parser.tagstack[0] != tag:
	557	raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
	558	offset = content_start
	559	while offset < len(html):
	560	next_closing_tag_start = find_or_raise(
	561	html[offset:], closing_tag,
	562	compat_HTMLParseError(f'closing {tag} tag not found'))
	563	next_closing_tag_end = next_closing_tag_start + len(closing_tag)
	564	try:
	565	parser.feed(html[offset:offset + next_closing_tag_end])
	566	offset += next_closing_tag_end
	567	except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
	568	return html[content_start:offset + next_closing_tag_start], \
	569	html[whole_start:offset + next_closing_tag_end]
	570	raise compat_HTMLParseError('unexpected end of html')
	571
	572
	573	class HTMLAttributeParser(compat_HTMLParser):
	574	"""Trivial HTML parser to gather the attributes for a single element"""
	575
	576	def __init__(self):
	577	self.attrs = {}
	578	compat_HTMLParser.__init__(self)
	579
	580	def handle_starttag(self, tag, attrs):
	581	self.attrs = dict(attrs)
	582
	583
	584	class HTMLListAttrsParser(compat_HTMLParser):
	585	"""HTML parser to gather the attributes for the elements of a list"""
	586
	587	def __init__(self):
	588	compat_HTMLParser.__init__(self)
	589	self.items = []
	590	self._level = 0
	591
	592	def handle_starttag(self, tag, attrs):
	593	if tag == 'li' and self._level == 0:
	594	self.items.append(dict(attrs))
	595	self._level += 1
	596
	597	def handle_endtag(self, tag):
	598	self._level -= 1
	599
	600
	601	def extract_attributes(html_element):
	602	"""Given a string for an HTML element such as
	603	<el
	604	a="foo" B="bar" c="&98;az" d=boz
	605	empty= noval entity="&"
	606	sq='"' dq="'"
	607	>
	608	Decode and return a dictionary of attributes.
	609	{
	610	'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
	611	'empty': '', 'noval': None, 'entity': '&',
	612	'sq': '"', 'dq': '\''
	613	}.
	614	NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
	615	but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
	616	"""
	617	parser = HTMLAttributeParser()
	618	try:
	619	parser.feed(html_element)
	620	parser.close()
	621	# Older Python may throw HTMLParseError in case of malformed HTML
	622	except compat_HTMLParseError:
	623	pass
	624	return parser.attrs
	625
	626
	627	def parse_list(webpage):
	628	"""Given a string for an series of HTML <li> elements,
	629	return a dictionary of their attributes"""
	630	parser = HTMLListAttrsParser()
	631	parser.feed(webpage)
	632	parser.close()
	633	return parser.items
	634
	635
	636	def clean_html(html):
	637	"""Clean an HTML snippet into a readable string"""
	638
	639	if html is None: # Convenience for sanitizing descriptions etc.
	640	return html
	641
	642	html = re.sub(r'\s+', ' ', html)
	643	html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
	644	html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
	645	# Strip html tags
	646	html = re.sub('<.*?>', '', html)
	647	# Replace html entities
	648	html = unescapeHTML(html)
	649	return html.strip()
	650
	651
	652	def sanitize_open(filename, open_mode):
	653	"""Try to open the given filename, and slightly tweak it if this fails.
	654
	655	Attempts to open the given filename. If this fails, it tries to change
	656	the filename slightly, step by step, until it's either able to open it
	657	or it fails and raises a final exception, like the standard open()
	658	function.
	659
	660	It returns the tuple (stream, definitive_file_name).
	661	"""
	662	try:
	663	if filename == '-':
	664	if sys.platform == 'win32':
	665	import msvcrt
	666	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	667	return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
	668	stream = locked_file(filename, open_mode, block=False).open()
	669	return (stream, filename)
	670	except (IOError, OSError) as err:
	671	if err.errno in (errno.EACCES,):
	672	raise
	673
	674	# In case of error, try to remove win32 forbidden chars
	675	alt_filename = sanitize_path(filename)
	676	if alt_filename == filename:
	677	raise
	678	else:
	679	# An exception here should be caught in the caller
	680	stream = locked_file(filename, open_mode, block=False).open()
	681	return (stream, alt_filename)
	682
	683
	684	def timeconvert(timestr):
	685	"""Convert RFC 2822 defined time string into system timestamp"""
	686	timestamp = None
	687	timetuple = email.utils.parsedate_tz(timestr)
	688	if timetuple is not None:
	689	timestamp = email.utils.mktime_tz(timetuple)
	690	return timestamp
	691
	692
	693	def sanitize_filename(s, restricted=False, is_id=False):
	694	"""Sanitizes a string so it could be used as part of a filename.
	695	If restricted is set, use a stricter subset of allowed characters.
	696	Set is_id if this is not an arbitrary string, but an ID that should be kept
	697	if possible.
	698	"""
	699	def replace_insane(char):
	700	if restricted and char in ACCENT_CHARS:
	701	return ACCENT_CHARS[char]
	702	elif not restricted and char == '\n':
	703	return ' '
	704	elif char == '?' or ord(char) < 32 or ord(char) == 127:
	705	return ''
	706	elif char == '"':
	707	return '' if restricted else '\''
	708	elif char == ':':
	709	return '_-' if restricted else ' -'
	710	elif char in '\\/\|*<>':
	711	return '_'
	712	if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
	713	return '_'
	714	if restricted and ord(char) > 127:
	715	return '_'
	716	return char
	717
	718	if s == '':
	719	return ''
	720	# Handle timestamps
	721	s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
	722	result = ''.join(map(replace_insane, s))
	723	if not is_id:
	724	while '__' in result:
	725	result = result.replace('__', '_')
	726	result = result.strip('_')
	727	# Common case of "Foreign band name - English song title"
	728	if restricted and result.startswith('-_'):
	729	result = result[2:]
	730	if result.startswith('-'):
	731	result = '_' + result[len('-'):]
	732	result = result.lstrip('.')
	733	if not result:
	734	result = '_'
	735	return result
	736
	737
	738	def sanitize_path(s, force=False):
	739	"""Sanitizes and normalizes path on Windows"""
	740	if sys.platform == 'win32':
	741	force = False
	742	drive_or_unc, _ = os.path.splitdrive(s)
	743	if sys.version_info < (2, 7) and not drive_or_unc:
	744	drive_or_unc, _ = os.path.splitunc(s)
	745	elif force:
	746	drive_or_unc = ''
	747	else:
	748	return s
	749
	750	norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
	751	if drive_or_unc:
	752	norm_path.pop(0)
	753	sanitized_path = [
	754	path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\\|\\?\*]\|[\s.]$)', '#', path_part)
	755	for path_part in norm_path]
	756	if drive_or_unc:
	757	sanitized_path.insert(0, drive_or_unc + os.path.sep)
	758	elif force and s[0] == os.path.sep:
	759	sanitized_path.insert(0, os.path.sep)
	760	return os.path.join(*sanitized_path)
	761
	762
	763	def sanitize_url(url):
	764	# Prepend protocol-less URLs with `http:` scheme in order to mitigate
	765	# the number of unwanted failures due to missing protocol
	766	if url.startswith('//'):
	767	return 'http:%s' % url
	768	# Fix some common typos seen so far
	769	COMMON_TYPOS = (
	770	# https://github.com/ytdl-org/youtube-dl/issues/15649
	771	(r'^httpss://', r'https://'),
	772	# https://bx1.be/lives/direct-tv/
	773	(r'^rmtp([es]?)://', r'rtmp\1://'),
	774	)
	775	for mistake, fixup in COMMON_TYPOS:
	776	if re.match(mistake, url):
	777	return re.sub(mistake, fixup, url)
	778	return url
	779
	780
	781	def extract_basic_auth(url):
	782	parts = compat_urlparse.urlsplit(url)
	783	if parts.username is None:
	784	return url, None
	785	url = compat_urlparse.urlunsplit(parts._replace(netloc=(
	786	parts.hostname if parts.port is None
	787	else '%s:%d' % (parts.hostname, parts.port))))
	788	auth_payload = base64.b64encode(
	789	('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
	790	return url, 'Basic ' + auth_payload.decode('utf-8')
	791
	792
	793	def sanitized_Request(url, args, *kwargs):
	794	url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
	795	if auth_header is not None:
	796	headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
	797	headers['Authorization'] = auth_header
	798	return compat_urllib_request.Request(url, args, *kwargs)
	799
	800
	801	def expand_path(s):
	802	"""Expand shell variables and ~"""
	803	return os.path.expandvars(compat_expanduser(s))
	804
	805
	806	def orderedSet(iterable):
	807	""" Remove all duplicates from the input iterable """
	808	res = []
	809	for el in iterable:
	810	if el not in res:
	811	res.append(el)
	812	return res
	813
	814
	815	def _htmlentity_transform(entity_with_semicolon):
	816	"""Transforms an HTML entity to a character."""
	817	entity = entity_with_semicolon[:-1]
	818
	819	# Known non-numeric HTML entity
	820	if entity in compat_html_entities.name2codepoint:
	821	return compat_chr(compat_html_entities.name2codepoint[entity])
	822
	823	# TODO: HTML5 allows entities without a semicolon. For example,
	824	# '&Eacuteric' should be decoded as 'Éric'.
	825	if entity_with_semicolon in compat_html_entities_html5:
	826	return compat_html_entities_html5[entity_with_semicolon]
	827
	828	mobj = re.match(r'#(x[0-9a-fA-F]+\|[0-9]+)', entity)
	829	if mobj is not None:
	830	numstr = mobj.group(1)
	831	if numstr.startswith('x'):
	832	base = 16
	833	numstr = '0%s' % numstr
	834	else:
	835	base = 10
	836	# See https://github.com/ytdl-org/youtube-dl/issues/7518
	837	try:
	838	return compat_chr(int(numstr, base))
	839	except ValueError:
	840	pass
	841
	842	# Unknown entity in name, return its literal representation
	843	return '&%s;' % entity
	844
	845
	846	def unescapeHTML(s):
	847	if s is None:
	848	return None
	849	assert type(s) == compat_str
	850
	851	return re.sub(
	852	r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
	853
	854
	855	def escapeHTML(text):
	856	return (
	857	text
	858	.replace('&', '&')
	859	.replace('<', '<')
	860	.replace('>', '>')
	861	.replace('"', '"')
	862	.replace("'", ''')
	863	)
	864
	865
	866	def process_communicate_or_kill(p, args, *kwargs):
	867	try:
	868	return p.communicate(args, *kwargs)
	869	except BaseException: # Including KeyboardInterrupt
	870	p.kill()
	871	p.wait()
	872	raise
	873
	874
	875	class Popen(subprocess.Popen):
	876	if sys.platform == 'win32':
	877	_startupinfo = subprocess.STARTUPINFO()
	878	_startupinfo.dwFlags \|= subprocess.STARTF_USESHOWWINDOW
	879	else:
	880	_startupinfo = None
	881
	882	def __init__(self, args, *kwargs):
	883	super(Popen, self).__init__(args, *kwargs, startupinfo=self._startupinfo)
	884
	885	def communicate_or_kill(self, args, *kwargs):
	886	return process_communicate_or_kill(self, args, *kwargs)
	887
	888
	889	def get_subprocess_encoding():
	890	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	891	# For subprocess calls, encode with locale encoding
	892	# Refer to http://stackoverflow.com/a/9951851/35070
	893	encoding = preferredencoding()
	894	else:
	895	encoding = sys.getfilesystemencoding()
	896	if encoding is None:
	897	encoding = 'utf-8'
	898	return encoding
	899
	900
	901	def encodeFilename(s, for_subprocess=False):
	902	"""
	903	@param s The name of the file
	904	"""
	905
	906	assert type(s) == compat_str
	907
	908	# Python 3 has a Unicode API
	909	if sys.version_info >= (3, 0):
	910	return s
	911
	912	# Pass '' directly to use Unicode APIs on Windows 2000 and up
	913	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	914	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	915	if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	916	return s
	917
	918	# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
	919	if sys.platform.startswith('java'):
	920	return s
	921
	922	return s.encode(get_subprocess_encoding(), 'ignore')
	923
	924
	925	def decodeFilename(b, for_subprocess=False):
	926
	927	if sys.version_info >= (3, 0):
	928	return b
	929
	930	if not isinstance(b, bytes):
	931	return b
	932
	933	return b.decode(get_subprocess_encoding(), 'ignore')
	934
	935
	936	def encodeArgument(s):
	937	if not isinstance(s, compat_str):
	938	# Legacy code that uses byte strings
	939	# Uncomment the following line after fixing all post processors
	940	# assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
	941	s = s.decode('ascii')
	942	return encodeFilename(s, True)
	943
	944
	945	def decodeArgument(b):
	946	return decodeFilename(b, True)
	947
	948
	949	def decodeOption(optval):
	950	if optval is None:
	951	return optval
	952	if isinstance(optval, bytes):
	953	optval = optval.decode(preferredencoding())
	954
	955	assert isinstance(optval, compat_str)
	956	return optval
	957
	958
	959	_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
	960
	961
	962	def timetuple_from_msec(msec):
	963	secs, msec = divmod(msec, 1000)
	964	mins, secs = divmod(secs, 60)
	965	hrs, mins = divmod(mins, 60)
	966	return _timetuple(hrs, mins, secs, msec)
	967
	968
	969	def formatSeconds(secs, delim=':', msec=False):
	970	time = timetuple_from_msec(secs * 1000)
	971	if time.hours:
	972	ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
	973	elif time.minutes:
	974	ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
	975	else:
	976	ret = '%d' % time.seconds
	977	return '%s.%03d' % (ret, time.milliseconds) if msec else ret
	978
	979
	980	def _ssl_load_windows_store_certs(ssl_context, storename):
	981	# Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
	982	try:
	983	certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
	984	if encoding == 'x509_asn' and (
	985	trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
	986	except PermissionError:
	987	return
	988	for cert in certs:
	989	try:
	990	ssl_context.load_verify_locations(cadata=cert)
	991	except ssl.SSLError:
	992	pass
	993
	994
	995	def make_HTTPS_handler(params, **kwargs):
	996	opts_check_certificate = not params.get('nocheckcertificate')
	997	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
	998	context.check_hostname = opts_check_certificate
	999	if params.get('legacyserverconnect'):
	1000	context.options \|= 4 # SSL_OP_LEGACY_SERVER_CONNECT
	1001	context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
	1002	if opts_check_certificate:
	1003	try:
	1004	context.load_default_certs()
	1005	# Work around the issue in load_default_certs when there are bad certificates. See:
	1006	# https://github.com/yt-dlp/yt-dlp/issues/1060,
	1007	# https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
	1008	except ssl.SSLError:
	1009	# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
	1010	if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
	1011	# Create a new context to discard any certificates that were already loaded
	1012	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
	1013	context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
	1014	for storename in ('CA', 'ROOT'):
	1015	_ssl_load_windows_store_certs(context, storename)
	1016	context.set_default_verify_paths()
	1017	return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
	1018
	1019
	1020	def bug_reports_message(before=';'):
	1021	msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , '
	1022	'filling out the "Broken site" issue template properly. '
	1023	'Confirm you are on the latest version using -U')
	1024
	1025	before = before.rstrip()
	1026	if not before or before.endswith(('.', '!', '?')):
	1027	msg = msg[0].title() + msg[1:]
	1028
	1029	return (before + ' ' if before else '') + msg
	1030
	1031
	1032	class YoutubeDLError(Exception):
	1033	"""Base exception for YoutubeDL errors."""
	1034	msg = None
	1035
	1036	def __init__(self, msg=None):
	1037	if msg is not None:
	1038	self.msg = msg
	1039	elif self.msg is None:
	1040	self.msg = type(self).__name__
	1041	super().__init__(self.msg)
	1042
	1043
	1044	network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
	1045	if hasattr(ssl, 'CertificateError'):
	1046	network_exceptions.append(ssl.CertificateError)
	1047	network_exceptions = tuple(network_exceptions)
	1048
	1049
	1050	class ExtractorError(YoutubeDLError):
	1051	"""Error during info extraction."""
	1052
	1053	def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
	1054	""" tb, if given, is the original traceback (so that it can be printed out).
	1055	If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
	1056	"""
	1057	if sys.exc_info()[0] in network_exceptions:
	1058	expected = True
	1059
	1060	self.msg = str(msg)
	1061	self.traceback = tb
	1062	self.expected = expected
	1063	self.cause = cause
	1064	self.video_id = video_id
	1065	self.ie = ie
	1066	self.exc_info = sys.exc_info() # preserve original exception
	1067
	1068	super(ExtractorError, self).__init__(''.join((
	1069	format_field(ie, template='[%s] '),
	1070	format_field(video_id, template='%s: '),
	1071	self.msg,
	1072	format_field(cause, template=' (caused by %r)'),
	1073	'' if expected else bug_reports_message())))
	1074
	1075	def format_traceback(self):
	1076	if self.traceback is None:
	1077	return None
	1078	return ''.join(traceback.format_tb(self.traceback))
	1079
	1080
	1081	class UnsupportedError(ExtractorError):
	1082	def __init__(self, url):
	1083	super(UnsupportedError, self).__init__(
	1084	'Unsupported URL: %s' % url, expected=True)
	1085	self.url = url
	1086
	1087
	1088	class RegexNotFoundError(ExtractorError):
	1089	"""Error when a regex didn't match"""
	1090	pass
	1091
	1092
	1093	class GeoRestrictedError(ExtractorError):
	1094	"""Geographic restriction Error exception.
	1095
	1096	This exception may be thrown when a video is not available from your
	1097	geographic location due to geographic restrictions imposed by a website.
	1098	"""
	1099
	1100	def __init__(self, msg, countries=None, **kwargs):
	1101	kwargs['expected'] = True
	1102	super(GeoRestrictedError, self).__init__(msg, **kwargs)
	1103	self.countries = countries
	1104
	1105
	1106	class DownloadError(YoutubeDLError):
	1107	"""Download Error exception.
	1108
	1109	This exception may be thrown by FileDownloader objects if they are not
	1110	configured to continue on errors. They will contain the appropriate
	1111	error message.
	1112	"""
	1113
	1114	def __init__(self, msg, exc_info=None):
	1115	""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
	1116	super(DownloadError, self).__init__(msg)
	1117	self.exc_info = exc_info
	1118
	1119
	1120	class EntryNotInPlaylist(YoutubeDLError):
	1121	"""Entry not in playlist exception.
	1122
	1123	This exception will be thrown by YoutubeDL when a requested entry
	1124	is not found in the playlist info_dict
	1125	"""
	1126	msg = 'Entry not found in info'
	1127
	1128
	1129	class SameFileError(YoutubeDLError):
	1130	"""Same File exception.
	1131
	1132	This exception will be thrown by FileDownloader objects if they detect
	1133	multiple files would have to be downloaded to the same file on disk.
	1134	"""
	1135	msg = 'Fixed output name but more than one file to download'
	1136
	1137	def __init__(self, filename=None):
	1138	if filename is not None:
	1139	self.msg += f': {filename}'
	1140	super().__init__(self.msg)
	1141
	1142
	1143	class PostProcessingError(YoutubeDLError):
	1144	"""Post Processing exception.
	1145
	1146	This exception may be raised by PostProcessor's .run() method to
	1147	indicate an error in the postprocessing task.
	1148	"""
	1149
	1150
	1151	class DownloadCancelled(YoutubeDLError):
	1152	""" Exception raised when the download queue should be interrupted """
	1153	msg = 'The download was cancelled'
	1154
	1155
	1156	class ExistingVideoReached(DownloadCancelled):
	1157	""" --break-on-existing triggered """
	1158	msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
	1159
	1160
	1161	class RejectedVideoReached(DownloadCancelled):
	1162	""" --break-on-reject triggered """
	1163	msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
	1164
	1165
	1166	class MaxDownloadsReached(DownloadCancelled):
	1167	""" --max-downloads limit has been reached. """
	1168	msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
	1169
	1170
	1171	class ReExtractInfo(YoutubeDLError):
	1172	""" Video info needs to be re-extracted. """
	1173
	1174	def __init__(self, msg, expected=False):
	1175	super().__init__(msg)
	1176	self.expected = expected
	1177
	1178
	1179	class ThrottledDownload(ReExtractInfo):
	1180	""" Download speed below --throttled-rate. """
	1181	msg = 'The download speed is below throttle limit'
	1182
	1183	def __init__(self):
	1184	super().__init__(self.msg, expected=False)
	1185
	1186
	1187	class UnavailableVideoError(YoutubeDLError):
	1188	"""Unavailable Format exception.
	1189
	1190	This exception will be thrown when a video is requested
	1191	in a format that is not available for that video.
	1192	"""
	1193	msg = 'Unable to download video'
	1194
	1195	def __init__(self, err=None):
	1196	if err is not None:
	1197	self.msg += f': {err}'
	1198	super().__init__(self.msg)
	1199
	1200
	1201	class ContentTooShortError(YoutubeDLError):
	1202	"""Content Too Short exception.
	1203
	1204	This exception may be raised by FileDownloader objects when a file they
	1205	download is too small for what the server announced first, indicating
	1206	the connection was probably interrupted.
	1207	"""
	1208
	1209	def __init__(self, downloaded, expected):
	1210	super(ContentTooShortError, self).__init__(
	1211	'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
	1212	)
	1213	# Both in bytes
	1214	self.downloaded = downloaded
	1215	self.expected = expected
	1216
	1217
	1218	class XAttrMetadataError(YoutubeDLError):
	1219	def __init__(self, code=None, msg='Unknown error'):
	1220	super(XAttrMetadataError, self).__init__(msg)
	1221	self.code = code
	1222	self.msg = msg
	1223
	1224	# Parsing code and msg
	1225	if (self.code in (errno.ENOSPC, errno.EDQUOT)
	1226	or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
	1227	self.reason = 'NO_SPACE'
	1228	elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
	1229	self.reason = 'VALUE_TOO_LONG'
	1230	else:
	1231	self.reason = 'NOT_SUPPORTED'
	1232
	1233
	1234	class XAttrUnavailableError(YoutubeDLError):
	1235	pass
	1236
	1237
	1238	def _create_http_connection(ydl_handler, http_class, is_https, args, *kwargs):
	1239	# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
	1240	# expected HTTP responses to meet HTTP/1.0 or later (see also
	1241	# https://github.com/ytdl-org/youtube-dl/issues/6727)
	1242	if sys.version_info < (3, 0):
	1243	kwargs['strict'] = True
	1244	hc = http_class(args, *compat_kwargs(kwargs))
	1245	source_address = ydl_handler._params.get('source_address')
	1246
	1247	if source_address is not None:
	1248	# This is to workaround _create_connection() from socket where it will try all
	1249	# address data from getaddrinfo() including IPv6. This filters the result from
	1250	# getaddrinfo() based on the source_address value.
	1251	# This is based on the cpython socket.create_connection() function.
	1252	# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
	1253	def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
	1254	host, port = address
	1255	err = None
	1256	addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
	1257	af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
	1258	ip_addrs = [addr for addr in addrs if addr[0] == af]
	1259	if addrs and not ip_addrs:
	1260	ip_version = 'v4' if af == socket.AF_INET else 'v6'
	1261	raise socket.error(
	1262	"No remote IP%s addresses available for connect, can't use '%s' as source address"
	1263	% (ip_version, source_address[0]))
	1264	for res in ip_addrs:
	1265	af, socktype, proto, canonname, sa = res
	1266	sock = None
	1267	try:
	1268	sock = socket.socket(af, socktype, proto)
	1269	if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
	1270	sock.settimeout(timeout)
	1271	sock.bind(source_address)
	1272	sock.connect(sa)
	1273	err = None # Explicitly break reference cycle
	1274	return sock
	1275	except socket.error as _:
	1276	err = _
	1277	if sock is not None:
	1278	sock.close()
	1279	if err is not None:
	1280	raise err
	1281	else:
	1282	raise socket.error('getaddrinfo returns an empty list')
	1283	if hasattr(hc, '_create_connection'):
	1284	hc._create_connection = _create_connection
	1285	sa = (source_address, 0)
	1286	if hasattr(hc, 'source_address'): # Python 2.7+
	1287	hc.source_address = sa
	1288	else: # Python 2.6
	1289	def _hc_connect(self, args, *kwargs):
	1290	sock = _create_connection(
	1291	(self.host, self.port), self.timeout, sa)
	1292	if is_https:
	1293	self.sock = ssl.wrap_socket(
	1294	sock, self.key_file, self.cert_file,
	1295	ssl_version=ssl.PROTOCOL_TLSv1)
	1296	else:
	1297	self.sock = sock
	1298	hc.connect = functools.partial(_hc_connect, hc)
	1299
	1300	return hc
	1301
	1302
	1303	def handle_youtubedl_headers(headers):
	1304	filtered_headers = headers
	1305
	1306	if 'Youtubedl-no-compression' in filtered_headers:
	1307	filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
	1308	del filtered_headers['Youtubedl-no-compression']
	1309
	1310	return filtered_headers
	1311
	1312
	1313	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	1314	"""Handler for HTTP requests and responses.
	1315
	1316	This class, when installed with an OpenerDirector, automatically adds
	1317	the standard headers to every HTTP request and handles gzipped and
	1318	deflated responses from web servers. If compression is to be avoided in
	1319	a particular request, the original request in the program code only has
	1320	to include the HTTP header "Youtubedl-no-compression", which will be
	1321	removed before making the real request.
	1322
	1323	Part of this code was copied from:
	1324
	1325	http://techknack.net/python-urllib2-handlers/
	1326
	1327	Andrew Rowls, the author of that code, agreed to release it to the
	1328	public domain.
	1329	"""
	1330
	1331	def __init__(self, params, args, *kwargs):
	1332	compat_urllib_request.HTTPHandler.__init__(self, args, *kwargs)
	1333	self._params = params
	1334
	1335	def http_open(self, req):
	1336	conn_class = compat_http_client.HTTPConnection
	1337
	1338	socks_proxy = req.headers.get('Ytdl-socks-proxy')
	1339	if socks_proxy:
	1340	conn_class = make_socks_conn_class(conn_class, socks_proxy)
	1341	del req.headers['Ytdl-socks-proxy']
	1342
	1343	return self.do_open(functools.partial(
	1344	_create_http_connection, self, conn_class, False),
	1345	req)
	1346
	1347	@staticmethod
	1348	def deflate(data):
	1349	if not data:
	1350	return data
	1351	try:
	1352	return zlib.decompress(data, -zlib.MAX_WBITS)
	1353	except zlib.error:
	1354	return zlib.decompress(data)
	1355
	1356	def http_request(self, req):
	1357	# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
	1358	# always respected by websites, some tend to give out URLs with non percent-encoded
	1359	# non-ASCII characters (see telemb.py, ard.py [#3412])
	1360	# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
	1361	# To work around aforementioned issue we will replace request's original URL with
	1362	# percent-encoded one
	1363	# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
	1364	# the code of this workaround has been moved here from YoutubeDL.urlopen()
	1365	url = req.get_full_url()
	1366	url_escaped = escape_url(url)
	1367
	1368	# Substitute URL if any change after escaping
	1369	if url != url_escaped:
	1370	req = update_Request(req, url=url_escaped)
	1371
	1372	for h, v in std_headers.items():
	1373	# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
	1374	# The dict keys are capitalized because of this bug by urllib
	1375	if h.capitalize() not in req.headers:
	1376	req.add_header(h, v)
	1377
	1378	req.headers = handle_youtubedl_headers(req.headers)
	1379
	1380	if sys.version_info < (2, 7) and '#' in req.get_full_url():
	1381	# Python 2.6 is brain-dead when it comes to fragments
	1382	req._Request__original = req._Request__original.partition('#')[0]
	1383	req._Request__r_type = req._Request__r_type.partition('#')[0]
	1384
	1385	return req
	1386
	1387	def http_response(self, req, resp):
	1388	old_resp = resp
	1389	# gzip
	1390	if resp.headers.get('Content-encoding', '') == 'gzip':
	1391	content = resp.read()
	1392	gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
	1393	try:
	1394	uncompressed = io.BytesIO(gz.read())
	1395	except IOError as original_ioerror:
	1396	# There may be junk add the end of the file
	1397	# See http://stackoverflow.com/q/4928560/35070 for details
	1398	for i in range(1, 1024):
	1399	try:
	1400	gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
	1401	uncompressed = io.BytesIO(gz.read())
	1402	except IOError:
	1403	continue
	1404	break
	1405	else:
	1406	raise original_ioerror
	1407	resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
	1408	resp.msg = old_resp.msg
	1409	del resp.headers['Content-encoding']
	1410	# deflate
	1411	if resp.headers.get('Content-encoding', '') == 'deflate':
	1412	gz = io.BytesIO(self.deflate(resp.read()))
	1413	resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
	1414	resp.msg = old_resp.msg
	1415	del resp.headers['Content-encoding']
	1416	# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
	1417	# https://github.com/ytdl-org/youtube-dl/issues/6457).
	1418	if 300 <= resp.code < 400:
	1419	location = resp.headers.get('Location')
	1420	if location:
	1421	# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
	1422	if sys.version_info >= (3, 0):
	1423	location = location.encode('iso-8859-1').decode('utf-8')
	1424	else:
	1425	location = location.decode('utf-8')
	1426	location_escaped = escape_url(location)
	1427	if location != location_escaped:
	1428	del resp.headers['Location']
	1429	if sys.version_info < (3, 0):
	1430	location_escaped = location_escaped.encode('utf-8')
	1431	resp.headers['Location'] = location_escaped
	1432	return resp
	1433
	1434	https_request = http_request
	1435	https_response = http_response
	1436
	1437
	1438	def make_socks_conn_class(base_class, socks_proxy):
	1439	assert issubclass(base_class, (
	1440	compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
	1441
	1442	url_components = compat_urlparse.urlparse(socks_proxy)
	1443	if url_components.scheme.lower() == 'socks5':
	1444	socks_type = ProxyType.SOCKS5
	1445	elif url_components.scheme.lower() in ('socks', 'socks4'):
	1446	socks_type = ProxyType.SOCKS4
	1447	elif url_components.scheme.lower() == 'socks4a':
	1448	socks_type = ProxyType.SOCKS4A
	1449
	1450	def unquote_if_non_empty(s):
	1451	if not s:
	1452	return s
	1453	return compat_urllib_parse_unquote_plus(s)
	1454
	1455	proxy_args = (
	1456	socks_type,
	1457	url_components.hostname, url_components.port or 1080,
	1458	True, # Remote DNS
	1459	unquote_if_non_empty(url_components.username),
	1460	unquote_if_non_empty(url_components.password),
	1461	)
	1462
	1463	class SocksConnection(base_class):
	1464	def connect(self):
	1465	self.sock = sockssocket()
	1466	self.sock.setproxy(*proxy_args)
	1467	if type(self.timeout) in (int, float):
	1468	self.sock.settimeout(self.timeout)
	1469	self.sock.connect((self.host, self.port))
	1470
	1471	if isinstance(self, compat_http_client.HTTPSConnection):
	1472	if hasattr(self, '_context'): # Python > 2.6
	1473	self.sock = self._context.wrap_socket(
	1474	self.sock, server_hostname=self.host)
	1475	else:
	1476	self.sock = ssl.wrap_socket(self.sock)
	1477
	1478	return SocksConnection
	1479
	1480
	1481	class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
	1482	def __init__(self, params, https_conn_class=None, args, *kwargs):
	1483	compat_urllib_request.HTTPSHandler.__init__(self, args, *kwargs)
	1484	self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
	1485	self._params = params
	1486
	1487	def https_open(self, req):
	1488	kwargs = {}
	1489	conn_class = self._https_conn_class
	1490
	1491	if hasattr(self, '_context'): # python > 2.6
	1492	kwargs['context'] = self._context
	1493	if hasattr(self, '_check_hostname'): # python 3.x
	1494	kwargs['check_hostname'] = self._check_hostname
	1495
	1496	socks_proxy = req.headers.get('Ytdl-socks-proxy')
	1497	if socks_proxy:
	1498	conn_class = make_socks_conn_class(conn_class, socks_proxy)
	1499	del req.headers['Ytdl-socks-proxy']
	1500
	1501	return self.do_open(functools.partial(
	1502	_create_http_connection, self, conn_class, True),
	1503	req, **kwargs)
	1504
	1505
	1506	class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
	1507	"""
	1508	See [1] for cookie file format.
	1509
	1510	1. https://curl.haxx.se/docs/http-cookies.html
	1511	"""
	1512	_HTTPONLY_PREFIX = '#HttpOnly_'
	1513	_ENTRY_LEN = 7
	1514	_HEADER = '''# Netscape HTTP Cookie File
	1515	# This file is generated by yt-dlp. Do not edit.
	1516
	1517	'''
	1518	_CookieFileEntry = collections.namedtuple(
	1519	'CookieFileEntry',
	1520	('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
	1521
	1522	def save(self, filename=None, ignore_discard=False, ignore_expires=False):
	1523	"""
	1524	Save cookies to a file.
	1525
	1526	Most of the code is taken from CPython 3.8 and slightly adapted
	1527	to support cookie files with UTF-8 in both python 2 and 3.
	1528	"""
	1529	if filename is None:
	1530	if self.filename is not None:
	1531	filename = self.filename
	1532	else:
	1533	raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
	1534
	1535	# Store session cookies with `expires` set to 0 instead of an empty
	1536	# string
	1537	for cookie in self:
	1538	if cookie.expires is None:
	1539	cookie.expires = 0
	1540
	1541	with io.open(filename, 'w', encoding='utf-8') as f:
	1542	f.write(self._HEADER)
	1543	now = time.time()
	1544	for cookie in self:
	1545	if not ignore_discard and cookie.discard:
	1546	continue
	1547	if not ignore_expires and cookie.is_expired(now):
	1548	continue
	1549	if cookie.secure:
	1550	secure = 'TRUE'
	1551	else:
	1552	secure = 'FALSE'
	1553	if cookie.domain.startswith('.'):
	1554	initial_dot = 'TRUE'
	1555	else:
	1556	initial_dot = 'FALSE'
	1557	if cookie.expires is not None:
	1558	expires = compat_str(cookie.expires)
	1559	else:
	1560	expires = ''
	1561	if cookie.value is None:
	1562	# cookies.txt regards 'Set-Cookie: foo' as a cookie
	1563	# with no name, whereas http.cookiejar regards it as a
	1564	# cookie with no value.
	1565	name = ''
	1566	value = cookie.name
	1567	else:
	1568	name = cookie.name
	1569	value = cookie.value
	1570	f.write(
	1571	'\t'.join([cookie.domain, initial_dot, cookie.path,
	1572	secure, expires, name, value]) + '\n')
	1573
	1574	def load(self, filename=None, ignore_discard=False, ignore_expires=False):
	1575	"""Load cookies from a file."""
	1576	if filename is None:
	1577	if self.filename is not None:
	1578	filename = self.filename
	1579	else:
	1580	raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
	1581
	1582	def prepare_line(line):
	1583	if line.startswith(self._HTTPONLY_PREFIX):
	1584	line = line[len(self._HTTPONLY_PREFIX):]
	1585	# comments and empty lines are fine
	1586	if line.startswith('#') or not line.strip():
	1587	return line
	1588	cookie_list = line.split('\t')
	1589	if len(cookie_list) != self._ENTRY_LEN:
	1590	raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
	1591	cookie = self._CookieFileEntry(*cookie_list)
	1592	if cookie.expires_at and not cookie.expires_at.isdigit():
	1593	raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
	1594	return line
	1595
	1596	cf = io.StringIO()
	1597	with io.open(filename, encoding='utf-8') as f:
	1598	for line in f:
	1599	try:
	1600	cf.write(prepare_line(line))
	1601	except compat_cookiejar.LoadError as e:
	1602	write_string(
	1603	'WARNING: skipping cookie file entry due to %s: %r\n'
	1604	% (e, line), sys.stderr)
	1605	continue
	1606	cf.seek(0)
	1607	self._really_load(cf, filename, ignore_discard, ignore_expires)
	1608	# Session cookies are denoted by either `expires` field set to
	1609	# an empty string or 0. MozillaCookieJar only recognizes the former
	1610	# (see [1]). So we need force the latter to be recognized as session
	1611	# cookies on our own.
	1612	# Session cookies may be important for cookies-based authentication,
	1613	# e.g. usually, when user does not check 'Remember me' check box while
	1614	# logging in on a site, some important cookies are stored as session
	1615	# cookies so that not recognizing them will result in failed login.
	1616	# 1. https://bugs.python.org/issue17164
	1617	for cookie in self:
	1618	# Treat `expires=0` cookies as session cookies
	1619	if cookie.expires == 0:
	1620	cookie.expires = None
	1621	cookie.discard = True
	1622
	1623
	1624	class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
	1625	def __init__(self, cookiejar=None):
	1626	compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
	1627
	1628	def http_response(self, request, response):
	1629	# Python 2 will choke on next HTTP request in row if there are non-ASCII
	1630	# characters in Set-Cookie HTTP header of last response (see
	1631	# https://github.com/ytdl-org/youtube-dl/issues/6769).
	1632	# In order to at least prevent crashing we will percent encode Set-Cookie
	1633	# header before HTTPCookieProcessor starts processing it.
	1634	# if sys.version_info < (3, 0) and response.headers:
	1635	# for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
	1636	# set_cookie = response.headers.get(set_cookie_header)
	1637	# if set_cookie:
	1638	# set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
	1639	# if set_cookie != set_cookie_escaped:
	1640	# del response.headers[set_cookie_header]
	1641	# response.headers[set_cookie_header] = set_cookie_escaped
	1642	return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
	1643
	1644	https_request = compat_urllib_request.HTTPCookieProcessor.http_request
	1645	https_response = http_response
	1646
	1647
	1648	class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
	1649	"""YoutubeDL redirect handler
	1650
	1651	The code is based on HTTPRedirectHandler implementation from CPython [1].
	1652
	1653	This redirect handler solves two issues:
	1654	- ensures redirect URL is always unicode under python 2
	1655	- introduces support for experimental HTTP response status code
	1656	308 Permanent Redirect [2] used by some sites [3]
	1657
	1658	1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
	1659	2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
	1660	3. https://github.com/ytdl-org/youtube-dl/issues/28768
	1661	"""
	1662
	1663	http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
	1664
	1665	def redirect_request(self, req, fp, code, msg, headers, newurl):
	1666	"""Return a Request or None in response to a redirect.
	1667
	1668	This is called by the http_error_30x methods when a
	1669	redirection response is received. If a redirection should
	1670	take place, return a new Request to allow http_error_30x to
	1671	perform the redirect. Otherwise, raise HTTPError if no-one
	1672	else should try to handle this url. Return None if you can't
	1673	but another Handler might.
	1674	"""
	1675	m = req.get_method()
	1676	if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
	1677	or code in (301, 302, 303) and m == "POST")):
	1678	raise compat_HTTPError(req.full_url, code, msg, headers, fp)
	1679	# Strictly (according to RFC 2616), 301 or 302 in response to
	1680	# a POST MUST NOT cause a redirection without confirmation
	1681	# from the user (of urllib.request, in this case). In practice,
	1682	# essentially all clients do redirect in this case, so we do
	1683	# the same.
	1684
	1685	# On python 2 urlh.geturl() may sometimes return redirect URL
	1686	# as byte string instead of unicode. This workaround allows
	1687	# to force it always return unicode.
	1688	if sys.version_info[0] < 3:
	1689	newurl = compat_str(newurl)
	1690
	1691	# Be conciliant with URIs containing a space. This is mainly
	1692	# redundant with the more complete encoding done in http_error_302(),
	1693	# but it is kept for compatibility with other callers.
	1694	newurl = newurl.replace(' ', '%20')
	1695
	1696	CONTENT_HEADERS = ("content-length", "content-type")
	1697	# NB: don't use dict comprehension for python 2.6 compatibility
	1698	newheaders = dict((k, v) for k, v in req.headers.items()
	1699	if k.lower() not in CONTENT_HEADERS)
	1700	return compat_urllib_request.Request(
	1701	newurl, headers=newheaders, origin_req_host=req.origin_req_host,
	1702	unverifiable=True)
	1703
	1704
	1705	def extract_timezone(date_str):
	1706	m = re.search(
	1707	r'''(?x)
	1708	^.{8,}? # >=8 char non-TZ prefix, if present
	1709	(?P<tz>Z\| # just the UTC Z, or
	1710	(?:(?<=.\b\d{4}\|\b\d{2}:\d\d)\| # preceded by 4 digits or hh:mm or
	1711	(?<!.\b[a-zA-Z]{3}\|[a-zA-Z]{4}\|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
	1712	[ ]? # optional space
	1713	(?P<sign>\+\|-) # +/-
	1714	(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
	1715	$)
	1716	''', date_str)
	1717	if not m:
	1718	timezone = datetime.timedelta()
	1719	else:
	1720	date_str = date_str[:-len(m.group('tz'))]
	1721	if not m.group('sign'):
	1722	timezone = datetime.timedelta()
	1723	else:
	1724	sign = 1 if m.group('sign') == '+' else -1
	1725	timezone = datetime.timedelta(
	1726	hours=sign * int(m.group('hours')),
	1727	minutes=sign * int(m.group('minutes')))
	1728	return timezone, date_str
	1729
	1730
	1731	def parse_iso8601(date_str, delimiter='T', timezone=None):
	1732	""" Return a UNIX timestamp from the given date """
	1733
	1734	if date_str is None:
	1735	return None
	1736
	1737	date_str = re.sub(r'\.[0-9]+', '', date_str)
	1738
	1739	if timezone is None:
	1740	timezone, date_str = extract_timezone(date_str)
	1741
	1742	try:
	1743	date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
	1744	dt = datetime.datetime.strptime(date_str, date_format) - timezone
	1745	return calendar.timegm(dt.timetuple())
	1746	except ValueError:
	1747	pass
	1748
	1749
	1750	def date_formats(day_first=True):
	1751	return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
	1752
	1753
	1754	def unified_strdate(date_str, day_first=True):
	1755	"""Return a string with the date in the format YYYYMMDD"""
	1756
	1757	if date_str is None:
	1758	return None
	1759	upload_date = None
	1760	# Replace commas
	1761	date_str = date_str.replace(',', ' ')
	1762	# Remove AM/PM + timezone
	1763	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1764	_, date_str = extract_timezone(date_str)
	1765
	1766	for expression in date_formats(day_first):
	1767	try:
	1768	upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
	1769	except ValueError:
	1770	pass
	1771	if upload_date is None:
	1772	timetuple = email.utils.parsedate_tz(date_str)
	1773	if timetuple:
	1774	try:
	1775	upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
	1776	except ValueError:
	1777	pass
	1778	if upload_date is not None:
	1779	return compat_str(upload_date)
	1780
	1781
	1782	def unified_timestamp(date_str, day_first=True):
	1783	if date_str is None:
	1784	return None
	1785
	1786	date_str = re.sub(r'[,\|]', '', date_str)
	1787
	1788	pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
	1789	timezone, date_str = extract_timezone(date_str)
	1790
	1791	# Remove AM/PM + timezone
	1792	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1793
	1794	# Remove unrecognized timezones from ISO 8601 alike timestamps
	1795	m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
	1796	if m:
	1797	date_str = date_str[:-len(m.group('tz'))]
	1798
	1799	# Python only supports microseconds, so remove nanoseconds
	1800	m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
	1801	if m:
	1802	date_str = m.group(1)
	1803
	1804	for expression in date_formats(day_first):
	1805	try:
	1806	dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
	1807	return calendar.timegm(dt.timetuple())
	1808	except ValueError:
	1809	pass
	1810	timetuple = email.utils.parsedate_tz(date_str)
	1811	if timetuple:
	1812	return calendar.timegm(timetuple) + pm_delta * 3600
	1813
	1814
	1815	def determine_ext(url, default_ext='unknown_video'):
	1816	if url is None or '.' not in url:
	1817	return default_ext
	1818	guess = url.partition('?')[0].rpartition('.')[2]
	1819	if re.match(r'^[A-Za-z0-9]+$', guess):
	1820	return guess
	1821	# Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
	1822	elif guess.rstrip('/') in KNOWN_EXTENSIONS:
	1823	return guess.rstrip('/')
	1824	else:
	1825	return default_ext
	1826
	1827
	1828	def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
	1829	return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
	1830
	1831
	1832	def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
	1833	"""
	1834	Return a datetime object from a string in the format YYYYMMDD or
	1835	(now\|today\|date)[+-][0-9](microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?
	1836
	1837	format: string date format used to return datetime object from
	1838	precision: round the time portion of a datetime object.
	1839	auto\|microsecond\|second\|minute\|hour\|day.
	1840	auto: round to the unit provided in date_str (if applicable).
	1841	"""
	1842	auto_precision = False
	1843	if precision == 'auto':
	1844	auto_precision = True
	1845	precision = 'microsecond'
	1846	today = datetime_round(datetime.datetime.utcnow(), precision)
	1847	if date_str in ('now', 'today'):
	1848	return today
	1849	if date_str == 'yesterday':
	1850	return today - datetime.timedelta(days=1)
	1851	match = re.match(
	1852	r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?',
	1853	date_str)
	1854	if match is not None:
	1855	start_time = datetime_from_str(match.group('start'), precision, format)
	1856	time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
	1857	unit = match.group('unit')
	1858	if unit == 'month' or unit == 'year':
	1859	new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
	1860	unit = 'day'
	1861	else:
	1862	if unit == 'week':
	1863	unit = 'day'
	1864	time *= 7
	1865	delta = datetime.timedelta(**{unit + 's': time})
	1866	new_date = start_time + delta
	1867	if auto_precision:
	1868	return datetime_round(new_date, unit)
	1869	return new_date
	1870
	1871	return datetime_round(datetime.datetime.strptime(date_str, format), precision)
	1872
	1873
	1874	def date_from_str(date_str, format='%Y%m%d'):
	1875	"""
	1876	Return a datetime object from a string in the format YYYYMMDD or
	1877	(now\|today\|date)[+-][0-9](microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?
	1878
	1879	format: string date format used to return datetime object from
	1880	"""
	1881	return datetime_from_str(date_str, precision='microsecond', format=format).date()
	1882
	1883
	1884	def datetime_add_months(dt, months):
	1885	"""Increment/Decrement a datetime object by months."""
	1886	month = dt.month + months - 1
	1887	year = dt.year + month // 12
	1888	month = month % 12 + 1
	1889	day = min(dt.day, calendar.monthrange(year, month)[1])
	1890	return dt.replace(year, month, day)
	1891
	1892
	1893	def datetime_round(dt, precision='day'):
	1894	"""
	1895	Round a datetime object's time to a specific precision
	1896	"""
	1897	if precision == 'microsecond':
	1898	return dt
	1899
	1900	unit_seconds = {
	1901	'day': 86400,
	1902	'hour': 3600,
	1903	'minute': 60,
	1904	'second': 1,
	1905	}
	1906	roundto = lambda x, n: ((x + n / 2) // n) * n
	1907	timestamp = calendar.timegm(dt.timetuple())
	1908	return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
	1909
	1910
	1911	def hyphenate_date(date_str):
	1912	"""
	1913	Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
	1914	match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
	1915	if match is not None:
	1916	return '-'.join(match.groups())
	1917	else:
	1918	return date_str
	1919
	1920
	1921	class DateRange(object):
	1922	"""Represents a time interval between two dates"""
	1923
	1924	def __init__(self, start=None, end=None):
	1925	"""start and end must be strings in the format accepted by date"""
	1926	if start is not None:
	1927	self.start = date_from_str(start)
	1928	else:
	1929	self.start = datetime.datetime.min.date()
	1930	if end is not None:
	1931	self.end = date_from_str(end)
	1932	else:
	1933	self.end = datetime.datetime.max.date()
	1934	if self.start > self.end:
	1935	raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
	1936
	1937	@classmethod
	1938	def day(cls, day):
	1939	"""Returns a range that only contains the given day"""
	1940	return cls(day, day)
	1941
	1942	def __contains__(self, date):
	1943	"""Check if the date is in the range"""
	1944	if not isinstance(date, datetime.date):
	1945	date = date_from_str(date)
	1946	return self.start <= date <= self.end
	1947
	1948	def __str__(self):
	1949	return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
	1950
	1951
	1952	def platform_name():
	1953	""" Returns the platform name as a compat_str """
	1954	res = platform.platform()
	1955	if isinstance(res, bytes):
	1956	res = res.decode(preferredencoding())
	1957
	1958	assert isinstance(res, compat_str)
	1959	return res
	1960
	1961
	1962	def get_windows_version():
	1963	''' Get Windows version. None if it's not running on Windows '''
	1964	if compat_os_name == 'nt':
	1965	return version_tuple(platform.win32_ver()[1])
	1966	else:
	1967	return None
	1968
	1969
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    `s` is the text to write and `out` the target stream. The special method
    is the kernel32 WriteConsoleW call; it is only used when `out` has file
    descriptor 1 or 2 and the corresponding standard handle is a console.
    Raises OSError if WriteConsoleW reports failure."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes.wintypes

    # Maps file descriptors to the argument passed to GetStdHandle
    # (-11 / -12 are STD_OUTPUT_HANDLE / STD_ERROR_HANDLE in the Win32 API)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is invalid, is not a character device
        # (ignoring the REMOTE flag), or GetConsoleMode fails on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before any
        # non-BMP character; count == 0 means s starts with one
        count = min(next_nonbmp_pos(s), 1024)

        # A leading non-BMP character is written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
	2042
	2043
	2044	def write_string(s, out=None, encoding=None):
	2045	if out is None:
	2046	out = sys.stderr
	2047	assert type(s) == compat_str
	2048
	2049	if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
	2050	if _windows_write_string(s, out):
	2051	return
	2052
	2053	if ('b' in getattr(out, 'mode', '')
	2054	or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
	2055	byt = s.encode(encoding or preferredencoding(), 'ignore')
	2056	out.write(byt)
	2057	elif hasattr(out, 'buffer'):
	2058	enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
	2059	byt = s.encode(enc, 'ignore')
	2060	out.buffer.write(byt)
	2061	else:
	2062	out.write(s)
	2063	out.flush()
	2064
	2065
	2066	def bytes_to_intlist(bs):
	2067	if not bs:
	2068	return []
	2069	if isinstance(bs[0], int): # Python 3
	2070	return list(bs)
	2071	else:
	2072	return [ord(c) for c in bs]
	2073
	2074
	2075	def intlist_to_bytes(xs):
	2076	if not xs:
	2077	return b''
	2078	return compat_struct_pack('%dB' % len(xs), *xs)
	2079
	2080
# Cross-platform file locking
# Defines _lock_file(f, exclusive, block) and _unlock_file(f) for the current
# platform: LockFileEx/UnlockFileEx on win32, fcntl.flock where fcntl exists,
# and stubs raising IOError everywhere else.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # ctypes layout of the structure passed to LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high words of the byte count passed to (Un)LockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):  # todo: block unused on win32
        """Lock the file object `f` with LockFileEx; raise OSError on failure."""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file() can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # dwFlags 0x2 requests an exclusive lock (LOCKFILE_EXCLUSIVE_LOCK in the Win32 API)
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        """Release the lock taken by _lock_file(); raise OSError on failure."""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            """Lock `f` with flock(): shared if not exclusive, otherwise
            exclusive, adding LOCK_NB when `block` is false."""
            fcntl.flock(f,
                        fcntl.LOCK_SH if not exclusive
                        else fcntl.LOCK_EX if block
                        else fcntl.LOCK_EX | fcntl.LOCK_NB)

        def _unlock_file(f):
            """Release the flock() lock held on `f`."""
            fcntl.flock(f, fcntl.LOCK_UN)

    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive, block):
            """Always raises IOError: no locking mechanism is available."""
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            """Always raises IOError: no locking mechanism is available."""
            raise IOError(UNSUPPORTED_MSG)
	2158
	2159
	2160	class locked_file(object):
	2161	def __init__(self, filename, mode, block=True, encoding=None):
	2162	assert mode in ['r', 'rb', 'a', 'ab', 'w', 'wb']
	2163	self.f = io.open(filename, mode, encoding=encoding)
	2164	self.mode = mode
	2165	self.block = block
	2166
	2167	def __enter__(self):
	2168	exclusive = 'r' not in self.mode
	2169	try:
	2170	_lock_file(self.f, exclusive, self.block)
	2171	except IOError:
	2172	self.f.close()
	2173	raise
	2174	return self
	2175
	2176	def __exit__(self, etype, value, traceback):
	2177	try:
	2178	_unlock_file(self.f)
	2179	finally:
	2180	self.f.close()
	2181
	2182	def __iter__(self):
	2183	return iter(self.f)
	2184
	2185	def write(self, *args):
	2186	return self.f.write(*args)
	2187
	2188	def read(self, *args):
	2189	return self.f.read(*args)
	2190
	2191	def flush(self):
	2192	self.f.flush()
	2193
	2194	def open(self):
	2195	return self.__enter__()
	2196
	2197	def close(self, *args):
	2198	self.__exit__(self, *args, value=False, traceback=False)
	2199
	2200
	2201	def get_filesystem_encoding():
	2202	encoding = sys.getfilesystemencoding()
	2203	return encoding if encoding is not None else 'utf-8'
	2204
	2205
	2206	def shell_quote(args):
	2207	quoted_args = []
	2208	encoding = get_filesystem_encoding()
	2209	for a in args:
	2210	if isinstance(a, bytes):
	2211	# We may get a filename encoded with 'encodeFilename'
	2212	a = a.decode(encoding)
	2213	quoted_args.append(compat_shlex_quote(a))
	2214	return ' '.join(quoted_args)
	2215
	2216
	2217	def smuggle_url(url, data):
	2218	""" Pass additional data in a URL for internal use. """
	2219
	2220	url, idata = unsmuggle_url(url, {})
	2221	data.update(idata)
	2222	sdata = compat_urllib_parse_urlencode(
	2223	{'__youtubedl_smuggle': json.dumps(data)})
	2224	return url + '#' + sdata
	2225
	2226
	2227	def unsmuggle_url(smug_url, default=None):
	2228	if '#__youtubedl_smuggle' not in smug_url:
	2229	return smug_url, default
	2230	url, _, sdata = smug_url.rpartition('#')
	2231	jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
	2232	data = json.loads(jsond)
	2233	return url, data
	2234
	2235
	2236	def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
	2237	""" Formats numbers with decimal sufixes like K, M, etc """
	2238	num, factor = float_or_none(num), float(factor)
	2239	if num is None:
	2240	return None
	2241	exponent = 0 if num == 0 else int(math.log(num, factor))
	2242	suffix = ['', *'kMGTPEZY'][exponent]
	2243	if factor == 1024:
	2244	suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
	2245	converted = num / (factor ** exponent)
	2246	return fmt % (converted, suffix)
	2247
	2248
	2249	def format_bytes(bytes):
	2250	return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
	2251
	2252
	2253	def lookup_unit_table(unit_table, s):
	2254	units_re = '\|'.join(re.escape(u) for u in unit_table)
	2255	m = re.match(
	2256	r'(?P<num>[0-9]+(?:[,.][0-9])?)\s(?P<unit>%s)\b' % units_re, s)
	2257	if not m:
	2258	return None
	2259	num_str = m.group('num').replace(',', '.')
	2260	mult = unit_table[m.group('unit')]
	2261	return int(float(num_str) * mult)
	2262
	2263
	2264	def parse_filesize(s):
	2265	if s is None:
	2266	return None
	2267
	2268	# The lower-case forms are of course incorrect and unofficial,
	2269	# but we support those too
	2270	_UNIT_TABLE = {
	2271	'B': 1,
	2272	'b': 1,
	2273	'bytes': 1,
	2274	'KiB': 1024,
	2275	'KB': 1000,
	2276	'kB': 1024,
	2277	'Kb': 1000,
	2278	'kb': 1000,
	2279	'kilobytes': 1000,
	2280	'kibibytes': 1024,
	2281	'MiB': 1024 ** 2,
	2282	'MB': 1000 ** 2,
	2283	'mB': 1024 ** 2,
	2284	'Mb': 1000 ** 2,
	2285	'mb': 1000 ** 2,
	2286	'megabytes': 1000 ** 2,
	2287	'mebibytes': 1024 ** 2,
	2288	'GiB': 1024 ** 3,
	2289	'GB': 1000 ** 3,
	2290	'gB': 1024 ** 3,
	2291	'Gb': 1000 ** 3,
	2292	'gb': 1000 ** 3,
	2293	'gigabytes': 1000 ** 3,
	2294	'gibibytes': 1024 ** 3,
	2295	'TiB': 1024 ** 4,
	2296	'TB': 1000 ** 4,
	2297	'tB': 1024 ** 4,
	2298	'Tb': 1000 ** 4,
	2299	'tb': 1000 ** 4,
	2300	'terabytes': 1000 ** 4,
	2301	'tebibytes': 1024 ** 4,
	2302	'PiB': 1024 ** 5,
	2303	'PB': 1000 ** 5,
	2304	'pB': 1024 ** 5,
	2305	'Pb': 1000 ** 5,
	2306	'pb': 1000 ** 5,
	2307	'petabytes': 1000 ** 5,
	2308	'pebibytes': 1024 ** 5,
	2309	'EiB': 1024 ** 6,
	2310	'EB': 1000 ** 6,
	2311	'eB': 1024 ** 6,
	2312	'Eb': 1000 ** 6,
	2313	'eb': 1000 ** 6,
	2314	'exabytes': 1000 ** 6,
	2315	'exbibytes': 1024 ** 6,
	2316	'ZiB': 1024 ** 7,
	2317	'ZB': 1000 ** 7,
	2318	'zB': 1024 ** 7,
	2319	'Zb': 1000 ** 7,
	2320	'zb': 1000 ** 7,
	2321	'zettabytes': 1000 ** 7,
	2322	'zebibytes': 1024 ** 7,
	2323	'YiB': 1024 ** 8,
	2324	'YB': 1000 ** 8,
	2325	'yB': 1024 ** 8,
	2326	'Yb': 1000 ** 8,
	2327	'yb': 1000 ** 8,
	2328	'yottabytes': 1000 ** 8,
	2329	'yobibytes': 1024 ** 8,
	2330	}
	2331
	2332	return lookup_unit_table(_UNIT_TABLE, s)
	2333
	2334
	2335	def parse_count(s):
	2336	if s is None:
	2337	return None
	2338
	2339	s = re.sub(r'^[^\d]+\s', '', s).strip()
	2340
	2341	if re.match(r'^[\d,.]+$', s):
	2342	return str_to_int(s)
	2343
	2344	_UNIT_TABLE = {
	2345	'k': 1000,
	2346	'K': 1000,
	2347	'm': 1000 ** 2,
	2348	'M': 1000 ** 2,
	2349	'kk': 1000 ** 2,
	2350	'KK': 1000 ** 2,
	2351	'b': 1000 ** 3,
	2352	'B': 1000 ** 3,
	2353	}
	2354
	2355	ret = lookup_unit_table(_UNIT_TABLE, s)
	2356	if ret is not None:
	2357	return ret
	2358
	2359	mobj = re.match(r'([\d,.]+)(?:$\|\s)', s)
	2360	if mobj:
	2361	return str_to_int(mobj.group(1))
	2362
	2363
	2364	def parse_resolution(s):
	2365	if s is None:
	2366	return {}
	2367
	2368	mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s[xX×,]\s(?P<h>\d+)(?![a-zA-Z0-9])', s)
	2369	if mobj:
	2370	return {
	2371	'width': int(mobj.group('w')),
	2372	'height': int(mobj.group('h')),
	2373	}
	2374
	2375	mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
	2376	if mobj:
	2377	return {'height': int(mobj.group(1))}
	2378
	2379	mobj = re.search(r'\b([48])[kK]\b', s)
	2380	if mobj:
	2381	return {'height': int(mobj.group(1)) * 540}
	2382
	2383	return {}
	2384
	2385
	2386	def parse_bitrate(s):
	2387	if not isinstance(s, compat_str):
	2388	return
	2389	mobj = re.search(r'\b(\d+)\s*kbps', s)
	2390	if mobj:
	2391	return int(mobj.group(1))
	2392
	2393
	2394	def month_by_name(name, lang='en'):
	2395	""" Return the number of a month by (locale-independently) English name """
	2396
	2397	month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
	2398
	2399	try:
	2400	return month_names.index(name) + 1
	2401	except ValueError:
	2402	return None
	2403
	2404
	2405	def month_by_abbreviation(abbrev):
	2406	""" Return the number of a month by (locale-independently) English
	2407	abbreviations """
	2408
	2409	try:
	2410	return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
	2411	except ValueError:
	2412	return None
	2413
	2414
	2415	def fix_xml_ampersands(xml_str):
	2416	"""Replace all the '&' by '&' in XML"""
	2417	return re.sub(
	2418	r'&(?!amp;\|lt;\|gt;\|apos;\|quot;\|#x[0-9a-fA-F]{,4};\|#[0-9]{,4};)',
	2419	'&',
	2420	xml_str)
	2421
	2422
	2423	def setproctitle(title):
	2424	assert isinstance(title, compat_str)
	2425
	2426	# ctypes in Jython is not complete
	2427	# http://bugs.jython.org/issue2148
	2428	if sys.platform.startswith('java'):
	2429	return
	2430
	2431	try:
	2432	libc = ctypes.cdll.LoadLibrary('libc.so.6')
	2433	except OSError:
	2434	return
	2435	except TypeError:
	2436	# LoadLibrary in Windows Python 2.7.13 only expects
	2437	# a bytestring, but since unicode_literals turns
	2438	# every string into a unicode string, it fails.
	2439	return
	2440	title_bytes = title.encode('utf-8')
	2441	buf = ctypes.create_string_buffer(len(title_bytes))
	2442	buf.value = title_bytes
	2443	try:
	2444	libc.prctl(15, buf, 0, 0, 0)
	2445	except AttributeError:
	2446	return # Strange libc, just skip this
	2447
	2448
	2449	def remove_start(s, start):
	2450	return s[len(start):] if s is not None and s.startswith(start) else s
	2451
	2452
	2453	def remove_end(s, end):
	2454	return s[:-len(end)] if s is not None and s.endswith(end) else s
	2455
	2456
	2457	def remove_quotes(s):
	2458	if s is None or len(s) < 2:
	2459	return s
	2460	for quote in ('"', "'", ):
	2461	if s[0] == quote and s[-1] == quote:
	2462	return s[1:-1]
	2463	return s
	2464
	2465
	2466	def get_domain(url):
	2467	domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
	2468	return domain.group('domain') if domain else None
	2469
	2470
	2471	def url_basename(url):
	2472	path = compat_urlparse.urlparse(url).path
	2473	return path.strip('/').split('/')[-1]
	2474
	2475
	2476	def base_url(url):
	2477	return re.match(r'https?://[^?#&]+/', url).group()
	2478
	2479
	2480	def urljoin(base, path):
	2481	if isinstance(path, bytes):
	2482	path = path.decode('utf-8')
	2483	if not isinstance(path, compat_str) or not path:
	2484	return None
	2485	if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
	2486	return path
	2487	if isinstance(base, bytes):
	2488	base = base.decode('utf-8')
	2489	if not isinstance(base, compat_str) or not re.match(
	2490	r'^(?:https?:)?//', base):
	2491	return None
	2492	return compat_urlparse.urljoin(base, path)
	2493
	2494
class HEADRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always HEAD."""

    def get_method(self):
        return 'HEAD'
	2498
	2499
class PUTRequest(compat_urllib_request.Request):
    """Request whose HTTP method is always PUT."""

    def get_method(self):
        return 'PUT'
	2503
	2504
	2505	def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
	2506	if get_attr and v is not None:
	2507	v = getattr(v, get_attr, None)
	2508	try:
	2509	return int(v) * invscale // scale
	2510	except (ValueError, TypeError, OverflowError):
	2511	return default
	2512
	2513
	2514	def str_or_none(v, default=None):
	2515	return default if v is None else compat_str(v)
	2516
	2517
	2518	def str_to_int(int_str):
	2519	""" A more relaxed version of int_or_none """
	2520	if isinstance(int_str, compat_integer_types):
	2521	return int_str
	2522	elif isinstance(int_str, compat_str):
	2523	int_str = re.sub(r'[,\.\+]', '', int_str)
	2524	return int_or_none(int_str)
	2525
	2526
	2527	def float_or_none(v, scale=1, invscale=1, default=None):
	2528	if v is None:
	2529	return default
	2530	try:
	2531	return float(v) * invscale / scale
	2532	except (ValueError, TypeError):
	2533	return default
	2534
	2535
	2536	def bool_or_none(v, default=None):
	2537	return v if isinstance(v, bool) else default
	2538
	2539
	2540	def strip_or_none(v, default=None):
	2541	return v.strip() if isinstance(v, compat_str) else default
	2542
	2543
	2544	def url_or_none(url):
	2545	if not url or not isinstance(url, compat_str):
	2546	return None
	2547	url = url.strip()
	2548	return url if re.match(r'^(?:(?:https?\|rt(?:m(?:pt?[es]?\|fp)\|sp[su]?)\|mms\|ftps?):)?//', url) else None
	2549
	2550
	2551	def strftime_or_none(timestamp, date_format, default=None):
	2552	datetime_object = None
	2553	try:
	2554	if isinstance(timestamp, compat_numeric_types): # unix timestamp
	2555	datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
	2556	elif isinstance(timestamp, compat_str): # assume YYYYMMDD
	2557	datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
	2558	return datetime_object.strftime(date_format)
	2559	except (ValueError, TypeError, AttributeError):
	2560	return default
	2561
	2562
	2563	def parse_duration(s):
	2564	if not isinstance(s, compat_basestring):
	2565	return None
	2566	s = s.strip()
	2567	if not s:
	2568	return None
	2569
	2570	days, hours, mins, secs, ms = [None] * 5
	2571	m = re.match(r'''(?x)
	2572	(?P<before_secs>
	2573	(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
	2574	(?P<secs>(?(before_secs)[0-9]{1,2}\|[0-9]+))
	2575	(?P<ms>[.:][0-9]+)?Z?$
	2576	''', s)
	2577	if m:
	2578	days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
	2579	else:
	2580	m = re.match(
	2581	r'''(?ix)(?:P?
	2582	(?:
	2583	[0-9]+\sy(?:ears?)?\s
	2584	)?
	2585	(?:
	2586	[0-9]+\sm(?:onths?)?\s
	2587	)?
	2588	(?:
	2589	[0-9]+\sw(?:eeks?)?\s
	2590	)?
	2591	(?:
	2592	(?P<days>[0-9]+)\sd(?:ays?)?\s
	2593	)?
	2594	T)?
	2595	(?:
	2596	(?P<hours>[0-9]+)\sh(?:ours?)?\s
	2597	)?
	2598	(?:
	2599	(?P<mins>[0-9]+)\sm(?:in(?:ute)?s?)?\s
	2600	)?
	2601	(?:
	2602	(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\ss(?:ec(?:ond)?s?)?\s
	2603	)?Z?$''', s)
	2604	if m:
	2605	days, hours, mins, secs, ms = m.groups()
	2606	else:
	2607	m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s(?:hours?)\|(?P<mins>[0-9.]+)\s(?:mins?\.?\|minutes?)\s*)Z?$', s)
	2608	if m:
	2609	hours, mins = m.groups()
	2610	else:
	2611	return None
	2612
	2613	duration = 0
	2614	if secs:
	2615	duration += float(secs)
	2616	if mins:
	2617	duration += float(mins) * 60
	2618	if hours:
	2619	duration += float(hours) * 60 * 60
	2620	if days:
	2621	duration += float(days) * 24 * 60 * 60
	2622	if ms:
	2623	duration += float(ms.replace(':', '.'))
	2624	return duration
	2625
	2626
	2627	def prepend_extension(filename, ext, expected_real_ext=None):
	2628	name, real_ext = os.path.splitext(filename)
	2629	return (
	2630	'{0}.{1}{2}'.format(name, ext, real_ext)
	2631	if not expected_real_ext or real_ext[1:] == expected_real_ext
	2632	else '{0}.{1}'.format(filename, ext))
	2633
	2634
	2635	def replace_extension(filename, ext, expected_real_ext=None):
	2636	name, real_ext = os.path.splitext(filename)
	2637	return '{0}.{1}'.format(
	2638	name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
	2639	ext)
	2640
	2641
	2642	def check_executable(exe, args=[]):
	2643	""" Checks if the given binary is installed somewhere in PATH, and returns its name.
	2644	args can be a list of arguments for a short output (like -version) """
	2645	try:
	2646	Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
	2647	except OSError:
	2648	return False
	2649	return exe
	2650
	2651
	2652	def _get_exe_version_output(exe, args):
	2653	try:
	2654	# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
	2655	# SIGTTOU if yt-dlp is run in the background.
	2656	# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
	2657	out, _ = Popen(
	2658	[encodeArgument(exe)] + args, stdin=subprocess.PIPE,
	2659	stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
	2660	except OSError:
	2661	return False
	2662	if isinstance(out, bytes): # Python 2.x
	2663	out = out.decode('ascii', 'ignore')
	2664	return out
	2665
	2666
	2667	def detect_exe_version(output, version_re=None, unrecognized='present'):
	2668	assert isinstance(output, compat_str)
	2669	if version_re is None:
	2670	version_re = r'version\s+([-0-9._a-zA-Z]+)'
	2671	m = re.search(version_re, output)
	2672	if m:
	2673	return m.group(1)
	2674	else:
	2675	return unrecognized
	2676
	2677
	2678	def get_exe_version(exe, args=['--version'],
	2679	version_re=None, unrecognized='present'):
	2680	""" Returns the version of the specified executable,
	2681	or False if the executable is not present """
	2682	out = _get_exe_version_output(exe, args)
	2683	return detect_exe_version(out, version_re, unrecognized) if out else False
	2684
	2685
class LazyList(collections.abc.Sequence):
    ''' Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList

    Items are pulled from the iterable only when needed and are kept in a
    cache, so the iterable is consumed at most once. With reverse=True the
    sequence is presented back to front.'''

    class IndexError(IndexError):
        # Raised by __getitem__ for out-of-range indices; subclasses the builtin
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        # _cache lets __reversed__/__copy__ hand over the items already evaluated
        self.__iterable = iter(iterable)
        self.__cache = [] if _cache is None else _cache
        self.__reversed = reverse

    def __iter__(self):
        if self.__reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # Replay what is already cached, then continue with the iterable,
        # caching every new item on the way
        yield from self.__cache
        for item in self.__iterable:
            self.__cache.append(item)
            yield item

    def __exhaust(self):
        # Move every remaining item into the cache and return the cache
        # (always in original order)
        self.__cache.extend(self.__iterable)
        # Discard the emptied iterable to make it pickle-able
        self.__iterable = []
        return self.__cache

    def exhaust(self):
        ''' Evaluate the entire iterable '''
        # The result honours the reverse flag
        return self.__exhaust()[::-1 if self.__reversed else 1]

    @staticmethod
    def __reverse_index(x):
        # Map an index into the reversed view onto the cache (0 -> -1, 1 -> -2, ...)
        return None if x is None else -(x + 1)

    def __getitem__(self, idx):
        # Accepts ints and slices; slices return plain lists.
        # Raises TypeError for other index types and self.IndexError when out of range
        if isinstance(idx, slice):
            if self.__reversed:
                idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self.__reversed:
                idx = self.__reverse_index(idx)
            # step 0 keeps the two None checks below from triggering for plain ints
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self.__exhaust()
            try:
                return self.__cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Number of items still missing from the cache to reach the largest index needed
        n = max(start or 0, stop or 0) - len(self.__cache) + 1
        if n > 0:
            self.__cache.extend(itertools.islice(self.__iterable, n))
        try:
            return self.__cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Non-empty iff the first item of the underlying iterable exists
        # (index -1 of the reversed view maps to cache index 0)
        try:
            self[-1] if self.__reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        # Needs the whole iterable to be evaluated
        self.__exhaust()
        return len(self.__cache)

    def __reversed__(self):
        # The new object shares this one's iterable and cache
        return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)

    def __copy__(self):
        # The copy shares this one's iterable and cache
        return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
	2774
	2775
	2776	class PagedList:
	2777
	2778	class IndexError(IndexError):
	2779	pass
	2780
	2781	def __len__(self):
	2782	# This is only useful for tests
	2783	return len(self.getslice())
	2784
	2785	def __init__(self, pagefunc, pagesize, use_cache=True):
	2786	self._pagefunc = pagefunc
	2787	self._pagesize = pagesize
	2788	self._use_cache = use_cache
	2789	self._cache = {}
	2790
	2791	def getpage(self, pagenum):
	2792	page_results = self._cache.get(pagenum)
	2793	if page_results is None:
	2794	page_results = list(self._pagefunc(pagenum))
	2795	if self._use_cache:
	2796	self._cache[pagenum] = page_results
	2797	return page_results
	2798
	2799	def getslice(self, start=0, end=None):
	2800	return list(self._getslice(start, end))
	2801
	2802	def _getslice(self, start, end):
	2803	raise NotImplementedError('This method must be implemented by subclasses')
	2804
	2805	def __getitem__(self, idx):
	2806	# NOTE: cache must be enabled if this is used
	2807	if not isinstance(idx, int) or idx < 0:
	2808	raise TypeError('indices must be non-negative integers')
	2809	entries = self.getslice(idx, idx + 1)
	2810	if not entries:
	2811	raise self.IndexError()
	2812	return entries[0]
	2813
	2814
class OnDemandPagedList(PagedList):
    """PagedList that requests pages one after another until a page comes
    back with fewer than `pagesize` items or the requested end is reached."""

    def _getslice(self, start, end):
        # Yield the items with indices in [start, end); end=None means "until the last page"
        for pagenum in itertools.count(start // self._pagesize):
            # Indices of the first item of this page and of the next page
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            # Offset of the first wanted item within this page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)
            # Offset just past the last wanted item within this page,
            # or None if the page is wanted up to its end
            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            page_results = self.getpage(pagenum)
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
	2848
	2849
	2850	class InAdvancePagedList(PagedList):
	2851	def __init__(self, pagefunc, pagecount, pagesize):
	2852	self._pagecount = pagecount
	2853	PagedList.__init__(self, pagefunc, pagesize, True)
	2854
	2855	def _getslice(self, start, end):
	2856	start_page = start // self._pagesize
	2857	end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
	2858	skip_elems = start - start_page * self._pagesize
	2859	only_more = None if end is None else end - start
	2860	for pagenum in range(start_page, end_page):
	2861	page_results = self.getpage(pagenum)
	2862	if skip_elems:
	2863	page_results = page_results[skip_elems:]
	2864	skip_elems = None
	2865	if only_more is not None:
	2866	if len(page_results) < only_more:
	2867	only_more -= len(page_results)
	2868	else:
	2869	yield from page_results[:only_more]
	2870	break
	2871	yield from page_results
	2872
	2873
	2874	def uppercase_escape(s):
	2875	unicode_escape = codecs.getdecoder('unicode_escape')
	2876	return re.sub(
	2877	r'\\U[0-9a-fA-F]{8}',
	2878	lambda m: unicode_escape(m.group(0))[0],
	2879	s)
	2880
	2881
	2882	def lowercase_escape(s):
	2883	unicode_escape = codecs.getdecoder('unicode_escape')
	2884	return re.sub(
	2885	r'\\u[0-9a-fA-F]{4}',
	2886	lambda m: unicode_escape(m.group(0))[0],
	2887	s)
	2888
	2889
	2890	def escape_rfc3986(s):
	2891	"""Escape non-ASCII characters as suggested by RFC 3986"""
	2892	if sys.version_info < (3, 0) and isinstance(s, compat_str):
	2893	s = s.encode('utf-8')
	2894	return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
	2895
	2896
	2897	def escape_url(url):
	2898	"""Escape URL as suggested by RFC 3986"""
	2899	url_parsed = compat_urllib_parse_urlparse(url)
	2900	return url_parsed._replace(
	2901	netloc=url_parsed.netloc.encode('idna').decode('ascii'),
	2902	path=escape_rfc3986(url_parsed.path),
	2903	params=escape_rfc3986(url_parsed.params),
	2904	query=escape_rfc3986(url_parsed.query),
	2905	fragment=escape_rfc3986(url_parsed.fragment)
	2906	).geturl()
	2907
	2908
	2909	def parse_qs(url):
	2910	return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
	2911
	2912
	2913	def read_batch_urls(batch_fd):
	2914	def fixup(url):
	2915	if not isinstance(url, compat_str):
	2916	url = url.decode('utf-8', 'replace')
	2917	BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
	2918	for bom in BOM_UTF8:
	2919	if url.startswith(bom):
	2920	url = url[len(bom):]
	2921	url = url.lstrip()
	2922	if not url or url.startswith(('#', ';', ']')):
	2923	return False
	2924	# "#" cannot be stripped out since it is part of the URI
	2925	# However, it can be safely stipped out if follwing a whitespace
	2926	return re.split(r'\s#', url, 1)[0].rstrip()
	2927
	2928	with contextlib.closing(batch_fd) as fd:
	2929	return [url for url in map(fixup, fd) if url]
	2930
	2931
	2932	def urlencode_postdata(args, *kargs):
	2933	return compat_urllib_parse_urlencode(args, *kargs).encode('ascii')
	2934
	2935
	2936	def update_url_query(url, query):
	2937	if not query:
	2938	return url
	2939	parsed_url = compat_urlparse.urlparse(url)
	2940	qs = compat_parse_qs(parsed_url.query)
	2941	qs.update(query)
	2942	return compat_urlparse.urlunparse(parsed_url._replace(
	2943	query=compat_urllib_parse_urlencode(qs, True)))
	2944
	2945
	2946	def update_Request(req, url=None, data=None, headers={}, query={}):
	2947	req_headers = req.headers.copy()
	2948	req_headers.update(headers)
	2949	req_data = data or req.data
	2950	req_url = update_url_query(url or req.get_full_url(), query)
	2951	req_get_method = req.get_method()
	2952	if req_get_method == 'HEAD':
	2953	req_type = HEADRequest
	2954	elif req_get_method == 'PUT':
	2955	req_type = PUTRequest
	2956	else:
	2957	req_type = compat_urllib_request.Request
	2958	new_req = req_type(
	2959	req_url, data=req_data, headers=req_headers,
	2960	origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
	2961	if hasattr(req, 'timeout'):
	2962	new_req.timeout = req.timeout
	2963	return new_req
	2964
	2965
	2966	def _multipart_encode_impl(data, boundary):
	2967	content_type = 'multipart/form-data; boundary=%s' % boundary
	2968
	2969	out = b''
	2970	for k, v in data.items():
	2971	out += b'--' + boundary.encode('ascii') + b'\r\n'
	2972	if isinstance(k, compat_str):
	2973	k = k.encode('utf-8')
	2974	if isinstance(v, compat_str):
	2975	v = v.encode('utf-8')
	2976	# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
	2977	# suggests sending UTF-8 directly. Firefox sends UTF-8, too
	2978	content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
	2979	if boundary.encode('ascii') in content:
	2980	raise ValueError('Boundary overlaps with data')
	2981	out += content
	2982
	2983	out += b'--' + boundary.encode('ascii') + b'--\r\n'
	2984
	2985	return out, content_type
	2986
	2987
	2988	def multipart_encode(data, boundary=None):
	2989	'''
	2990	Encode a dict to RFC 7578-compliant form-data
	2991
	2992	data:
	2993	A dict where keys and values can be either Unicode or bytes-like
	2994	objects.
	2995	boundary:
	2996	If specified a Unicode object, it's used as the boundary. Otherwise
	2997	a random boundary is generated.
	2998
	2999	Reference: https://tools.ietf.org/html/rfc7578
	3000	'''
	3001	has_specified_boundary = boundary is not None
	3002
	3003	while True:
	3004	if boundary is None:
	3005	boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
	3006
	3007	try:
	3008	out, content_type = _multipart_encode_impl(data, boundary)
	3009	break
	3010	except ValueError:
	3011	if has_specified_boundary:
	3012	raise
	3013	boundary = None
	3014
	3015	return out, content_type
	3016
	3017
	3018	def dict_get(d, key_or_keys, default=None, skip_false_values=True):
	3019	if isinstance(key_or_keys, (list, tuple)):
	3020	for key in key_or_keys:
	3021	if key not in d or d[key] is None or skip_false_values and not d[key]:
	3022	continue
	3023	return d[key]
	3024	return default
	3025	return d.get(key_or_keys, default)
	3026
	3027
	3028	def try_get(src, getter, expected_type=None):
	3029	for get in variadic(getter):
	3030	try:
	3031	v = get(src)
	3032	except (AttributeError, KeyError, TypeError, IndexError):
	3033	pass
	3034	else:
	3035	if expected_type is None or isinstance(v, expected_type):
	3036	return v
	3037
	3038
	3039	def merge_dicts(*dicts):
	3040	merged = {}
	3041	for a_dict in dicts:
	3042	for k, v in a_dict.items():
	3043	if v is None:
	3044	continue
	3045	if (k not in merged
	3046	or (isinstance(v, compat_str) and v
	3047	and isinstance(merged[k], compat_str)
	3048	and not merged[k])):
	3049	merged[k] = v
	3050	return merged
	3051
	3052
	3053	def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
	3054	return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
	3055
	3056
# Rating label -> integer returned by parse_age_limit() for that label.
# NOTE(review): presumably US film ratings mapped to a minimum viewer age - confirm.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# 'TV-*' label -> integer returned by parse_age_limit() for that label.
# parse_age_limit() builds a regex from the part of each key after 'TV-'.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
	3074
	3075
	3076	def parse_age_limit(s):
	3077	if type(s) == int:
	3078	return s if 0 <= s <= 21 else None
	3079	if not isinstance(s, compat_basestring):
	3080	return None
	3081	m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
	3082	if m:
	3083	return int(m.group('age'))
	3084	s = s.upper()
	3085	if s in US_RATINGS:
	3086	return US_RATINGS[s]
	3087	m = re.match(r'^TV[_-]?(%s)$' % '\|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
	3088	if m:
	3089	return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
	3090	return None
	3091
	3092
	3093	def strip_jsonp(code):
	3094	return re.sub(
	3095	r'''(?sx)^
	3096	(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
	3097	(?:\s&&\s(?P=func_name))?
	3098	\s$\s(?P<callback_data>.*)$;?
	3099	\s?(?://[^\n])*$''',
	3100	r'\g<callback_data>', code)
	3101
	3102
	3103	def js_to_json(code, vars={}):
	3104	# vars is a dict of var, val pairs to substitute
	3105	COMMENT_RE = r'/\(?:(?!\/).)?\/\|//[^\n]*\n'
	3106	SKIP_RE = r'\s(?:{comment})?\s'.format(comment=COMMENT_RE)
	3107	INTEGER_TABLE = (
	3108	(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
	3109	(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
	3110	)
	3111
	3112	def fix_kv(m):
	3113	v = m.group(0)
	3114	if v in ('true', 'false', 'null'):
	3115	return v
	3116	elif v in ('undefined', 'void 0'):
	3117	return 'null'
	3118	elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
	3119	return ""
	3120
	3121	if v[0] in ("'", '"'):
	3122	v = re.sub(r'(?s)\\.\|"', lambda m: {
	3123	'"': '\\"',
	3124	"\\'": "'",
	3125	'\\\n': '',
	3126	'\\x': '\\u00',
	3127	}.get(m.group(0), m.group(0)), v[1:-1])
	3128	else:
	3129	for regex, base in INTEGER_TABLE:
	3130	im = re.match(regex, v)
	3131	if im:
	3132	i = int(im.group(1), base)
	3133	return '"%d":' % i if v.endswith(':') else '%d' % i
	3134
	3135	if v in vars:
	3136	return vars[v]
	3137
	3138	return '"%s"' % v
	3139
	3140	return re.sub(r'''(?sx)
	3141	"(?:[^"\\](?:\\\\\|\\['"nurtbfx/\n]))[^"\\]*"\|
	3142	'(?:[^'\\](?:\\\\\|\\['"nurtbfx/\n]))[^'\\]*'\|
	3143	{comment}\|,(?={skip}[\]}}])\|
	3144	void\s0\|(?:(?<![0-9])[eE]\|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*\|
	3145	\b(?:0[xX][0-9a-fA-F]+\|0+[0-7]+)(?:{skip}:)?\|
	3146	[0-9]+(?={skip}:)\|
	3147	!+
	3148	'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
	3149
	3150
	3151	def qualities(quality_ids):
	3152	""" Get a numeric quality value out of a list of possible values """
	3153	def q(qid):
	3154	try:
	3155	return quality_ids.index(qid)
	3156	except ValueError:
	3157	return -1
	3158	return q
	3159
	3160
# Set of stage names.
# NOTE(review): presumably the points at which a postprocessor may run - confirm against callers.
POSTPROCESS_WHEN = {'pre_process', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}


# Default output templates, keyed by template type.
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Known output template types. Each value is either None or a string.
# NOTE(review): the strings look like file-name suffixes for that type - verify against callers.
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
	3181
	3182	# As of [1] format syntax is:
	3183	# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
	3184	# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
	3185	STR_FORMAT_RE_TMPL = r'''(?x)
	3186	(?<!%)(?P<prefix>(?:%%)*)
	3187	%
	3188	(?P<has_key>$(?P<key>{0})$)?
	3189	(?P<format>
	3190	(?P<conversion>[#0\-+ ]+)?
	3191	(?P<min_width>\d+)?
	3192	(?P<precision>\.\d+)?
	3193	(?P<len_mod>[hlL])? # unused in python
	3194	{1} # conversion type
	3195	)
	3196	'''
	3197
	3198
	3199	STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
	3200
	3201
	3202	def limit_length(s, length):
	3203	""" Add ellipses to overly long strings """
	3204	if s is None:
	3205	return None
	3206	ELLIPSES = '...'
	3207	if len(s) > length:
	3208	return s[:length - len(ELLIPSES)] + ELLIPSES
	3209	return s
	3210
	3211
	3212	def version_tuple(v):
	3213	return tuple(int(e) for e in re.split(r'[-.]', v))
	3214
	3215
	3216	def is_outdated_version(version, limit, assume_new=True):
	3217	if not version:
	3218	return not assume_new
	3219	try:
	3220	return version_tuple(version) < version_tuple(limit)
	3221	except ValueError:
	3222	return not assume_new
	3223
	3224
	3225	def ytdl_is_updateable():
	3226	""" Returns if yt-dlp can be updated with -U """
	3227
	3228	from .update import is_non_updateable
	3229
	3230	return not is_non_updateable()
	3231
	3232
	3233	def args_to_str(args):
	3234	# Get a short string representation for a subprocess command
	3235	return ' '.join(compat_shlex_quote(a) for a in args)
	3236
	3237
	3238	def error_to_compat_str(err):
	3239	err_str = str(err)
	3240	# On python 2 error byte string must be decoded with proper
	3241	# encoding rather than ascii
	3242	if sys.version_info[0] < 3:
	3243	err_str = err_str.decode(preferredencoding())
	3244	return err_str
	3245
	3246
	3247	def mimetype2ext(mt):
	3248	if mt is None:
	3249	return None
	3250
	3251	mt, _, params = mt.partition(';')
	3252	mt = mt.strip()
	3253
	3254	FULL_MAP = {
	3255	'audio/mp4': 'm4a',
	3256	# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
	3257	# it's the most popular one
	3258	'audio/mpeg': 'mp3',
	3259	'audio/x-wav': 'wav',
	3260	'audio/wav': 'wav',
	3261	'audio/wave': 'wav',
	3262	}
	3263
	3264	ext = FULL_MAP.get(mt)
	3265	if ext is not None:
	3266	return ext
	3267
	3268	SUBTYPE_MAP = {
	3269	'3gpp': '3gp',
	3270	'smptett+xml': 'tt',
	3271	'ttaf+xml': 'dfxp',
	3272	'ttml+xml': 'ttml',
	3273	'x-flv': 'flv',
	3274	'x-mp4-fragmented': 'mp4',
	3275	'x-ms-sami': 'sami',
	3276	'x-ms-wmv': 'wmv',
	3277	'mpegurl': 'm3u8',
	3278	'x-mpegurl': 'm3u8',
	3279	'vnd.apple.mpegurl': 'm3u8',
	3280	'dash+xml': 'mpd',
	3281	'f4m+xml': 'f4m',
	3282	'hds+xml': 'f4m',
	3283	'vnd.ms-sstr+xml': 'ism',
	3284	'quicktime': 'mov',
	3285	'mp2t': 'ts',
	3286	'x-wav': 'wav',
	3287	'filmstrip+json': 'fs',
	3288	'svg+xml': 'svg',
	3289	}
	3290
	3291	_, _, subtype = mt.rpartition('/')
	3292	ext = SUBTYPE_MAP.get(subtype.lower())
	3293	if ext is not None:
	3294	return ext
	3295
	3296	SUFFIX_MAP = {
	3297	'json': 'json',
	3298	'xml': 'xml',
	3299	'zip': 'zip',
	3300	'gzip': 'gz',
	3301	}
	3302
	3303	_, _, suffix = subtype.partition('+')
	3304	ext = SUFFIX_MAP.get(suffix)
	3305	if ext is not None:
	3306	return ext
	3307
	3308	return subtype.replace('+', '.')
	3309
	3310
	3311	def ext2mimetype(ext_or_url):
	3312	if not ext_or_url:
	3313	return None
	3314	if '.' not in ext_or_url:
	3315	ext_or_url = f'file.{ext_or_url}'
	3316	return mimetypes.guess_type(ext_or_url)[0]
	3317
	3318
	3319	def parse_codecs(codecs_str):
	3320	# http://tools.ietf.org/html/rfc6381
	3321	if not codecs_str:
	3322	return {}
	3323	split_codecs = list(filter(None, map(
	3324	str.strip, codecs_str.strip().strip(',').split(','))))
	3325	vcodec, acodec, tcodec, hdr = None, None, None, None
	3326	for full_codec in split_codecs:
	3327	parts = full_codec.split('.')
	3328	codec = parts[0].replace('0', '')
	3329	if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
	3330	'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
	3331	if not vcodec:
	3332	vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
	3333	if codec in ('dvh1', 'dvhe'):
	3334	hdr = 'DV'
	3335	elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
	3336	hdr = 'HDR10'
	3337	elif full_codec.replace('0', '').startswith('vp9.2'):
	3338	hdr = 'HDR10'
	3339	elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
	3340	if not acodec:
	3341	acodec = full_codec
	3342	elif codec in ('stpp', 'wvtt',):
	3343	if not tcodec:
	3344	tcodec = full_codec
	3345	else:
	3346	write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
	3347	if vcodec or acodec or tcodec:
	3348	return {
	3349	'vcodec': vcodec or 'none',
	3350	'acodec': acodec or 'none',
	3351	'dynamic_range': hdr,
	3352	**({'tcodec': tcodec} if tcodec is not None else {}),
	3353	}
	3354	elif len(split_codecs) == 2:
	3355	return {
	3356	'vcodec': split_codecs[0],
	3357	'acodec': split_codecs[1],
	3358	}
	3359	return {}
	3360
	3361
	3362	def urlhandle_detect_ext(url_handle):
	3363	getheader = url_handle.headers.get
	3364
	3365	cd = getheader('Content-Disposition')
	3366	if cd:
	3367	m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
	3368	if m:
	3369	e = determine_ext(m.group('filename'), default_ext=None)
	3370	if e:
	3371	return e
	3372
	3373	return mimetype2ext(getheader('Content-Type'))
	3374
	3375
	3376	def encode_data_uri(data, mime_type):
	3377	return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
	3378
	3379
	3380	def age_restricted(content_limit, age_limit):
	3381	""" Returns True iff the content should be blocked """
	3382
	3383	if age_limit is None: # No limit set
	3384	return False
	3385	if content_limit is None:
	3386	return False # Content available for everyone
	3387	return age_limit < content_limit
	3388
	3389
	3390	def is_html(first_bytes):
	3391	""" Detect whether a file contains HTML by examining its first bytes. """
	3392
	3393	BOMS = [
	3394	(b'\xef\xbb\xbf', 'utf-8'),
	3395	(b'\x00\x00\xfe\xff', 'utf-32-be'),
	3396	(b'\xff\xfe\x00\x00', 'utf-32-le'),
	3397	(b'\xff\xfe', 'utf-16-le'),
	3398	(b'\xfe\xff', 'utf-16-be'),
	3399	]
	3400	for bom, enc in BOMS:
	3401	if first_bytes.startswith(bom):
	3402	s = first_bytes[len(bom):].decode(enc, 'replace')
	3403	break
	3404	else:
	3405	s = first_bytes.decode('utf-8', 'replace')
	3406
	3407	return re.match(r'^\s*<', s)
	3408
	3409
	3410	def determine_protocol(info_dict):
	3411	protocol = info_dict.get('protocol')
	3412	if protocol is not None:
	3413	return protocol
	3414
	3415	url = sanitize_url(info_dict['url'])
	3416	if url.startswith('rtmp'):
	3417	return 'rtmp'
	3418	elif url.startswith('mms'):
	3419	return 'mms'
	3420	elif url.startswith('rtsp'):
	3421	return 'rtsp'
	3422
	3423	ext = determine_ext(url)
	3424	if ext == 'm3u8':
	3425	return 'm3u8'
	3426	elif ext == 'f4m':
	3427	return 'f4m'
	3428
	3429	return compat_urllib_parse_urlparse(url).scheme
	3430
	3431
	3432	def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
	3433	""" Render a list of rows, each as a list of values.
	3434	Text after a \t will be right aligned """
	3435	def width(string):
	3436	return len(remove_terminal_sequences(string).replace('\t', ''))
	3437
	3438	def get_max_lens(table):
	3439	return [max(width(str(v)) for v in col) for col in zip(*table)]
	3440
	3441	def filter_using_list(row, filterArray):
	3442	return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
	3443
	3444	max_lens = get_max_lens(data) if hide_empty else []
	3445	header_row = filter_using_list(header_row, max_lens)
	3446	data = [filter_using_list(row, max_lens) for row in data]
	3447
	3448	table = [header_row] + data
	3449	max_lens = get_max_lens(table)
	3450	extra_gap += 1
	3451	if delim:
	3452	table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
	3453	table[1][-1] = table[1][-1][:-extra_gap] # Remove extra_gap from end of delimiter
	3454	for row in table:
	3455	for pos, text in enumerate(map(str, row)):
	3456	if '\t' in text:
	3457	row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
	3458	else:
	3459	row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
	3460	ret = '\n'.join(''.join(row).rstrip() for row in table)
	3461	return ret
	3462
	3463
	3464	def _match_one(filter_part, dct, incomplete):
	3465	# TODO: Generalize code with YoutubeDL._build_format_filter
	3466	STRING_OPERATORS = {
	3467	'*=': operator.contains,
	3468	'^=': lambda attr, value: attr.startswith(value),
	3469	'$=': lambda attr, value: attr.endswith(value),
	3470	'~=': lambda attr, value: re.search(value, attr),
	3471	}
	3472	COMPARISON_OPERATORS = {
	3473	**STRING_OPERATORS,
	3474	'<=': operator.le, # "<=" must be defined above "<"
	3475	'<': operator.lt,
	3476	'>=': operator.ge,
	3477	'>': operator.gt,
	3478	'=': operator.eq,
	3479	}
	3480
	3481	operator_rex = re.compile(r'''(?x)\s*
	3482	(?P<key>[a-z_]+)
	3483	\s(?P<negation>!\s)?(?P<op>%s)(?P<none_inclusive>\s\?)?\s
	3484	(?:
	3485	(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)\|
	3486	(?P<strval>.+?)
	3487	)
	3488	\s*$
	3489	''' % '\|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
	3490	m = operator_rex.search(filter_part)
	3491	if m:
	3492	m = m.groupdict()
	3493	unnegated_op = COMPARISON_OPERATORS[m['op']]
	3494	if m['negation']:
	3495	op = lambda attr, value: not unnegated_op(attr, value)
	3496	else:
	3497	op = unnegated_op
	3498	comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
	3499	if m['quote']:
	3500	comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
	3501	actual_value = dct.get(m['key'])
	3502	numeric_comparison = None
	3503	if isinstance(actual_value, compat_numeric_types):
	3504	# If the original field is a string and matching comparisonvalue is
	3505	# a number we should respect the origin of the original field
	3506	# and process comparison value as a string (see
	3507	# https://github.com/ytdl-org/youtube-dl/issues/11082)
	3508	try:
	3509	numeric_comparison = int(comparison_value)
	3510	except ValueError:
	3511	numeric_comparison = parse_filesize(comparison_value)
	3512	if numeric_comparison is None:
	3513	numeric_comparison = parse_filesize(f'{comparison_value}B')
	3514	if numeric_comparison is None:
	3515	numeric_comparison = parse_duration(comparison_value)
	3516	if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
	3517	raise ValueError('Operator %s only supports string values!' % m['op'])
	3518	if actual_value is None:
	3519	return incomplete or m['none_inclusive']
	3520	return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
	3521
	3522	UNARY_OPERATORS = {
	3523	'': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
	3524	'!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
	3525	}
	3526	operator_rex = re.compile(r'''(?x)\s*
	3527	(?P<op>%s)\s*(?P<key>[a-z_]+)
	3528	\s*$
	3529	''' % '\|'.join(map(re.escape, UNARY_OPERATORS.keys())))
	3530	m = operator_rex.search(filter_part)
	3531	if m:
	3532	op = UNARY_OPERATORS[m.group('op')]
	3533	actual_value = dct.get(m.group('key'))
	3534	if incomplete and actual_value is None:
	3535	return True
	3536	return op(actual_value)
	3537
	3538	raise ValueError('Invalid filter part %r' % filter_part)
	3539
	3540
	3541	def match_str(filter_str, dct, incomplete=False):
	3542	""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
	3543	When incomplete, all conditions passes on missing fields
	3544	"""
	3545	return all(
	3546	_match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
	3547	for filter_part in re.split(r'(?<!\\)&', filter_str))
	3548
	3549
	3550	def match_filter_func(filter_str):
	3551	def _match_func(info_dict, args, *kwargs):
	3552	if match_str(filter_str, info_dict, args, *kwargs):
	3553	return None
	3554	else:
	3555	video_title = info_dict.get('title', info_dict.get('id', 'video'))
	3556	return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
	3557	return _match_func
	3558
	3559
	3560	def parse_dfxp_time_expr(time_expr):
	3561	if not time_expr:
	3562	return
	3563
	3564	mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
	3565	if mobj:
	3566	return float(mobj.group('time_offset'))
	3567
	3568	mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.\|:)\d+)?)$', time_expr)
	3569	if mobj:
	3570	return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
	3571
	3572
	3573	def srt_subtitles_timecode(seconds):
	3574	return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
	3575
	3576
	3577	def ass_subtitles_timecode(seconds):
	3578	time = timetuple_from_msec(seconds * 1000)
	3579	return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
	3580
	3581
	3582	def dfxp2srt(dfxp_data):
	3583	'''
	3584	@param dfxp_data A bytes-like object containing DFXP data
	3585	@returns A unicode object containing converted SRT data
	3586	'''
	3587	LEGACY_NAMESPACES = (
	3588	(b'http://www.w3.org/ns/ttml', [
	3589	b'http://www.w3.org/2004/11/ttaf1',
	3590	b'http://www.w3.org/2006/04/ttaf1',
	3591	b'http://www.w3.org/2006/10/ttaf1',
	3592	]),
	3593	(b'http://www.w3.org/ns/ttml#styling', [
	3594	b'http://www.w3.org/ns/ttml#style',
	3595	]),
	3596	)
	3597
	3598	SUPPORTED_STYLING = [
	3599	'color',
	3600	'fontFamily',
	3601	'fontSize',
	3602	'fontStyle',
	3603	'fontWeight',
	3604	'textDecoration'
	3605	]
	3606
	3607	_x = functools.partial(xpath_with_ns, ns_map={
	3608	'xml': 'http://www.w3.org/XML/1998/namespace',
	3609	'ttml': 'http://www.w3.org/ns/ttml',
	3610	'tts': 'http://www.w3.org/ns/ttml#styling',
	3611	})
	3612
	3613	styles = {}
	3614	default_style = {}
	3615
	3616	class TTMLPElementParser(object):
	3617	_out = ''
	3618	_unclosed_elements = []
	3619	_applied_styles = []
	3620
	3621	def start(self, tag, attrib):
	3622	if tag in (_x('ttml:br'), 'br'):
	3623	self._out += '\n'
	3624	else:
	3625	unclosed_elements = []
	3626	style = {}
	3627	element_style_id = attrib.get('style')
	3628	if default_style:
	3629	style.update(default_style)
	3630	if element_style_id:
	3631	style.update(styles.get(element_style_id, {}))
	3632	for prop in SUPPORTED_STYLING:
	3633	prop_val = attrib.get(_x('tts:' + prop))
	3634	if prop_val:
	3635	style[prop] = prop_val
	3636	if style:
	3637	font = ''
	3638	for k, v in sorted(style.items()):
	3639	if self._applied_styles and self._applied_styles[-1].get(k) == v:
	3640	continue
	3641	if k == 'color':
	3642	font += ' color="%s"' % v
	3643	elif k == 'fontSize':
	3644	font += ' size="%s"' % v
	3645	elif k == 'fontFamily':
	3646	font += ' face="%s"' % v
	3647	elif k == 'fontWeight' and v == 'bold':
	3648	self._out += '<b>'
	3649	unclosed_elements.append('b')
	3650	elif k == 'fontStyle' and v == 'italic':
	3651	self._out += '<i>'
	3652	unclosed_elements.append('i')
	3653	elif k == 'textDecoration' and v == 'underline':
	3654	self._out += '<u>'
	3655	unclosed_elements.append('u')
	3656	if font:
	3657	self._out += '<font' + font + '>'
	3658	unclosed_elements.append('font')
	3659	applied_style = {}
	3660	if self._applied_styles:
	3661	applied_style.update(self._applied_styles[-1])
	3662	applied_style.update(style)
	3663	self._applied_styles.append(applied_style)
	3664	self._unclosed_elements.append(unclosed_elements)
	3665
	3666	def end(self, tag):
	3667	if tag not in (_x('ttml:br'), 'br'):
	3668	unclosed_elements = self._unclosed_elements.pop()
	3669	for element in reversed(unclosed_elements):
	3670	self._out += '</%s>' % element
	3671	if unclosed_elements and self._applied_styles:
	3672	self._applied_styles.pop()
	3673
	3674	def data(self, data):
	3675	self._out += data
	3676
	3677	def close(self):
	3678	return self._out.strip()
	3679
	3680	def parse_node(node):
	3681	target = TTMLPElementParser()
	3682	parser = xml.etree.ElementTree.XMLParser(target=target)
	3683	parser.feed(xml.etree.ElementTree.tostring(node))
	3684	return parser.close()
	3685
	3686	for k, v in LEGACY_NAMESPACES:
	3687	for ns in v:
	3688	dfxp_data = dfxp_data.replace(ns, k)
	3689
	3690	dfxp = compat_etree_fromstring(dfxp_data)
	3691	out = []
	3692	paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
	3693
	3694	if not paras:
	3695	raise ValueError('Invalid dfxp/TTML subtitle')
	3696
	3697	repeat = False
	3698	while True:
	3699	for style in dfxp.findall(_x('.//ttml:style')):
	3700	style_id = style.get('id') or style.get(_x('xml:id'))
	3701	if not style_id:
	3702	continue
	3703	parent_style_id = style.get('style')
	3704	if parent_style_id:
	3705	if parent_style_id not in styles:
	3706	repeat = True
	3707	continue
	3708	styles[style_id] = styles[parent_style_id].copy()
	3709	for prop in SUPPORTED_STYLING:
	3710	prop_val = style.get(_x('tts:' + prop))
	3711	if prop_val:
	3712	styles.setdefault(style_id, {})[prop] = prop_val
	3713	if repeat:
	3714	repeat = False
	3715	else:
	3716	break
	3717
	3718	for p in ('body', 'div'):
	3719	ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
	3720	if ele is None:
	3721	continue
	3722	style = styles.get(ele.get('style'))
	3723	if not style:
	3724	continue
	3725	default_style.update(style)
	3726
	3727	for para, index in zip(paras, itertools.count(1)):
	3728	begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
	3729	end_time = parse_dfxp_time_expr(para.attrib.get('end'))
	3730	dur = parse_dfxp_time_expr(para.attrib.get('dur'))
	3731	if begin_time is None:
	3732	continue
	3733	if not end_time:
	3734	if not dur:
	3735	continue
	3736	end_time = begin_time + dur
	3737	out.append('%d\n%s --> %s\n%s\n\n' % (
	3738	index,
	3739	srt_subtitles_timecode(begin_time),
	3740	srt_subtitles_timecode(end_time),
	3741	parse_node(para)))
	3742
	3743	return ''.join(out)
	3744
	3745
	3746	def cli_option(params, command_option, param):
	3747	param = params.get(param)
	3748	if param:
	3749	param = compat_str(param)
	3750	return [command_option, param] if param is not None else []
	3751
	3752
	3753	def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
	3754	param = params.get(param)
	3755	if param is None:
	3756	return []
	3757	assert isinstance(param, bool)
	3758	if separator:
	3759	return [command_option + separator + (true_value if param else false_value)]
	3760	return [command_option, true_value if param else false_value]
	3761
	3762
	3763	def cli_valueless_option(params, command_option, param, expected_value=True):
	3764	param = params.get(param)
	3765	return [command_option] if param == expected_value else []
	3766
	3767
	3768	def cli_configuration_args(argdict, keys, default=[], use_compat=True):
	3769	if isinstance(argdict, (list, tuple)): # for backward compatibility
	3770	if use_compat:
	3771	return argdict
	3772	else:
	3773	argdict = None
	3774	if argdict is None:
	3775	return default
	3776	assert isinstance(argdict, dict)
	3777
	3778	assert isinstance(keys, (list, tuple))
	3779	for key_list in keys:
	3780	arg_list = list(filter(
	3781	lambda x: x is not None,
	3782	[argdict.get(key.lower()) for key in variadic(key_list)]))
	3783	if arg_list:
	3784	return [arg for args in arg_list for arg in args]
	3785	return default
	3786
	3787
	3788	def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
	3789	main_key, exe = main_key.lower(), exe.lower()
	3790	root_key = exe if main_key == exe else f'{main_key}+{exe}'
	3791	keys = [f'{root_key}{k}' for k in (keys or [''])]
	3792	if root_key in keys:
	3793	if main_key != exe:
	3794	keys.append((main_key, exe))
	3795	keys.append('default')
	3796	else:
	3797	use_compat = False
	3798	return cli_configuration_args(argdict, keys, default, use_compat)
	3799
	3800
	3801	class ISO639Utils(object):
	3802	# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
	3803	_lang_map = {
	3804	'aa': 'aar',
	3805	'ab': 'abk',
	3806	'ae': 'ave',
	3807	'af': 'afr',
	3808	'ak': 'aka',
	3809	'am': 'amh',
	3810	'an': 'arg',
	3811	'ar': 'ara',
	3812	'as': 'asm',
	3813	'av': 'ava',
	3814	'ay': 'aym',
	3815	'az': 'aze',
	3816	'ba': 'bak',
	3817	'be': 'bel',
	3818	'bg': 'bul',
	3819	'bh': 'bih',
	3820	'bi': 'bis',
	3821	'bm': 'bam',
	3822	'bn': 'ben',
	3823	'bo': 'bod',
	3824	'br': 'bre',
	3825	'bs': 'bos',
	3826	'ca': 'cat',
	3827	'ce': 'che',
	3828	'ch': 'cha',
	3829	'co': 'cos',
	3830	'cr': 'cre',
	3831	'cs': 'ces',
	3832	'cu': 'chu',
	3833	'cv': 'chv',
	3834	'cy': 'cym',
	3835	'da': 'dan',
	3836	'de': 'deu',
	3837	'dv': 'div',
	3838	'dz': 'dzo',
	3839	'ee': 'ewe',
	3840	'el': 'ell',
	3841	'en': 'eng',
	3842	'eo': 'epo',
	3843	'es': 'spa',
	3844	'et': 'est',
	3845	'eu': 'eus',
	3846	'fa': 'fas',
	3847	'ff': 'ful',
	3848	'fi': 'fin',
	3849	'fj': 'fij',
	3850	'fo': 'fao',
	3851	'fr': 'fra',
	3852	'fy': 'fry',
	3853	'ga': 'gle',
	3854	'gd': 'gla',
	3855	'gl': 'glg',
	3856	'gn': 'grn',
	3857	'gu': 'guj',
	3858	'gv': 'glv',
	3859	'ha': 'hau',
	3860	'he': 'heb',
	3861	'iw': 'heb', # Replaced by he in 1989 revision
	3862	'hi': 'hin',
	3863	'ho': 'hmo',
	3864	'hr': 'hrv',
	3865	'ht': 'hat',
	3866	'hu': 'hun',
	3867	'hy': 'hye',
	3868	'hz': 'her',
	3869	'ia': 'ina',
	3870	'id': 'ind',
	3871	'in': 'ind', # Replaced by id in 1989 revision
	3872	'ie': 'ile',
	3873	'ig': 'ibo',
	3874	'ii': 'iii',
	3875	'ik': 'ipk',
	3876	'io': 'ido',
	3877	'is': 'isl',
	3878	'it': 'ita',
	3879	'iu': 'iku',
	3880	'ja': 'jpn',
	3881	'jv': 'jav',
	3882	'ka': 'kat',
	3883	'kg': 'kon',
	3884	'ki': 'kik',
	3885	'kj': 'kua',
	3886	'kk': 'kaz',
	3887	'kl': 'kal',
	3888	'km': 'khm',
	3889	'kn': 'kan',
	3890	'ko': 'kor',
	3891	'kr': 'kau',
	3892	'ks': 'kas',
	3893	'ku': 'kur',
	3894	'kv': 'kom',
	3895	'kw': 'cor',
	3896	'ky': 'kir',
	3897	'la': 'lat',
	3898	'lb': 'ltz',
	3899	'lg': 'lug',
	3900	'li': 'lim',
	3901	'ln': 'lin',
	3902	'lo': 'lao',
	3903	'lt': 'lit',
	3904	'lu': 'lub',
	3905	'lv': 'lav',
	3906	'mg': 'mlg',
	3907	'mh': 'mah',
	3908	'mi': 'mri',
	3909	'mk': 'mkd',
	3910	'ml': 'mal',
	3911	'mn': 'mon',
	3912	'mr': 'mar',
	3913	'ms': 'msa',
	3914	'mt': 'mlt',
	3915	'my': 'mya',
	3916	'na': 'nau',
	3917	'nb': 'nob',
	3918	'nd': 'nde',
	3919	'ne': 'nep',
	3920	'ng': 'ndo',
	3921	'nl': 'nld',
	3922	'nn': 'nno',
	3923	'no': 'nor',
	3924	'nr': 'nbl',
	3925	'nv': 'nav',
	3926	'ny': 'nya',
	3927	'oc': 'oci',
	3928	'oj': 'oji',
	3929	'om': 'orm',
	3930	'or': 'ori',
	3931	'os': 'oss',
	3932	'pa': 'pan',
	3933	'pi': 'pli',
	3934	'pl': 'pol',
	3935	'ps': 'pus',
	3936	'pt': 'por',
	3937	'qu': 'que',
	3938	'rm': 'roh',
	3939	'rn': 'run',
	3940	'ro': 'ron',
	3941	'ru': 'rus',
	3942	'rw': 'kin',
	3943	'sa': 'san',
	3944	'sc': 'srd',
	3945	'sd': 'snd',
	3946	'se': 'sme',
	3947	'sg': 'sag',
	3948	'si': 'sin',
	3949	'sk': 'slk',
	3950	'sl': 'slv',
	3951	'sm': 'smo',
	3952	'sn': 'sna',
	3953	'so': 'som',
	3954	'sq': 'sqi',
	3955	'sr': 'srp',
	3956	'ss': 'ssw',
	3957	'st': 'sot',
	3958	'su': 'sun',
	3959	'sv': 'swe',
	3960	'sw': 'swa',
	3961	'ta': 'tam',
	3962	'te': 'tel',
	3963	'tg': 'tgk',
	3964	'th': 'tha',
	3965	'ti': 'tir',
	3966	'tk': 'tuk',
	3967	'tl': 'tgl',
	3968	'tn': 'tsn',
	3969	'to': 'ton',
	3970	'tr': 'tur',
	3971	'ts': 'tso',
	3972	'tt': 'tat',
	3973	'tw': 'twi',
	3974	'ty': 'tah',
	3975	'ug': 'uig',
	3976	'uk': 'ukr',
	3977	'ur': 'urd',
	3978	'uz': 'uzb',
	3979	've': 'ven',
	3980	'vi': 'vie',
	3981	'vo': 'vol',
	3982	'wa': 'wln',
	3983	'wo': 'wol',
	3984	'xh': 'xho',
	3985	'yi': 'yid',
	3986	'ji': 'yid', # Replaced by yi in 1989 revision
	3987	'yo': 'yor',
	3988	'za': 'zha',
	3989	'zh': 'zho',
	3990	'zu': 'zul',
	3991	}
	3992
	3993	@classmethod
	3994	def short2long(cls, code):
	3995	"""Convert language code from ISO 639-1 to ISO 639-2/T"""
	3996	return cls._lang_map.get(code[:2])
	3997
	3998	@classmethod
	3999	def long2short(cls, code):
	4000	"""Convert language code from ISO 639-2/T to ISO 639-1"""
	4001	for short_name, long_name in cls._lang_map.items():
	4002	if long_name == code:
	4003	return short_name
	4004
	4005
	4006	class ISO3166Utils(object):
	4007	# From http://data.okfn.org/data/core/country-list
	4008	_country_map = {
	4009	'AF': 'Afghanistan',
	4010	'AX': 'Åland Islands',
	4011	'AL': 'Albania',
	4012	'DZ': 'Algeria',
	4013	'AS': 'American Samoa',
	4014	'AD': 'Andorra',
	4015	'AO': 'Angola',
	4016	'AI': 'Anguilla',
	4017	'AQ': 'Antarctica',
	4018	'AG': 'Antigua and Barbuda',
	4019	'AR': 'Argentina',
	4020	'AM': 'Armenia',
	4021	'AW': 'Aruba',
	4022	'AU': 'Australia',
	4023	'AT': 'Austria',
	4024	'AZ': 'Azerbaijan',
	4025	'BS': 'Bahamas',
	4026	'BH': 'Bahrain',
	4027	'BD': 'Bangladesh',
	4028	'BB': 'Barbados',
	4029	'BY': 'Belarus',
	4030	'BE': 'Belgium',
	4031	'BZ': 'Belize',
	4032	'BJ': 'Benin',
	4033	'BM': 'Bermuda',
	4034	'BT': 'Bhutan',
	4035	'BO': 'Bolivia, Plurinational State of',
	4036	'BQ': 'Bonaire, Sint Eustatius and Saba',
	4037	'BA': 'Bosnia and Herzegovina',
	4038	'BW': 'Botswana',
	4039	'BV': 'Bouvet Island',
	4040	'BR': 'Brazil',
	4041	'IO': 'British Indian Ocean Territory',
	4042	'BN': 'Brunei Darussalam',
	4043	'BG': 'Bulgaria',
	4044	'BF': 'Burkina Faso',
	4045	'BI': 'Burundi',
	4046	'KH': 'Cambodia',
	4047	'CM': 'Cameroon',
	4048	'CA': 'Canada',
	4049	'CV': 'Cape Verde',
	4050	'KY': 'Cayman Islands',
	4051	'CF': 'Central African Republic',
	4052	'TD': 'Chad',
	4053	'CL': 'Chile',
	4054	'CN': 'China',
	4055	'CX': 'Christmas Island',
	4056	'CC': 'Cocos (Keeling) Islands',
	4057	'CO': 'Colombia',
	4058	'KM': 'Comoros',
	4059	'CG': 'Congo',
	4060	'CD': 'Congo, the Democratic Republic of the',
	4061	'CK': 'Cook Islands',
	4062	'CR': 'Costa Rica',
	4063	'CI': 'Côte d\'Ivoire',
	4064	'HR': 'Croatia',
	4065	'CU': 'Cuba',
	4066	'CW': 'Curaçao',
	4067	'CY': 'Cyprus',
	4068	'CZ': 'Czech Republic',
	4069	'DK': 'Denmark',
	4070	'DJ': 'Djibouti',
	4071	'DM': 'Dominica',
	4072	'DO': 'Dominican Republic',
	4073	'EC': 'Ecuador',
	4074	'EG': 'Egypt',
	4075	'SV': 'El Salvador',
	4076	'GQ': 'Equatorial Guinea',
	4077	'ER': 'Eritrea',
	4078	'EE': 'Estonia',
	4079	'ET': 'Ethiopia',
	4080	'FK': 'Falkland Islands (Malvinas)',
	4081	'FO': 'Faroe Islands',
	4082	'FJ': 'Fiji',
	4083	'FI': 'Finland',
	4084	'FR': 'France',
	4085	'GF': 'French Guiana',
	4086	'PF': 'French Polynesia',
	4087	'TF': 'French Southern Territories',
	4088	'GA': 'Gabon',
	4089	'GM': 'Gambia',
	4090	'GE': 'Georgia',
	4091	'DE': 'Germany',
	4092	'GH': 'Ghana',
	4093	'GI': 'Gibraltar',
	4094	'GR': 'Greece',
	4095	'GL': 'Greenland',
	4096	'GD': 'Grenada',
	4097	'GP': 'Guadeloupe',
	4098	'GU': 'Guam',
	4099	'GT': 'Guatemala',
	4100	'GG': 'Guernsey',
	4101	'GN': 'Guinea',
	4102	'GW': 'Guinea-Bissau',
	4103	'GY': 'Guyana',
	4104	'HT': 'Haiti',
	4105	'HM': 'Heard Island and McDonald Islands',
	4106	'VA': 'Holy See (Vatican City State)',
	4107	'HN': 'Honduras',
	4108	'HK': 'Hong Kong',
	4109	'HU': 'Hungary',
	4110	'IS': 'Iceland',
	4111	'IN': 'India',
	4112	'ID': 'Indonesia',
	4113	'IR': 'Iran, Islamic Republic of',
	4114	'IQ': 'Iraq',
	4115	'IE': 'Ireland',
	4116	'IM': 'Isle of Man',
	4117	'IL': 'Israel',
	4118	'IT': 'Italy',
	4119	'JM': 'Jamaica',
	4120	'JP': 'Japan',
	4121	'JE': 'Jersey',
	4122	'JO': 'Jordan',
	4123	'KZ': 'Kazakhstan',
	4124	'KE': 'Kenya',
	4125	'KI': 'Kiribati',
	4126	'KP': 'Korea, Democratic People\'s Republic of',
	4127	'KR': 'Korea, Republic of',
	4128	'KW': 'Kuwait',
	4129	'KG': 'Kyrgyzstan',
	4130	'LA': 'Lao People\'s Democratic Republic',
	4131	'LV': 'Latvia',
	4132	'LB': 'Lebanon',
	4133	'LS': 'Lesotho',
	4134	'LR': 'Liberia',
	4135	'LY': 'Libya',
	4136	'LI': 'Liechtenstein',
	4137	'LT': 'Lithuania',
	4138	'LU': 'Luxembourg',
	4139	'MO': 'Macao',
	4140	'MK': 'Macedonia, the Former Yugoslav Republic of',
	4141	'MG': 'Madagascar',
	4142	'MW': 'Malawi',
	4143	'MY': 'Malaysia',
	4144	'MV': 'Maldives',
	4145	'ML': 'Mali',
	4146	'MT': 'Malta',
	4147	'MH': 'Marshall Islands',
	4148	'MQ': 'Martinique',
	4149	'MR': 'Mauritania',
	4150	'MU': 'Mauritius',
	4151	'YT': 'Mayotte',
	4152	'MX': 'Mexico',
	4153	'FM': 'Micronesia, Federated States of',
	4154	'MD': 'Moldova, Republic of',
	4155	'MC': 'Monaco',
	4156	'MN': 'Mongolia',
	4157	'ME': 'Montenegro',
	4158	'MS': 'Montserrat',
	4159	'MA': 'Morocco',
	4160	'MZ': 'Mozambique',
	4161	'MM': 'Myanmar',
	4162	'NA': 'Namibia',
	4163	'NR': 'Nauru',
	4164	'NP': 'Nepal',
	4165	'NL': 'Netherlands',
	4166	'NC': 'New Caledonia',
	4167	'NZ': 'New Zealand',
	4168	'NI': 'Nicaragua',
	4169	'NE': 'Niger',
	4170	'NG': 'Nigeria',
	4171	'NU': 'Niue',
	4172	'NF': 'Norfolk Island',
	4173	'MP': 'Northern Mariana Islands',
	4174	'NO': 'Norway',
	4175	'OM': 'Oman',
	4176	'PK': 'Pakistan',
	4177	'PW': 'Palau',
	4178	'PS': 'Palestine, State of',
	4179	'PA': 'Panama',
	4180	'PG': 'Papua New Guinea',
	4181	'PY': 'Paraguay',
	4182	'PE': 'Peru',
	4183	'PH': 'Philippines',
	4184	'PN': 'Pitcairn',
	4185	'PL': 'Poland',
	4186	'PT': 'Portugal',
	4187	'PR': 'Puerto Rico',
	4188	'QA': 'Qatar',
	4189	'RE': 'Réunion',
	4190	'RO': 'Romania',
	4191	'RU': 'Russian Federation',
	4192	'RW': 'Rwanda',
	4193	'BL': 'Saint Barthélemy',
	4194	'SH': 'Saint Helena, Ascension and Tristan da Cunha',
	4195	'KN': 'Saint Kitts and Nevis',
	4196	'LC': 'Saint Lucia',
	4197	'MF': 'Saint Martin (French part)',
	4198	'PM': 'Saint Pierre and Miquelon',
	4199	'VC': 'Saint Vincent and the Grenadines',
	4200	'WS': 'Samoa',
	4201	'SM': 'San Marino',
	4202	'ST': 'Sao Tome and Principe',
	4203	'SA': 'Saudi Arabia',
	4204	'SN': 'Senegal',
	4205	'RS': 'Serbia',
	4206	'SC': 'Seychelles',
	4207	'SL': 'Sierra Leone',
	4208	'SG': 'Singapore',
	4209	'SX': 'Sint Maarten (Dutch part)',
	4210	'SK': 'Slovakia',
	4211	'SI': 'Slovenia',
	4212	'SB': 'Solomon Islands',
	4213	'SO': 'Somalia',
	4214	'ZA': 'South Africa',
	4215	'GS': 'South Georgia and the South Sandwich Islands',
	4216	'SS': 'South Sudan',
	4217	'ES': 'Spain',
	4218	'LK': 'Sri Lanka',
	4219	'SD': 'Sudan',
	4220	'SR': 'Suriname',
	4221	'SJ': 'Svalbard and Jan Mayen',
	4222	'SZ': 'Swaziland',
	4223	'SE': 'Sweden',
	4224	'CH': 'Switzerland',
	4225	'SY': 'Syrian Arab Republic',
	4226	'TW': 'Taiwan, Province of China',
	4227	'TJ': 'Tajikistan',
	4228	'TZ': 'Tanzania, United Republic of',
	4229	'TH': 'Thailand',
	4230	'TL': 'Timor-Leste',
	4231	'TG': 'Togo',
	4232	'TK': 'Tokelau',
	4233	'TO': 'Tonga',
	4234	'TT': 'Trinidad and Tobago',
	4235	'TN': 'Tunisia',
	4236	'TR': 'Turkey',
	4237	'TM': 'Turkmenistan',
	4238	'TC': 'Turks and Caicos Islands',
	4239	'TV': 'Tuvalu',
	4240	'UG': 'Uganda',
	4241	'UA': 'Ukraine',
	4242	'AE': 'United Arab Emirates',
	4243	'GB': 'United Kingdom',
	4244	'US': 'United States',
	4245	'UM': 'United States Minor Outlying Islands',
	4246	'UY': 'Uruguay',
	4247	'UZ': 'Uzbekistan',
	4248	'VU': 'Vanuatu',
	4249	'VE': 'Venezuela, Bolivarian Republic of',
	4250	'VN': 'Viet Nam',
	4251	'VG': 'Virgin Islands, British',
	4252	'VI': 'Virgin Islands, U.S.',
	4253	'WF': 'Wallis and Futuna',
	4254	'EH': 'Western Sahara',
	4255	'YE': 'Yemen',
	4256	'ZM': 'Zambia',
	4257	'ZW': 'Zimbabwe',
	4258	}
	4259
	4260	@classmethod
	4261	def short2full(cls, code):
	4262	"""Convert an ISO 3166-2 country code to the corresponding full name"""
	4263	return cls._country_map.get(code.upper())
	4264
	4265
	4266	class GeoUtils(object):
	4267	# Major IPv4 address blocks per country
	4268	_country_ip_map = {
	4269	'AD': '46.172.224.0/19',
	4270	'AE': '94.200.0.0/13',
	4271	'AF': '149.54.0.0/17',
	4272	'AG': '209.59.64.0/18',
	4273	'AI': '204.14.248.0/21',
	4274	'AL': '46.99.0.0/16',
	4275	'AM': '46.70.0.0/15',
	4276	'AO': '105.168.0.0/13',
	4277	'AP': '182.50.184.0/21',
	4278	'AQ': '23.154.160.0/24',
	4279	'AR': '181.0.0.0/12',
	4280	'AS': '202.70.112.0/20',
	4281	'AT': '77.116.0.0/14',
	4282	'AU': '1.128.0.0/11',
	4283	'AW': '181.41.0.0/18',
	4284	'AX': '185.217.4.0/22',
	4285	'AZ': '5.197.0.0/16',
	4286	'BA': '31.176.128.0/17',
	4287	'BB': '65.48.128.0/17',
	4288	'BD': '114.130.0.0/16',
	4289	'BE': '57.0.0.0/8',
	4290	'BF': '102.178.0.0/15',
	4291	'BG': '95.42.0.0/15',
	4292	'BH': '37.131.0.0/17',
	4293	'BI': '154.117.192.0/18',
	4294	'BJ': '137.255.0.0/16',
	4295	'BL': '185.212.72.0/23',
	4296	'BM': '196.12.64.0/18',
	4297	'BN': '156.31.0.0/16',
	4298	'BO': '161.56.0.0/16',
	4299	'BQ': '161.0.80.0/20',
	4300	'BR': '191.128.0.0/12',
	4301	'BS': '24.51.64.0/18',
	4302	'BT': '119.2.96.0/19',
	4303	'BW': '168.167.0.0/16',
	4304	'BY': '178.120.0.0/13',
	4305	'BZ': '179.42.192.0/18',
	4306	'CA': '99.224.0.0/11',
	4307	'CD': '41.243.0.0/16',
	4308	'CF': '197.242.176.0/21',
	4309	'CG': '160.113.0.0/16',
	4310	'CH': '85.0.0.0/13',
	4311	'CI': '102.136.0.0/14',
	4312	'CK': '202.65.32.0/19',
	4313	'CL': '152.172.0.0/14',
	4314	'CM': '102.244.0.0/14',
	4315	'CN': '36.128.0.0/10',
	4316	'CO': '181.240.0.0/12',
	4317	'CR': '201.192.0.0/12',
	4318	'CU': '152.206.0.0/15',
	4319	'CV': '165.90.96.0/19',
	4320	'CW': '190.88.128.0/17',
	4321	'CY': '31.153.0.0/16',
	4322	'CZ': '88.100.0.0/14',
	4323	'DE': '53.0.0.0/8',
	4324	'DJ': '197.241.0.0/17',
	4325	'DK': '87.48.0.0/12',
	4326	'DM': '192.243.48.0/20',
	4327	'DO': '152.166.0.0/15',
	4328	'DZ': '41.96.0.0/12',
	4329	'EC': '186.68.0.0/15',
	4330	'EE': '90.190.0.0/15',
	4331	'EG': '156.160.0.0/11',
	4332	'ER': '196.200.96.0/20',
	4333	'ES': '88.0.0.0/11',
	4334	'ET': '196.188.0.0/14',
	4335	'EU': '2.16.0.0/13',
	4336	'FI': '91.152.0.0/13',
	4337	'FJ': '144.120.0.0/16',
	4338	'FK': '80.73.208.0/21',
	4339	'FM': '119.252.112.0/20',
	4340	'FO': '88.85.32.0/19',
	4341	'FR': '90.0.0.0/9',
	4342	'GA': '41.158.0.0/15',
	4343	'GB': '25.0.0.0/8',
	4344	'GD': '74.122.88.0/21',
	4345	'GE': '31.146.0.0/16',
	4346	'GF': '161.22.64.0/18',
	4347	'GG': '62.68.160.0/19',
	4348	'GH': '154.160.0.0/12',
	4349	'GI': '95.164.0.0/16',
	4350	'GL': '88.83.0.0/19',
	4351	'GM': '160.182.0.0/15',
	4352	'GN': '197.149.192.0/18',
	4353	'GP': '104.250.0.0/19',
	4354	'GQ': '105.235.224.0/20',
	4355	'GR': '94.64.0.0/13',
	4356	'GT': '168.234.0.0/16',
	4357	'GU': '168.123.0.0/16',
	4358	'GW': '197.214.80.0/20',
	4359	'GY': '181.41.64.0/18',
	4360	'HK': '113.252.0.0/14',
	4361	'HN': '181.210.0.0/16',
	4362	'HR': '93.136.0.0/13',
	4363	'HT': '148.102.128.0/17',
	4364	'HU': '84.0.0.0/14',
	4365	'ID': '39.192.0.0/10',
	4366	'IE': '87.32.0.0/12',
	4367	'IL': '79.176.0.0/13',
	4368	'IM': '5.62.80.0/20',
	4369	'IN': '117.192.0.0/10',
	4370	'IO': '203.83.48.0/21',
	4371	'IQ': '37.236.0.0/14',
	4372	'IR': '2.176.0.0/12',
	4373	'IS': '82.221.0.0/16',
	4374	'IT': '79.0.0.0/10',
	4375	'JE': '87.244.64.0/18',
	4376	'JM': '72.27.0.0/17',
	4377	'JO': '176.29.0.0/16',
	4378	'JP': '133.0.0.0/8',
	4379	'KE': '105.48.0.0/12',
	4380	'KG': '158.181.128.0/17',
	4381	'KH': '36.37.128.0/17',
	4382	'KI': '103.25.140.0/22',
	4383	'KM': '197.255.224.0/20',
	4384	'KN': '198.167.192.0/19',
	4385	'KP': '175.45.176.0/22',
	4386	'KR': '175.192.0.0/10',
	4387	'KW': '37.36.0.0/14',
	4388	'KY': '64.96.0.0/15',
	4389	'KZ': '2.72.0.0/13',
	4390	'LA': '115.84.64.0/18',
	4391	'LB': '178.135.0.0/16',
	4392	'LC': '24.92.144.0/20',
	4393	'LI': '82.117.0.0/19',
	4394	'LK': '112.134.0.0/15',
	4395	'LR': '102.183.0.0/16',
	4396	'LS': '129.232.0.0/17',
	4397	'LT': '78.56.0.0/13',
	4398	'LU': '188.42.0.0/16',
	4399	'LV': '46.109.0.0/16',
	4400	'LY': '41.252.0.0/14',
	4401	'MA': '105.128.0.0/11',
	4402	'MC': '88.209.64.0/18',
	4403	'MD': '37.246.0.0/16',
	4404	'ME': '178.175.0.0/17',
	4405	'MF': '74.112.232.0/21',
	4406	'MG': '154.126.0.0/17',
	4407	'MH': '117.103.88.0/21',
	4408	'MK': '77.28.0.0/15',
	4409	'ML': '154.118.128.0/18',
	4410	'MM': '37.111.0.0/17',
	4411	'MN': '49.0.128.0/17',
	4412	'MO': '60.246.0.0/16',
	4413	'MP': '202.88.64.0/20',
	4414	'MQ': '109.203.224.0/19',
	4415	'MR': '41.188.64.0/18',
	4416	'MS': '208.90.112.0/22',
	4417	'MT': '46.11.0.0/16',
	4418	'MU': '105.16.0.0/12',
	4419	'MV': '27.114.128.0/18',
	4420	'MW': '102.70.0.0/15',
	4421	'MX': '187.192.0.0/11',
	4422	'MY': '175.136.0.0/13',
	4423	'MZ': '197.218.0.0/15',
	4424	'NA': '41.182.0.0/16',
	4425	'NC': '101.101.0.0/18',
	4426	'NE': '197.214.0.0/18',
	4427	'NF': '203.17.240.0/22',
	4428	'NG': '105.112.0.0/12',
	4429	'NI': '186.76.0.0/15',
	4430	'NL': '145.96.0.0/11',
	4431	'NO': '84.208.0.0/13',
	4432	'NP': '36.252.0.0/15',
	4433	'NR': '203.98.224.0/19',
	4434	'NU': '49.156.48.0/22',
	4435	'NZ': '49.224.0.0/14',
	4436	'OM': '5.36.0.0/15',
	4437	'PA': '186.72.0.0/15',
	4438	'PE': '186.160.0.0/14',
	4439	'PF': '123.50.64.0/18',
	4440	'PG': '124.240.192.0/19',
	4441	'PH': '49.144.0.0/13',
	4442	'PK': '39.32.0.0/11',
	4443	'PL': '83.0.0.0/11',
	4444	'PM': '70.36.0.0/20',
	4445	'PR': '66.50.0.0/16',
	4446	'PS': '188.161.0.0/16',
	4447	'PT': '85.240.0.0/13',
	4448	'PW': '202.124.224.0/20',
	4449	'PY': '181.120.0.0/14',
	4450	'QA': '37.210.0.0/15',
	4451	'RE': '102.35.0.0/16',
	4452	'RO': '79.112.0.0/13',
	4453	'RS': '93.86.0.0/15',
	4454	'RU': '5.136.0.0/13',
	4455	'RW': '41.186.0.0/16',
	4456	'SA': '188.48.0.0/13',
	4457	'SB': '202.1.160.0/19',
	4458	'SC': '154.192.0.0/11',
	4459	'SD': '102.120.0.0/13',
	4460	'SE': '78.64.0.0/12',
	4461	'SG': '8.128.0.0/10',
	4462	'SI': '188.196.0.0/14',
	4463	'SK': '78.98.0.0/15',
	4464	'SL': '102.143.0.0/17',
	4465	'SM': '89.186.32.0/19',
	4466	'SN': '41.82.0.0/15',
	4467	'SO': '154.115.192.0/18',
	4468	'SR': '186.179.128.0/17',
	4469	'SS': '105.235.208.0/21',
	4470	'ST': '197.159.160.0/19',
	4471	'SV': '168.243.0.0/16',
	4472	'SX': '190.102.0.0/20',
	4473	'SY': '5.0.0.0/16',
	4474	'SZ': '41.84.224.0/19',
	4475	'TC': '65.255.48.0/20',
	4476	'TD': '154.68.128.0/19',
	4477	'TG': '196.168.0.0/14',
	4478	'TH': '171.96.0.0/13',
	4479	'TJ': '85.9.128.0/18',
	4480	'TK': '27.96.24.0/21',
	4481	'TL': '180.189.160.0/20',
	4482	'TM': '95.85.96.0/19',
	4483	'TN': '197.0.0.0/11',
	4484	'TO': '175.176.144.0/21',
	4485	'TR': '78.160.0.0/11',
	4486	'TT': '186.44.0.0/15',
	4487	'TV': '202.2.96.0/19',
	4488	'TW': '120.96.0.0/11',
	4489	'TZ': '156.156.0.0/14',
	4490	'UA': '37.52.0.0/14',
	4491	'UG': '102.80.0.0/13',
	4492	'US': '6.0.0.0/8',
	4493	'UY': '167.56.0.0/13',
	4494	'UZ': '84.54.64.0/18',
	4495	'VA': '212.77.0.0/19',
	4496	'VC': '207.191.240.0/21',
	4497	'VE': '186.88.0.0/13',
	4498	'VG': '66.81.192.0/20',
	4499	'VI': '146.226.0.0/16',
	4500	'VN': '14.160.0.0/11',
	4501	'VU': '202.80.32.0/20',
	4502	'WF': '117.20.32.0/21',
	4503	'WS': '202.4.32.0/19',
	4504	'YE': '134.35.0.0/16',
	4505	'YT': '41.242.116.0/22',
	4506	'ZA': '41.0.0.0/11',
	4507	'ZM': '102.144.0.0/13',
	4508	'ZW': '102.177.192.0/18',
	4509	}
	4510
	4511	@classmethod
	4512	def random_ipv4(cls, code_or_block):
	4513	if len(code_or_block) == 2:
	4514	block = cls._country_ip_map.get(code_or_block.upper())
	4515	if not block:
	4516	return None
	4517	else:
	4518	block = code_or_block
	4519	addr, preflen = block.split('/')
	4520	addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
	4521	addr_max = addr_min \| (0xffffffff >> int(preflen))
	4522	return compat_str(socket.inet_ntoa(
	4523	compat_struct_pack('!L', random.randint(addr_min, addr_max))))
	4524
	4525
	4526	class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
	4527	def __init__(self, proxies=None):
	4528	# Set default handlers
	4529	for type in ('http', 'https'):
	4530	setattr(self, '%s_open' % type,
	4531	lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
	4532	meth(r, proxy, type))
	4533	compat_urllib_request.ProxyHandler.__init__(self, proxies)
	4534
	4535	def proxy_open(self, req, proxy, type):
	4536	req_proxy = req.headers.get('Ytdl-request-proxy')
	4537	if req_proxy is not None:
	4538	proxy = req_proxy
	4539	del req.headers['Ytdl-request-proxy']
	4540
	4541	if proxy == '__noproxy__':
	4542	return None # No Proxy
	4543	if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
	4544	req.add_header('Ytdl-socks-proxy', proxy)
	4545	# yt-dlp's http/https handlers do wrapping the socket with socks
	4546	return None
	4547	return compat_urllib_request.ProxyHandler.proxy_open(
	4548	self, req, proxy, type)
	4549
	4550
	4551	# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
	4552	# released into Public Domain
	4553	# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
	4554
	4555	def long_to_bytes(n, blocksize=0):
	4556	"""long_to_bytes(n:long, blocksize:int) : string
	4557	Convert a long integer to a byte string.
	4558
	4559	If optional blocksize is given and greater than zero, pad the front of the
	4560	byte string with binary zeros so that the length is a multiple of
	4561	blocksize.
	4562	"""
	4563	# after much testing, this algorithm was deemed to be the fastest
	4564	s = b''
	4565	n = int(n)
	4566	while n > 0:
	4567	s = compat_struct_pack('>I', n & 0xffffffff) + s
	4568	n = n >> 32
	4569	# strip off leading zeros
	4570	for i in range(len(s)):
	4571	if s[i] != b'\000'[0]:
	4572	break
	4573	else:
	4574	# only happens when n == 0
	4575	s = b'\000'
	4576	i = 0
	4577	s = s[i:]
	4578	# add back some pad bytes. this could be done more efficiently w.r.t. the
	4579	# de-padding being done above, but sigh...
	4580	if blocksize > 0 and len(s) % blocksize:
	4581	s = (blocksize - len(s) % blocksize) * b'\000' + s
	4582	return s
	4583
	4584
	4585	def bytes_to_long(s):
	4586	"""bytes_to_long(string) : long
	4587	Convert a byte string to a long integer.
	4588
	4589	This is (essentially) the inverse of long_to_bytes().
	4590	"""
	4591	acc = 0
	4592	length = len(s)
	4593	if length % 4:
	4594	extra = (4 - length % 4)
	4595	s = b'\000' * extra + s
	4596	length = length + extra
	4597	for i in range(0, length, 4):
	4598	acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
	4599	return acc
	4600
	4601
	4602	def ohdave_rsa_encrypt(data, exponent, modulus):
	4603	'''
	4604	Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
	4605
	4606	Input:
	4607	data: data to encrypt, bytes-like object
	4608	exponent, modulus: parameter e and N of RSA algorithm, both integer
	4609	Output: hex string of encrypted data
	4610
	4611	Limitation: supports one block encryption only
	4612	'''
	4613
	4614	payload = int(binascii.hexlify(data[::-1]), 16)
	4615	encrypted = pow(payload, exponent, modulus)
	4616	return '%x' % encrypted
	4617
	4618
	4619	def pkcs1pad(data, length):
	4620	"""
	4621	Padding input data with PKCS#1 scheme
	4622
	4623	@param {int[]} data input data
	4624	@param {int} length target length
	4625	@returns {int[]} padded data
	4626	"""
	4627	if len(data) > length - 11:
	4628	raise ValueError('Input data too long for PKCS#1 padding')
	4629
	4630	pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
	4631	return [0, 2] + pseudo_random + [0] + data
	4632
	4633
	4634	def encode_base_n(num, n, table=None):
	4635	FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
	4636	if not table:
	4637	table = FULL_TABLE[:n]
	4638
	4639	if n > len(table):
	4640	raise ValueError('base %d exceeds table length %d' % (n, len(table)))
	4641
	4642	if num == 0:
	4643	return table[0]
	4644
	4645	ret = ''
	4646	while num:
	4647	ret = table[num % n] + ret
	4648	num = num // n
	4649	return ret
	4650
	4651
	4652	def decode_packed_codes(code):
	4653	mobj = re.search(PACKED_CODES_RE, code)
	4654	obfuscated_code, base, count, symbols = mobj.groups()
	4655	base = int(base)
	4656	count = int(count)
	4657	symbols = symbols.split('\|')
	4658	symbol_table = {}
	4659
	4660	while count:
	4661	count -= 1
	4662	base_n_count = encode_base_n(count, base)
	4663	symbol_table[base_n_count] = symbols[count] or base_n_count
	4664
	4665	return re.sub(
	4666	r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
	4667	obfuscated_code)
	4668
	4669
	4670	def caesar(s, alphabet, shift):
	4671	if shift == 0:
	4672	return s
	4673	l = len(alphabet)
	4674	return ''.join(
	4675	alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
	4676	for c in s)
	4677
	4678
	4679	def rot47(s):
	4680	return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~''', 47)
	4681
	4682
	4683	def parse_m3u8_attributes(attrib):
	4684	info = {}
	4685	for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"\|[^",]+)(?:,\|$)', attrib):
	4686	if val.startswith('"'):
	4687	val = val[1:-1]
	4688	info[key] = val
	4689	return info
	4690
	4691
	4692	def urshift(val, n):
	4693	return val >> n if val >= 0 else (val + 0x100000000) >> n
	4694
	4695
	4696	# Based on png2str() written by @gdkchan and improved by @yokrysty
	4697	# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG bytes into rows of raw sample values.

    @param png_data  Complete content of a PNG file as bytes
    @returns         Tuple (width, height, pixels). ``pixels`` has one list
                     per image row, each holding ``width * 3`` integers.
    @raises IOError  If the signature/first chunk is not PNG + IHDR, or if
                     no IDAT data is found

    NOTE(review): every row is treated as ``width * 3`` bytes, i.e. 3 bytes
    per pixel; bit depth, colour type and interlacing from IHDR are never
    inspected here. Presumably only 8-bit RGB, non-interlaced images are
    expected - confirm against callers.
    """
    # Reference: https://www.w3.org/TR/PNG/
    # Everything after the 8-byte signature is a sequence of chunks
    header = png_data[8:]

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned formats keyed by the number of bytes to unpack
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Each chunk is: 4-byte length, 4-byte type, `length` data bytes, 4-byte CRC
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk was verified above to be IHDR
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image stream may be split over several IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of data bytes per row (3 bytes per pixel)
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Map a flat index over the already decoded data back to (row, column)
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Each row in the decompressed stream is prefixed by one filter-type byte
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so that _get_pixel() can already read
        # the values decoded earlier in this same row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # `left` is the value 3 bytes earlier in the row, `up` the value
            # at the same offset in the previous row; both stay 0 at the edges
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Filter type 0 matches no branch and leaves the value unchanged
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # `c` is the upper-left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Use whichever of a, b, c is closest to p (ties: a, then b)
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
	4801
	4802
def write_xattr(path, key, value):
    """Set the extended attribute ``key`` to ``value`` on the file at ``path``.

    The backends are tried in this order: the importable ``xattr`` module
    (pyxattr or xattr), NTFS Alternate Data Streams when compat_os_name is
    'nt', and finally the ``setfattr`` or ``xattr`` command line tools.

    @param path   Path of the file to modify
    @param key    Attribute name
    @param value  Attribute value as bytes (it is written in binary mode on
                  NTFS and decoded as UTF-8 for the command line tools)
    @raises XAttrMetadataError     If the chosen backend fails to write
    @raises XAttrUnavailableError  If no usable backend is found, or pyxattr
                                   is older than the required version
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE: this exception is not caught by the `except ImportError`
                # below, so despite the message no fallback happens here
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No python xattr module is importable; use platform specific means
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # The stream is addressed as "<path>:<key>"
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed as a command line argument, so it has
                # to be text from here on
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are available
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported with the tool's stderr output
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
	4885
	4886
	4887	def random_birthday(year_field, month_field, day_field):
	4888	start_date = datetime.date(1950, 1, 1)
	4889	end_date = datetime.date(1995, 12, 31)
	4890	offset = random.randint(0, (end_date - start_date).days)
	4891	random_date = start_date + datetime.timedelta(offset)
	4892	return {
	4893	year_field: str(random_date.year),
	4894	month_field: str(random_date.month),
	4895	day_field: str(random_date.day),
	4896	}
	4897
	4898
	4899	# Templates for internet shortcut files, which are plain text files.
	4900	DOT_URL_LINK_TEMPLATE = '''
	4901	[InternetShortcut]
	4902	URL=%(url)s
	4903	'''.lstrip()
	4904
	4905	DOT_WEBLOC_LINK_TEMPLATE = '''
	4906	<?xml version="1.0" encoding="UTF-8"?>
	4907	<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
	4908	<plist version="1.0">
	4909	<dict>
	4910	\t<key>URL</key>
	4911	\t<string>%(url)s</string>
	4912	</dict>
	4913	</plist>
	4914	'''.lstrip()
	4915
	4916	DOT_DESKTOP_LINK_TEMPLATE = '''
	4917	[Desktop Entry]
	4918	Encoding=UTF-8
	4919	Name=%(filename)s
	4920	Type=Link
	4921	URL=%(url)s
	4922	Icon=text-html
	4923	'''.lstrip()
	4924
	4925	LINK_TEMPLATES = {
	4926	'url': DOT_URL_LINK_TEMPLATE,
	4927	'desktop': DOT_DESKTOP_LINK_TEMPLATE,
	4928	'webloc': DOT_WEBLOC_LINK_TEMPLATE,
	4929	}
	4930
	4931
	4932	def iri_to_uri(iri):
	4933	"""
	4934	Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
	4935
	4936	The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding besides those already escaped, leaving the URI intact.
	4937	"""
	4938
	4939	iri_parts = compat_urllib_parse_urlparse(iri)
	4940
	4941	if '[' in iri_parts.netloc:
	4942	raise ValueError('IPv6 URIs are not, yet, supported.')
	4943	# Querying `.netloc`, when there's only one bracket, also raises a ValueError.
	4944
	4945	# The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
	4946
	4947	net_location = ''
	4948	if iri_parts.username:
	4949	net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
	4950	if iri_parts.password is not None:
	4951	net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
	4952	net_location += '@'
	4953
	4954	net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
	4955	# The 'idna' encoding produces ASCII text.
	4956	if iri_parts.port is not None and iri_parts.port != 80:
	4957	net_location += ':' + str(iri_parts.port)
	4958
	4959	return compat_urllib_parse_urlunparse(
	4960	(iri_parts.scheme,
	4961	net_location,
	4962
	4963	compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@\|~"),
	4964
	4965	# Unsure about the `safe` argument, since this is a legacy way of handling parameters.
	4966	compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@\|~"),
	4967
	4968	# Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
	4969	compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{\|}~"),
	4970
	4971	compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{\|}~")))
	4972
	4973	# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
	4974
	4975
	4976	def to_high_limit_path(path):
	4977	if sys.platform in ['win32', 'cygwin']:
	4978	# Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
	4979	return r'\\?\ '.rstrip() + os.path.abspath(path)
	4980
	4981	return path
	4982
	4983
	4984	def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
	4985	val = traverse_obj(obj, *variadic(field))
	4986	if val in ignore:
	4987	return default
	4988	return template % (func(val) if func else val)
	4989
	4990
	4991	def clean_podcast_url(url):
	4992	return re.sub(r'''(?x)
	4993	(?:
	4994	(?:
	4995	chtbl\.com/track\|
	4996	media\.blubrry\.com\| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
	4997	play\.podtrac\.com
	4998	)/[^/]+\|
	4999	(?:dts\|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}\| # http://analytics.podtrac.com/how-to-measure
	5000	flex\.acast\.com\|
	5001	pd(?:
	5002	cn\.co\| # https://podcorn.com/analytics-prefix/
	5003	st\.fm # https://podsights.com/docs/
	5004	)/e
	5005	)/''', '', url)
	5006
	5007
	5008	_HEX_TABLE = '0123456789abcdef'
	5009
	5010
	5011	def random_uuidv4():
	5012	return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
	5013
	5014
	5015	def make_dir(path, to_screen=None):
	5016	try:
	5017	dn = os.path.dirname(path)
	5018	if dn and not os.path.exists(dn):
	5019	os.makedirs(dn)
	5020	return True
	5021	except (OSError, IOError) as err:
	5022	if callable(to_screen) is not None:
	5023	to_screen('unable to create directory ' + error_to_compat_str(err))
	5024	return False
	5025
	5026
	5027	def get_executable_path():
	5028	from zipimport import zipimporter
	5029	if hasattr(sys, 'frozen'): # Running from PyInstaller
	5030	path = os.path.dirname(sys.executable)
	5031	elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
	5032	path = os.path.join(os.path.dirname(__file__), '../..')
	5033	else:
	5034	path = os.path.join(os.path.dirname(__file__), '..')
	5035	return os.path.abspath(path)
	5036
	5037
	5038	def load_plugins(name, suffix, namespace):
	5039	classes = {}
	5040	try:
	5041	plugins_spec = importlib.util.spec_from_file_location(
	5042	name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
	5043	plugins = importlib.util.module_from_spec(plugins_spec)
	5044	sys.modules[plugins_spec.name] = plugins
	5045	plugins_spec.loader.exec_module(plugins)
	5046	for name in dir(plugins):
	5047	if name in namespace:
	5048	continue
	5049	if not name.endswith(suffix):
	5050	continue
	5051	klass = getattr(plugins, name)
	5052	classes[name] = namespace[name] = klass
	5053	except FileNotFoundError:
	5054	pass
	5055	return classes
	5056
	5057
	5058	def traverse_obj(
	5059	obj, *path_list, default=None, expected_type=None, get_all=True,
	5060	casesense=True, is_user_input=False, traverse_string=False):
	5061	''' Traverse nested list/dict/tuple
	5062	@param path_list A list of paths which are checked one by one.
	5063	Each path is a list of keys where each key is a string,
	5064	a function, a tuple of strings/None or "...".
	5065	When a fuction is given, it takes the key as argument and
	5066	returns whether the key matches or not. When a tuple is given,
	5067	all the keys given in the tuple are traversed, and
	5068	"..." traverses all the keys in the object
	5069	"None" returns the object without traversal
	5070	@param default Default value to return
	5071	@param expected_type Only accept final value of this type (Can also be any callable)
	5072	@param get_all Return all the values obtained from a path or only the first one
	5073	@param casesense Whether to consider dictionary keys as case sensitive
	5074	@param is_user_input Whether the keys are generated from user input. If True,
	5075	strings are converted to int/slice if necessary
	5076	@param traverse_string Whether to traverse inside strings. If True, any
	5077	non-compatible object will also be converted into a string
	5078	# TODO: Write tests
	5079	'''
	5080	if not casesense:
	5081	_lower = lambda k: (k.lower() if isinstance(k, str) else k)
	5082	path_list = (map(_lower, variadic(path)) for path in path_list)
	5083
	5084	def _traverse_obj(obj, path, _current_depth=0):
	5085	nonlocal depth
	5086	path = tuple(variadic(path))
	5087	for i, key in enumerate(path):
	5088	if None in (key, obj):
	5089	return obj
	5090	if isinstance(key, (list, tuple)):
	5091	obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
	5092	key = ...
	5093	if key is ...:
	5094	obj = (obj.values() if isinstance(obj, dict)
	5095	else obj if isinstance(obj, (list, tuple, LazyList))
	5096	else str(obj) if traverse_string else [])
	5097	_current_depth += 1
	5098	depth = max(depth, _current_depth)
	5099	return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
	5100	elif callable(key):
	5101	if isinstance(obj, (list, tuple, LazyList)):
	5102	obj = enumerate(obj)
	5103	elif isinstance(obj, dict):
	5104	obj = obj.items()
	5105	else:
	5106	if not traverse_string:
	5107	return None
	5108	obj = str(obj)
	5109	_current_depth += 1
	5110	depth = max(depth, _current_depth)
	5111	return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
	5112	elif isinstance(obj, dict) and not (is_user_input and key == ':'):
	5113	obj = (obj.get(key) if casesense or (key in obj)
	5114	else next((v for k, v in obj.items() if _lower(k) == key), None))
	5115	else:
	5116	if is_user_input:
	5117	key = (int_or_none(key) if ':' not in key
	5118	else slice(*map(int_or_none, key.split(':'))))
	5119	if key == slice(None):
	5120	return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
	5121	if not isinstance(key, (int, slice)):
	5122	return None
	5123	if not isinstance(obj, (list, tuple, LazyList)):
	5124	if not traverse_string:
	5125	return None
	5126	obj = str(obj)
	5127	try:
	5128	obj = obj[key]
	5129	except IndexError:
	5130	return None
	5131	return obj
	5132
	5133	if isinstance(expected_type, type):
	5134	type_test = lambda val: val if isinstance(val, expected_type) else None
	5135	elif expected_type is not None:
	5136	type_test = expected_type
	5137	else:
	5138	type_test = lambda val: val
	5139
	5140	for path in path_list:
	5141	depth = 0
	5142	val = _traverse_obj(obj, path)
	5143	if val is not None:
	5144	if depth:
	5145	for _ in range(depth - 1):
	5146	val = itertools.chain.from_iterable(v for v in val if v is not None)
	5147	val = [v for v in map(type_test, val) if v is not None]
	5148	if val:
	5149	return val if get_all else val[0]
	5150	else:
	5151	val = type_test(val)
	5152	if val is not None:
	5153	return val
	5154	return default
	5155
	5156
	5157	def traverse_dict(dictn, keys, casesense=True):
	5158	write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
	5159	'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
	5160	return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
	5161
	5162
	5163	def variadic(x, allowed_types=(str, bytes, dict)):
	5164	return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
	5165
	5166
	5167	# create a JSON Web Signature (jws) with HS256 algorithm
	5168	# the resulting format is in JWS Compact Serialization
	5169	# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
	5170	# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
	5171	def jwt_encode_hs256(payload_data, key, headers={}):
	5172	header_data = {
	5173	'alg': 'HS256',
	5174	'typ': 'JWT',
	5175	}
	5176	if headers:
	5177	header_data.update(headers)
	5178	header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
	5179	payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
	5180	h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
	5181	signature_b64 = base64.b64encode(h.digest())
	5182	token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
	5183	return token
	5184
	5185
	5186	# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
	5187	def jwt_decode_hs256(jwt):
	5188	header_b64, payload_b64, signature_b64 = jwt.split('.')
	5189	payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
	5190	return payload_data
	5191
	5192
	5193	def supports_terminal_sequences(stream):
	5194	if compat_os_name == 'nt':
	5195	from .compat import WINDOWS_VT_MODE # Must be imported locally
	5196	if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
	5197	return False
	5198	elif not os.getenv('TERM'):
	5199	return False
	5200	try:
	5201	return stream.isatty()
	5202	except BaseException:
	5203	return False
	5204
	5205
	5206	_terminal_sequences_re = re.compile('\033\\[[^m]+m')
	5207
	5208
	5209	def remove_terminal_sequences(string):
	5210	return _terminal_sequences_re.sub('', string)
	5211
	5212
	5213	def number_of_digits(number):
	5214	return len('%d' % number)
	5215
	5216
	5217	def join_nonempty(*values, delim='-', from_dict=None):
	5218	if from_dict is not None:
	5219	values = map(from_dict.get, values)
	5220	return delim.join(map(str, filter(None, values)))
	5221
	5222
	5223	class Config:
	5224	own_args = None
	5225	filename = None
	5226	__initialized = False
	5227
	5228	def __init__(self, parser, label=None):
	5229	self._parser, self.label = parser, label
	5230	self._loaded_paths, self.configs = set(), []
	5231
	5232	def init(self, args=None, filename=None):
	5233	assert not self.__initialized
	5234	directory = ''
	5235	if filename:
	5236	location = os.path.realpath(filename)
	5237	directory = os.path.dirname(location)
	5238	if location in self._loaded_paths:
	5239	return False
	5240	self._loaded_paths.add(location)
	5241
	5242	self.__initialized = True
	5243	self.own_args, self.filename = args, filename
	5244	for location in self._parser.parse_args(args)[0].config_locations or []:
	5245	location = os.path.join(directory, expand_path(location))
	5246	if os.path.isdir(location):
	5247	location = os.path.join(location, 'yt-dlp.conf')
	5248	if not os.path.exists(location):
	5249	self._parser.error(f'config location {location} does not exist')
	5250	self.append_config(self.read_file(location), location)
	5251	return True
	5252
	5253	def __str__(self):
	5254	label = join_nonempty(
	5255	self.label, 'config', f'"{self.filename}"' if self.filename else '',
	5256	delim=' ')
	5257	return join_nonempty(
	5258	self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
	5259	*(f'\n{c}'.replace('\n', '\n\| ')[1:] for c in self.configs),
	5260	delim='\n')
	5261
	5262	@staticmethod
	5263	def read_file(filename, default=[]):
	5264	try:
	5265	optionf = open(filename)
	5266	except IOError:
	5267	return default # silently skip if file is not present
	5268	try:
	5269	# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
	5270	contents = optionf.read()
	5271	if sys.version_info < (3,):
	5272	contents = contents.decode(preferredencoding())
	5273	res = compat_shlex_split(contents, comments=True)
	5274	finally:
	5275	optionf.close()
	5276	return res
	5277
	5278	@staticmethod
	5279	def hide_login_info(opts):
	5280	PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
	5281	eqre = re.compile('^(?P<key>' + ('\|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
	5282
	5283	def _scrub_eq(o):
	5284	m = eqre.match(o)
	5285	if m:
	5286	return m.group('key') + '=PRIVATE'
	5287	else:
	5288	return o
	5289
	5290	opts = list(map(_scrub_eq, opts))
	5291	for idx, opt in enumerate(opts):
	5292	if opt in PRIVATE_OPTS and idx + 1 < len(opts):
	5293	opts[idx + 1] = 'PRIVATE'
	5294	return opts
	5295
	5296	def append_config(self, *args, label=None):
	5297	config = type(self)(self._parser, label)
	5298	config._loaded_paths = self._loaded_paths
	5299	if config.init(*args):
	5300	self.configs.append(config)
	5301
	5302	@property
	5303	def all_args(self):
	5304	for config in reversed(self.configs):
	5305	yield from config.all_args
	5306	yield from self.own_args or []
	5307
	5308	def parse_args(self):
	5309	return self._parser.parse_args(list(self.all_args))